diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8b18726c9499..3bf486866579 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,25 +1 @@ -/*.md @ydb-platform/docs - -/ydb/core/fq/ @ydb-platform/fq -/ydb/core/public_http/ @ydb-platform/fq - -/ydb/docs/ @ydb-platform/docs - -/ydb/library/yql/ @ydb-platform/yql -/ydb/library/yql/dq @ydb-platform/yql @ydb-platform/qp -/ydb/library/yql/providers/generic @ydb-platform/fq -/ydb/library/yql/providers/pq @ydb-platform/fq -/ydb/library/yql/providers/s3 @ydb-platform/fq -/ydb/library/yql/providers/solomon @ydb-platform/fq - -/ydb/library/yql/yt @Krock21 @Krisha11 @zlobober @gritukan - -/ydb/services/fq/ @ydb-platform/fq - -/ydb/core/kafka_proxy @ydb-platform/Topics -/ydb/core/persqueue @ydb-platform/Topics -/ydb/services/datastreams @ydb-platform/Topics -/ydb/services/deprecated/persqueue_v0 @ydb-platform/Topics -/ydb/services/persqueue_v1 @ydb-platform/Topics - -/ydb/core/config/ut @ydb-platform/core +* @ydb-platform/fq diff --git a/.github/actions/s3cmd/action.yml b/.github/actions/s3cmd/action.yml index a4ebb583555e..8a3a94d48d3e 100644 --- a/.github/actions/s3cmd/action.yml +++ b/.github/actions/s3cmd/action.yml @@ -8,16 +8,16 @@ inputs: required: true description: "s3 key secret" s3_bucket: - required: true + required: false description: "s3 bucket" s3_endpoint: required: true description: "s3 endpoint" folder_prefix: - required: true + required: false description: "folder prefix" build_preset: - required: true + required: false description: "build preset like relwithdebinfo" runs: using: "composite" @@ -35,7 +35,14 @@ runs: host_base = storage.yandexcloud.net host_bucket = %(bucket)s.storage.yandexcloud.net EOF - + env: + s3_key_id: ${{ inputs.s3_key_id }} + s3_secret_access_key: ${{ inputs.s3_key_secret }} + + - name: export s3 path variables + shell: bash + if: inputs.build_preset + run: | folder="${{ runner.arch == 'X64' && 'x86-64' || runner.arch == 'ARM64' && 'arm64' || 'unknown' }}" 
BUILD_PRESET="${{ inputs.build_preset }}" @@ -57,7 +64,4 @@ runs: echo "S3_BUCKET_PATH=s3://${{ inputs.s3_bucket }}/${{ github.repository }}/${{github.workflow}}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV echo "S3_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV echo "S3_TEST_ARTIFACTS_BUCKET_PATH=s3://${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{github.workflow}}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV - echo "S3_TEST_ARTIFACTS_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV - env: - s3_key_id: ${{ inputs.s3_key_id }} - s3_secret_access_key: ${{ inputs.s3_key_secret }} + echo "S3_TEST_ARTIFACTS_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV \ No newline at end of file diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index 63a8df0cbe9f..b669c82b33da 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -8,21 +8,24 @@ ydb/core/external_sources * ydb/core/quoter/ut QuoterWithKesusTest.PrefetchCoefficient ydb/core/keyvalue/ut_trace TKeyValueTracingTest.* ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDataSourceMetadata -ydb/core/kqp/ut/olap KqpOlapIndexes.IndexesActualization +ydb/core/kqp/ut/join KqpJoinOrder.Chain65Nodes ydb/core/kqp/ut/olap KqpOlapBlobsSharing.* -ydb/core/kqp/ut/olap KqpOlap.ScanQueryOltpAndOlap ydb/core/kqp/ut/olap KqpOlapStatistics.StatsUsageWithTTL -ydb/core/kqp/ut/olap KqpOlap.YqlScriptOltpAndOlap -ydb/core/kqp/ut/olap 
KqpOlapAggregations.Aggregation_ResultCountAll_FilterL ydb/core/kqp/ut/pg KqpPg.CreateIndex +ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssueInteractiveTx+withSink +ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssue+withSink +ydb/core/kqp/ut/tx KqpSnapshotRead.ReadOnlyTxWithIndexCommitsOnConcurrentWrite+withSink +ydb/core/kqp/ut/tx KqpSinkTx.InvalidateOnError ydb/core/kqp/ut/query KqpLimits.QueryReplySize ydb/core/kqp/ut/query KqpQuery.QueryTimeout +ydb/core/kqp/ut/service KqpQueryService.TableSink_OlapRWQueries +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpReplace+HasSecondaryIndex +ydb/core/kqp/ut/query KqpQuery.OlapCreateAsSelect_Complex +ydb/core/kqp/ut/query KqpQuery.OlapCreateAsSelect_Simple +ydb/core/kqp/ut/federated_query/s3 KqpFederatedQuery.CreateTableAsSelectFromExternalDataSource +ydb/core/kqp/ut/federated_query/s3 KqpFederatedQuery.CreateTableAsSelectFromExternalTable ydb/core/kqp/ut/scan KqpRequestContext.TraceIdInErrorMessage ydb/core/kqp/ut/scheme KqpOlapScheme.TenThousandColumns -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_GenericQuerys -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_StreamGenericQuery -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_UsesGenericQueryOnJoinWithDataShardTable -ydb/core/kqp/ut/scheme KqpOlapScheme.DropTable ydb/core/kqp/ut/scheme KqpScheme.AlterAsyncReplication ydb/core/kqp/ut/scheme KqpScheme.QueryWithAlter ydb/core/kqp/ut/scheme [14/50]* @@ -30,8 +33,11 @@ ydb/core/kqp/ut/scheme [15/50]* ydb/core/kqp/ut/scheme [44/50]* ydb/core/kqp/ut/service KqpQueryService.ExecuteQueryPgTableSelect ydb/core/kqp/ut/service KqpQueryService.QueryOnClosedSession +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpUpdate ydb/core/kqp/ut/service KqpService.CloseSessionsWithLoad ydb/core/kqp/ut/service [38/50]* +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpUpdate +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpReplace+HasSecondaryIndex ydb/core/persqueue/ut [37/40] chunk chunk ydb/core/persqueue/ut [38/40] chunk chunk 
ydb/core/persqueue/ut TPQTest.*DirectRead* @@ -107,4 +113,6 @@ ydb/tests/functional/tenants test_storage_config.py.TestStorageConfig.* ydb/tests/functional/tenants test_tenants.py.* ydb/tests/functional/ydb_cli test_ydb_impex.py.TestImpex.test_big_dataset* ydb/tests/tools/pq_read/test test_timeout.py.TestTimeout.test_timeout +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestFilterCompare +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestAggregation ydb/tests/functional/rename [test_rename.py */10] chunk chunk diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml new file mode 100644 index 000000000000..673e375ee5ea --- /dev/null +++ b/.github/workflows/nightly_build.yml @@ -0,0 +1,54 @@ +name: Nightly-Build # workflow used to upload built binaries to s3 +on: + workflow_dispatch: + inputs: + runner_label: + type: string + default: "auto-provisioned" + description: "runner label" + commit_sha: + type: string + default: "" +jobs: + build_and_test: + strategy: + fail-fast: false + matrix: + build_preset: ["relwithdebinfo", "release-asan"] + runs-on: [ self-hosted, auto-provisioned, "${{ format('build-preset-{0}', matrix.build_preset) }}" ] + name: Build and test ${{ matrix.build_preset }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit_sha }} + fetch-depth: 2 + - name: Setup ydb access + uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials + with: + ci_ydb_service_account_key_file_credentials: ${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }} + - name: Build and test + uses: ./.github/actions/build_and_test_ya + with: + build_preset: ${{ matrix.build_preset }} + build_target: "ydb/apps/ydbd" + increment: false + run_tests: false + put_build_results_to_cache: false + secs: ${{ format('{{"TESTMO_TOKEN2":"{0}","AWS_KEY_ID":"{1}","AWS_KEY_VALUE":"{2}","REMOTE_CACHE_USERNAME":"{3}","REMOTE_CACHE_PASSWORD":"{4}"}}', + secrets.TESTMO_TOKEN2, secrets.AWS_KEY_ID, 
secrets.AWS_KEY_VALUE, secrets.REMOTE_CACHE_USERNAME, secrets.REMOTE_CACHE_PASSWORD ) }} + vars: ${{ format('{{"AWS_BUCKET":"{0}","AWS_ENDPOINT":"{1}","REMOTE_CACHE_URL":"{2}","TESTMO_URL":"{3}","TESTMO_PROJECT_ID":"{4}"}}', + vars.AWS_BUCKET, vars.AWS_ENDPOINT, vars.REMOTE_CACHE_URL_YA, vars.TESTMO_URL, vars.TESTMO_PROJECT_ID ) }} + - name: Setup s3cmd + uses: ./.github/actions/s3cmd + with: + s3_bucket: "ydb-builds" + s3_endpoint: ${{ vars.AWS_ENDPOINT }} + s3_key_id: ${{ secrets.AWS_KEY_ID }} + s3_key_secret: ${{ secrets.AWS_KEY_VALUE }} + + - name: sync results to s3 and publish links + shell: bash + run: | + set -x + s3cmd sync --follow-symlinks --acl-public --no-progress --stats --no-check-md5 "ydb/apps/ydbd/ydbd" "s3://ydb-builds/${{ github.ref_name }}/${{ matrix.build_preset }}/ydbd" -d \ No newline at end of file diff --git a/.gitignore b/.gitignore index f97992275a5d..5f55e83dd6a9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ !*/ # Unignore all files inside canondata dir !**/canondata/** +# Allow docker files +!Dockerfile /canonization_show_res.log @@ -26,6 +28,9 @@ __pycache__/ *.pb.h *.pb.cc +# Other generated +*.fbs.h + # MacOS specific .DS_Store @@ -40,6 +45,7 @@ __pycache__/ .idea/ .vscode/ .clangd +.antlr/ # KDevelop IDE *.kdev4 @@ -79,3 +85,6 @@ list_result.log bin/config.json .vs/ + +# handy for local junk, which is not intended to appear in the repo +junk/ diff --git a/library/cpp/lwtrace/mon/mon_lwtrace.cpp b/library/cpp/lwtrace/mon/mon_lwtrace.cpp index 09d56560c4bf..a10ae4a775f8 100644 --- a/library/cpp/lwtrace/mon/mon_lwtrace.cpp +++ b/library/cpp/lwtrace/mon/mon_lwtrace.cpp @@ -301,7 +301,7 @@ struct TLogQuery { } } catch (...) { ythrow yexception() - << CurrentExceptionMessage() + << EncodeHtmlPcdata(CurrentExceptionMessage()) << " while parsing track log query: " << Text; } @@ -1853,7 +1853,7 @@ class TTracesHtmlPrinter { try { Os << src->GetStartTime().ToStringUpToSeconds(); } catch (...) 
{ - Os << "error: " << CurrentExceptionMessage(); + Os << "error: " << EncodeHtmlPcdata(CurrentExceptionMessage()); } Os << "" << "
" @@ -3821,17 +3821,17 @@ class TLWTraceMonPage : public NMonitoring::IMonPage { } } catch (TPageGenBase& gen) { out.Clear(); - out << gen.what(); + out << EncodeHtmlPcdata(gen.what()); } catch (...) { out.Clear(); if (request.GetParams().Get("error") == "text") { // Text error reply is helpful for ajax requests out << NMonitoring::HTTPOKTEXT; - out << CurrentExceptionMessage(); + out << EncodeHtmlPcdata(CurrentExceptionMessage()); } else { WWW_HTML(out) { out << "

Error

"
-                        << CurrentExceptionMessage()
+                        << EncodeHtmlPcdata(CurrentExceptionMessage())
                         << Endl;
                 }
             }
diff --git a/ydb/apps/version/version_definition.cpp b/ydb/apps/version/version_definition.cpp
index 4cfb93b74f79..8d7273d188fc 100644
--- a/ydb/apps/version/version_definition.cpp
+++ b/ydb/apps/version/version_definition.cpp
@@ -2,10 +2,21 @@
 
 NKikimrConfig::TCurrentCompatibilityInfo NKikimr::TCompatibilityInfo::MakeCurrent() {
     using TCurrentConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCurrentCompatibilityInfo;
-    // using TVersionConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TVersion;
-    // using TCompatibilityRuleConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCompatibilityRule;
+    using TVersionConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TVersion;
+    using TCompatibilityRuleConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCompatibilityRule;
 
     return TCurrentConstructor{
         .Application = "ydb",
+        .Version = TVersionConstructor{  // version recorded in the current compatibility info: Year 24, Major 3
+            .Year = 24,
+            .Major = 3,
+        },
+        .CanConnectTo = {  // holds a single rule, for Application "nbs"
+            TCompatibilityRuleConstructor{
+                .Application = "nbs",
+                .LowerLimit = TVersionConstructor{ .Year = 23, .Major = 3 },  // NOTE(review): whether the limits are inclusive is not visible here - confirm
+                .UpperLimit = TVersionConstructor{ .Year = 24, .Major = 3 },  // same Year/Major as .Version above
+            }
+        }
     }.ToPB();
-}
+}
\ No newline at end of file
diff --git a/ydb/apps/ydbd/ya.make b/ydb/apps/ydbd/ya.make
index d399248c2264..12d685e849f1 100644
--- a/ydb/apps/ydbd/ya.make
+++ b/ydb/apps/ydbd/ya.make
@@ -1,6 +1,8 @@
 PROGRAM(ydbd)
 
-NO_EXPORT_DYNAMIC_SYMBOLS()
+IF (NOT SANITIZER_TYPE)  # for some reason, some tests fail under asan; see the comment in CPPCOM-32
+    NO_EXPORT_DYNAMIC_SYMBOLS()
+ENDIF()
 
 IF (OS_LINUX)
     ALLOCATOR(TCMALLOC_256K)
diff --git a/ydb/core/audit/audit_log.cpp b/ydb/core/audit/audit_log.cpp
deleted file mode 100644
index 1d799767b435..000000000000
--- a/ydb/core/audit/audit_log.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "audit_log.h"
-#include "audit_log_impl.h"
-
-#include 
-#include 
-#include 
-
-namespace NKikimr::NAudit {
-
-std::atomic AUDIT_LOG_ENABLED = false;
-
-THolder CreateAuditWriter(TMap>> logBackends)
-{
-    AUDIT_LOG_ENABLED.store(true);
-    return MakeHolder(std::move(logBackends));
-}
-
-void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts)
-{
-    auto request = MakeHolder(Now(), std::move(parts));
-    sys->Send(MakeAuditServiceID(), request.Release());
-}
-
-}    // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log.h b/ydb/core/audit/audit_log.h
index ae95ab5a4fef..512e79c3e247 100644
--- a/ydb/core/audit/audit_log.h
+++ b/ydb/core/audit/audit_log.h
@@ -1,19 +1,12 @@
 #pragma once
 
-#include 
+#include 
+#include 
 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
+#include 
+#include 
 
-#include 
-#include 
+#include 
 
 #define AUDIT_LOG_S(sys, expr)                                                                                                  \
     do {                                                                                                                        \
@@ -24,7 +17,7 @@
         }                                                                                                                       \
     } while (0) /**/
 
-#define AUDIT_LOG(expr) AUDIT_LOG_S((TlsActivationContext->ExecutorThread.ActorSystem), expr)
+#define AUDIT_LOG(expr) AUDIT_LOG_S((::NActors::TlsActivationContext->ExecutorThread.ActorSystem), expr)
 
 #define AUDIT_PART_NO_COND(key, value) AUDIT_PART_COND(key, value, true)
 #define AUDIT_PART_COND(key, value, condition)                                                                                    \
@@ -37,90 +30,14 @@
 #define GET_AUDIT_PART_MACRO(_1, _2, _3, NAME,...) NAME
 #define AUDIT_PART(...) GET_AUDIT_PART_MACRO(__VA_ARGS__, AUDIT_PART_COND, AUDIT_PART_NO_COND)(__VA_ARGS__)
 
+namespace NActors {
+    class TActorSystem;  // forward declaration: SendAuditLog below only takes a pointer to it
+}
+
 namespace NKikimr::NAudit {
 
 extern std::atomic AUDIT_LOG_ENABLED;
 
-struct TEvAuditLog
-{
-    //
-    // Events declaration
-    //
-
-    enum EEvents
-    {
-        EvBegin = EventSpaceBegin(TKikimrEvents::ES_YDB_AUDIT_LOG),
-
-        // Request actors
-        EvWriteAuditLog = EvBegin + 0,
-
-        EvEnd
-    };
-
-    static_assert(EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG),
-                  "expected EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG)");
-
-    struct TEvWriteAuditLog
-        : public NActors::TEventLocal
-    {
-        TInstant Time;
-        TVector> Parts;
-
-        TEvWriteAuditLog(TInstant time, TVector>&& parts)
-            : Time(time)
-            , Parts(std::move(parts))
-        {}
-    };
-};
-
-class TAuditLogActor final
-    : public TActor
-{
-private:
-    const TMap>> LogBackends;
-public:
-    TAuditLogActor(TMap>> logBackends)
-        : TActor(&TThis::StateWork)
-        , LogBackends(std::move(logBackends))
-    {
-    }
-
-    static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
-        return NKikimrServices::TActivity::AUDIT_WRITER_ACTOR;
-    }
-
-private:
-    STFUNC(StateWork);
-
-    void HandlePoisonPill(
-        const TEvents::TEvPoisonPill::TPtr& ev,
-        const TActorContext& ctx);
-
-    void HandleWriteAuditLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev,
-        const TActorContext& ctx);
-
-    static void WriteLog(
-        const TString& log,
-        const TVector>& logBackends);
-
-    static TString GetJsonLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev);
-
-    static TString GetTxtLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev);
-
-    void HandleUnexpectedEvent(STFUNC_SIG);
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
 void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts);
 
-inline NActors::TActorId MakeAuditServiceID() {
-    return NActors::TActorId(0, TStringBuf("YDB_AUDIT"));
-}
-
-THolder CreateAuditWriter(TMap>> logBackends);
-
 }   // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log_impl.cpp b/ydb/core/audit/audit_log_impl.cpp
index 4438882c2d42..e03e9ac7ff8e 100644
--- a/ydb/core/audit/audit_log_impl.cpp
+++ b/ydb/core/audit/audit_log_impl.cpp
@@ -1,49 +1,84 @@
-#include "audit_log.h"
-#include "audit_log_impl.h"
-
 #include 
 #include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "audit_log_service.h"
+#include "audit_log.h"
+
+#if defined LOG_T || \
+    defined LOG_D || \
+    defined LOG_I || \
+    defined LOG_N || \
+    defined LOG_W || \
+    defined LOG_E
+# error log macro redefinition
+#endif
+
+#define LOG_T(stream) LOG_TRACE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_D(stream) LOG_DEBUG_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_I(stream) LOG_INFO_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_N(stream) LOG_NOTICE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_W(stream) LOG_WARN_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_E(stream) LOG_ERROR_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
 
 namespace NKikimr::NAudit {
 
-using namespace NActors;
+// TAuditLogActor
+//
 
-void TAuditLogActor::HandlePoisonPill(
-    const TEvents::TEvPoisonPill::TPtr& ev,
-    const TActorContext& ctx)
-{
-    Y_UNUSED(ev);
-    AUDIT_LOG_ENABLED.store(false);
-    Die(ctx);
-}
+struct TEvAuditLog {
+    //
+    // Events declaration: ids are taken from the ES_YDB_AUDIT_LOG event space
+    //
 
-STFUNC(TAuditLogActor::StateWork)
-{
-    switch (ev->GetTypeRewrite()) {
-        HFunc(TEvents::TEvPoisonPill, HandlePoisonPill);
-        HFunc(TEvAuditLog::TEvWriteAuditLog, HandleWriteAuditLog);
-    default:
-        HandleUnexpectedEvent(ev);
-        break;
-    }
-}
+    enum EEvents {
+        EvBegin = EventSpaceBegin(TKikimrEvents::ES_YDB_AUDIT_LOG),
+
+        // Requests handled by TAuditLogActor (see its StateWork)
+        EvWriteAuditLog = EvBegin + 0,
+
+        EvEnd  // one past the last id in use; checked against the event space bound below
+    };
+
+    static_assert(EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG),
+        "expected EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG)"
+    );
+
+    struct TEvWriteAuditLog : public NActors::TEventLocal {  // one audit record to be written
+        TInstant Time;  // timestamp of the record; SendAuditLog in this file passes Now()
+        TVector> Parts;  // record fields - presumably the key/value pairs built by AUDIT_PART, TODO confirm
+
+        TEvWriteAuditLog(TInstant time, TVector>&& parts)
+            : Time(time)
+            , Parts(std::move(parts))  // parts are moved in, not copied
+        {}
+    };
+};
 
-void TAuditLogActor::WriteLog(const TString& log, const TVector>& logBackends) {
+void WriteLog(const TString& log, const TVector>& logBackends) {  // writes one already formatted record to every backend in the list
     for (auto& logBackend : logBackends) {
         try {
-            logBackend->WriteData(
-                TLogRecord(
-                    ELogPriority::TLOG_INFO,
-                    log.data(),
-                    log.length()));
+            logBackend->WriteData(TLogRecord(
+                ELogPriority::TLOG_INFO,  // every audit record is written with INFO priority
+                log.data(),
+                log.length()
+            ));
         } catch (const yexception& e) {
-            LOG_W("WriteLog:"
-                << " unable to write audit log (error: " << e.what() << ")");
+            LOG_W("WriteLog: unable to write audit log (error: " << e.what() << ")");  // best effort: warn, then continue with the next backend
         }
     }
 }
 
-TString TAuditLogActor::GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
+TString GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
     const auto* msg = ev->Get();
     TStringStream ss;
     ss << msg->Time << ": ";
@@ -56,7 +91,7 @@ TString TAuditLogActor::GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev
     return ss.Str();
 }
 
-TString TAuditLogActor::GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
+TString GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
     const auto* msg = ev->Get();
     TStringStream ss;
     ss << msg->Time << ": ";
@@ -69,29 +104,81 @@ TString TAuditLogActor::GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev)
     return ss.Str();
 }
 
-void TAuditLogActor::HandleWriteAuditLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev, const TActorContext& ctx) {
-    Y_UNUSED(ctx);
-
-    for (auto& logBackends : LogBackends) {
-        switch (logBackends.first) {
-            case NKikimrConfig::TAuditConfig::JSON:
-                WriteLog(GetJsonLog(ev), logBackends.second);
-                break;
-            case NKikimrConfig::TAuditConfig::TXT:
-                WriteLog(GetTxtLog(ev), logBackends.second);
-                break;
-            default:
-                WriteLog(GetJsonLog(ev), logBackends.second);
-                break;
+class TAuditLogActor final : public TActor {  // single-state actor that writes audit records to the configured log backends
+private:
+    const TAuditLogBackends LogBackends;  // backend lists keyed by format (see the switch in HandleWriteAuditLog)
+
+public:
+    TAuditLogActor(TAuditLogBackends&& logBackends)
+        : TActor(&TThis::StateWork)  // StateWork is the initial (and only) state function
+        , LogBackends(std::move(logBackends))
+    {}
+
+    static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+        return NKikimrServices::TActivity::AUDIT_WRITER_ACTOR;
+    }
+
+private:
+    STFUNC(StateWork) {  // dispatches incoming events by type
+        switch (ev->GetTypeRewrite()) {
+            HFunc(TEvents::TEvPoisonPill, HandlePoisonPill);
+            HFunc(TEvAuditLog::TEvWriteAuditLog, HandleWriteAuditLog);
+        default:
+            HandleUnexpectedEvent(ev);  // any other event type is only logged
+            break;
         }
     }
+
+    void HandlePoisonPill(const TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) {
+        Y_UNUSED(ev);
+        AUDIT_LOG_ENABLED.store(false);  // flag is set to true by CreateAuditWriter; cleared here before the actor dies
+        Die(ctx);
+    }
+
+    void HandleWriteAuditLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev, const TActorContext& ctx) {
+        Y_UNUSED(ctx);
+
+        for (auto& logBackends : LogBackends) {  // one map entry per format; the record is formatted again for each entry
+            switch (logBackends.first) {
+                case NKikimrConfig::TAuditConfig::JSON:
+                    WriteLog(GetJsonLog(ev), logBackends.second);
+                    break;
+                case NKikimrConfig::TAuditConfig::TXT:
+                    WriteLog(GetTxtLog(ev), logBackends.second);
+                    break;
+                default:  // any other format value falls back to the JSON formatter
+                    WriteLog(GetJsonLog(ev), logBackends.second);
+                    break;
+            }
+        }
+    }
+
+    void HandleUnexpectedEvent(STFUNC_SIG) {  // warns about an event type StateWork has no handler for
+        LOG_W("TAuditLogActor:"
+            << " unhandled event type: " << ev->GetTypeRewrite()
+            << " event: " << ev->GetTypeName()
+        );
+    }
+};
+
+// Client interface implementation
+//
+
+std::atomic AUDIT_LOG_ENABLED = false;
+
+void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts)  // sends one audit record to the id returned by MakeAuditServiceID()
+{
+    auto request = MakeHolder(Now(), std::move(parts));  // the record is stamped with the current time here, on the sender side
+    sys->Send(MakeAuditServiceID(), request.Release());  // Release(): the raw event pointer is handed over to Send
 }
 
-void TAuditLogActor::HandleUnexpectedEvent(STFUNC_SIG)
+// Service interface implementation
+//
+
+THolder CreateAuditWriter(TAuditLogBackends&& logBackends)  // builds the audit writer actor object from the given backends
 {
-    LOG_W("TAuditLogActor:"
-          << " unhandled event type: " << ev->GetTypeRewrite()
-          << " event: " << ev->GetTypeName());
+    AUDIT_LOG_ENABLED.store(true);  // set as soon as the actor object is created; reset to false in HandlePoisonPill
+    return MakeHolder(std::move(logBackends));
 }
 
 }    // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log_impl.h b/ydb/core/audit/audit_log_impl.h
deleted file mode 100644
index 91de44ddead3..000000000000
--- a/ydb/core/audit/audit_log_impl.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#if defined LOG_T || \
-    defined LOG_D || \
-    defined LOG_I || \
-    defined LOG_N || \
-    defined LOG_W || \
-    defined LOG_E
-# error log macro redefinition
-#endif
-
-#define LOG_T(stream) LOG_TRACE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_D(stream) LOG_DEBUG_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_I(stream) LOG_INFO_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_N(stream) LOG_NOTICE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_W(stream) LOG_WARN_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_E(stream) LOG_ERROR_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
diff --git a/ydb/core/audit/audit_log_service.h b/ydb/core/audit/audit_log_service.h
new file mode 100644
index 000000000000..379a1ad516b1
--- /dev/null
+++ b/ydb/core/audit/audit_log_service.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include 
+
+#include 
+#include 
+
+class TLogBackend;
+
+namespace NActors {
+    class IActor;
+}
+
+namespace NKikimr::NAudit {
+
+inline NActors::TActorId MakeAuditServiceID() {  // id that SendAuditLog addresses audit events to
+    return NActors::TActorId(0, TStringBuf("YDB_AUDIT"));  // same id on every call: built from 0 and the literal "YDB_AUDIT"
+}
+
+using TAuditLogBackends = TMap>>;
+
+THolder CreateAuditWriter(TAuditLogBackends&& logBackends);
+
+}   // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/ya.make b/ydb/core/audit/ya.make
index e02fe8a19374..c748e3bd017f 100644
--- a/ydb/core/audit/ya.make
+++ b/ydb/core/audit/ya.make
@@ -1,10 +1,9 @@
 LIBRARY()
 
 SRCS(
-    audit_log_impl.cpp
-    audit_log_impl.h
-    audit_log.cpp
     audit_log.h
+    audit_log_service.h
+    audit_log_impl.cpp
 )
 
 PEERDIR(
diff --git a/ydb/core/base/appdata.cpp b/ydb/core/base/appdata.cpp
index ca40e8537151..97737c602414 100644
--- a/ydb/core/base/appdata.cpp
+++ b/ydb/core/base/appdata.cpp
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -61,6 +62,7 @@ struct TAppData::TImpl {
     NKikimrConfig::TGraphConfig GraphConfig;
     NKikimrSharedCache::TSharedCacheConfig SharedCacheConfig;
     NKikimrConfig::TMetadataCacheConfig MetadataCacheConfig;
+    NKikimrReplication::TReplicationDefaults ReplicationConfig;
 };
 
 TAppData::TAppData(
@@ -113,6 +115,7 @@ TAppData::TAppData(
     , GraphConfig(Impl->GraphConfig)
     , SharedCacheConfig(Impl->SharedCacheConfig)
     , MetadataCacheConfig(Impl->MetadataCacheConfig)
+    , ReplicationConfig(Impl->ReplicationConfig)
     , KikimrShouldContinue(kikimrShouldContinue)
 {}
 
diff --git a/ydb/core/base/appdata_fwd.h b/ydb/core/base/appdata_fwd.h
index 78c78590ef15..f22a246d2cd1 100644
--- a/ydb/core/base/appdata_fwd.h
+++ b/ydb/core/base/appdata_fwd.h
@@ -65,6 +65,10 @@ namespace NKikimrConfig {
     class TMetadataCacheConfig;
 }
 
+namespace NKikimrReplication {
+    class TReplicationDefaults;
+}
+
 namespace NKikimrNetClassifier {
     class TNetClassifierDistributableConfig;
     class TNetClassifierConfig;
@@ -213,6 +217,7 @@ struct TAppData {
     NKikimrConfig::TGraphConfig& GraphConfig;
     NKikimrSharedCache::TSharedCacheConfig& SharedCacheConfig;
     NKikimrConfig::TMetadataCacheConfig& MetadataCacheConfig;
+    NKikimrReplication::TReplicationDefaults& ReplicationConfig;
     bool EnforceUserTokenRequirement = false;
     bool EnforceUserTokenCheckRequirement = false; // check token if it was specified
     bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request
diff --git a/ydb/core/base/board_lookup.cpp b/ydb/core/base/board_lookup.cpp
index 634cc5dc064e..e3a6e7df35b5 100644
--- a/ydb/core/base/board_lookup.cpp
+++ b/ydb/core/base/board_lookup.cpp
@@ -25,6 +25,7 @@ namespace NKikimr {
 class TBoardLookupActor : public TActorBootstrapped {
     const TString Path;
     const TActorId Owner;
+    const ui64 Cookie;
     const EBoardLookupMode Mode;
     const bool Subscriber;
     TBoardRetrySettings BoardRetrySettings;
@@ -111,12 +112,12 @@ class TBoardLookupActor : public TActorBootstrapped {
     void NotAvailable() {
         if (CurrentStateFunc() != &TThis::StateSubscribe) {
             Send(Owner, new TEvStateStorage::TEvBoardInfo(
-                TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path));
+                TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path), 0, Cookie);
         } else {
             Send(Owner,
                 new TEvStateStorage::TEvBoardInfoUpdate(
                     TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path
-                )
+                ), 0, Cookie
             );
         }
         return PassAway();
@@ -129,7 +130,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->InfoEntries = std::move(Info);
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
                 if (Subscriber) {
                     Become(&TThis::StateSubscribe);
                     return;
@@ -240,7 +241,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->Updates = { { oid, std::move(update.value()) } };
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
             }
         } else {
             if (info.GetDropped()) {
@@ -308,7 +309,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->Updates = std::move(updates);
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
             }
         }
 
@@ -484,7 +485,7 @@ class TBoardLookupActor : public TActorBootstrapped {
             auto reply = MakeHolder(
                 TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
             reply->Updates = std::move(updates);
-            Send(Owner, std::move(reply));
+            Send(Owner, std::move(reply), 0, Cookie);
         }
     }
 
@@ -495,9 +496,10 @@ class TBoardLookupActor : public TActorBootstrapped {
 
     TBoardLookupActor(
         const TString &path, TActorId owner, EBoardLookupMode mode,
-        TBoardRetrySettings boardRetrySettings)
+        TBoardRetrySettings boardRetrySettings, ui64 cookie = 0)
         : Path(path)
         , Owner(owner)
+        , Cookie(cookie)
         , Mode(mode)
         , Subscriber(Mode == EBoardLookupMode::Subscription)
         , BoardRetrySettings(std::move(boardRetrySettings))
@@ -545,8 +547,8 @@ class TBoardLookupActor : public TActorBootstrapped {
 
 IActor* CreateBoardLookupActor(
         const TString &path, const TActorId &owner, EBoardLookupMode mode,
-        TBoardRetrySettings boardRetrySettings) {
-    return new TBoardLookupActor(path, owner, mode, std::move(boardRetrySettings));
+        TBoardRetrySettings boardRetrySettings, ui64 cookie) {  // cookie: kept by the actor and passed as the event cookie in its Send(Owner, ...) calls
+    return new TBoardLookupActor(path, owner, mode, std::move(boardRetrySettings), cookie);
 }
 
 }
diff --git a/ydb/core/base/counters.cpp b/ydb/core/base/counters.cpp
index 538266429c5a..915810b80074 100644
--- a/ydb/core/base/counters.cpp
+++ b/ydb/core/base/counters.cpp
@@ -10,6 +10,7 @@ static const THashSet DATABASE_SERVICES
     = {{ TString("compile"),
          TString("coordinator"),
          TString("dsproxy"),
+         TString("dsproxy_mon"),
          TString("dsproxynode"),
          TString("dsproxy_overview"),
          TString("dsproxy_percentile"),
@@ -39,6 +40,7 @@ static const THashSet DATABASE_SERVICES
          TString("pqproxy|readSession"),
          TString("pqproxy|schemecache"),
          TString("pqproxy|mirrorWriteTimeLag"),
+         TString("pqproxy|userAgents"),
          TString("datastreams"),
     }};
 
diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h
index a73efb723307..704f503215e6 100644
--- a/ydb/core/base/events.h
+++ b/ydb/core/base/events.h
@@ -12,175 +12,180 @@ namespace NKikimr {
 struct TKikimrEvents : TEvents {
     enum EEventSpaceKikimr {
         /* WARNING:
-           Please mind that you should never change the order
-           for the following keywords, you should consider
-           issues about "rolling update".
+           Never change the numeric value of an existing entry:
+           changed values cause issues during a "rolling update".
         */
-        ES_KIKIMR_ES_BEGIN = ES_USERSPACE,  //4096
-        ES_STATESTORAGE, //4097
-        ES_DEPRECATED_4098, //4098
-        ES_BLOBSTORAGE, //4099
-        ES_HIVE, //4100
-        ES_TABLETBASE, //4101
-        ES_TABLET, //4102
-        ES_TABLETRESOLVER,
-        ES_LOCAL,
-        ES_DEPRECATED_4105,
-        ES_TX_PROXY, // generic proxy commands 4106
-        ES_TX_COORDINATOR,
-        ES_TX_MEDIATOR,
-        ES_TX_PROCESSING, // 4109
-        ES_DEPRECATED_4110,
-        ES_DEPRECATED_4111,
-        ES_DEPRECATED_4112,
-        ES_TX_DATASHARD,
-        ES_DEPRECATED_4114,
-        ES_TX_USERPROXY, // user proxy interface
-        ES_SCHEME_CACHE,
-        ES_TX_PROXY_REQ,
-        ES_TABLET_PIPE,
-        ES_DEPRECATED_4118,
-        ES_TABLET_COUNTERS_AGGREGATOR,
-        ES_DEPRECATED_4121,
-        ES_PROXY_BUS, //4122
-        ES_BOOTSTRAPPER,
-        ES_TX_MEDIATORTIMECAST,
-        ES_DEPRECATED_4125,
-        ES_DEPRECATED_4126,
-        ES_DEPRECATED_4127,
-        ES_DEPRECATED_4128,
-        ES_DEPRECATED_4129,
-        ES_DEPRECATED_4130,
-        ES_DEPRECATED_4131,
-        ES_KEYVALUE, //4132
-        ES_MSGBUS_TRACER,
-        ES_RTMR_TABLET,
-        ES_FLAT_EXECUTOR,
-        ES_NODE_WHITEBOARD,
-        ES_FLAT_TX_SCHEMESHARD, // 4137
-        ES_PQ,
-        ES_YQL_KIKIMR_PROXY,
-        ES_PQ_META_CACHE,
-        ES_DEPRECATED_4141,
-        ES_PQ_L2_CACHE, //4142
-        ES_TOKEN_BUILDER,
-        ES_TICKET_PARSER,
-        ES_KQP = NYql::NDq::TDqEvents::ES_DQ_COMPUTE_KQP_COMPATIBLE, // 4145
-        ES_BLACKBOX_VALIDATOR,
-        ES_SELF_PING,
-        ES_PIPECACHE,
-        ES_PQ_PROXY,
-        ES_CMS,
-        ES_NODE_BROKER,
-        ES_TX_ALLOCATOR, //4152
+        ES_KIKIMR_ES_BEGIN = ES_USERSPACE, // 4096
+        ES_STATESTORAGE = 4097, 
+        ES_DEPRECATED_4098 = 4098, 
+        ES_BLOBSTORAGE = 4099, 
+        ES_HIVE = 4100, 
+        ES_TABLETBASE = 4101, 
+        ES_TABLET = 4102, 
+        ES_TABLETRESOLVER = 4103, 
+        ES_LOCAL = 4104, 
+        ES_DEPRECATED_4105 = 4105, 
+        ES_TX_PROXY = 4106, 
+        ES_TX_COORDINATOR = 4107, 
+        ES_TX_MEDIATOR = 4108, 
+        ES_TX_PROCESSING = 4109, 
+        ES_DEPRECATED_4110 = 4110, 
+        ES_DEPRECATED_4111 = 4111, 
+        ES_DEPRECATED_4112 = 4112, 
+        ES_TX_DATASHARD = 4113, 
+        ES_DEPRECATED_4114 = 4114, 
+        ES_TX_USERPROXY = 4115, 
+        ES_SCHEME_CACHE = 4116, 
+        ES_TX_PROXY_REQ = 4117, 
+        ES_TABLET_PIPE = 4118, 
+        ES_DEPRECATED_4118 = 4119, 
+        ES_TABLET_COUNTERS_AGGREGATOR = 4120, 
+        ES_DEPRECATED_4121 = 4121, 
+        ES_PROXY_BUS = 4122, 
+        ES_BOOTSTRAPPER = 4123, 
+        ES_TX_MEDIATORTIMECAST = 4124, 
+        ES_DEPRECATED_4125 = 4125, 
+        ES_DEPRECATED_4126 = 4126, 
+        ES_DEPRECATED_4127 = 4127, 
+        ES_DEPRECATED_4128 = 4128, 
+        ES_DEPRECATED_4129 = 4129, 
+        ES_DEPRECATED_4130 = 4130, 
+        ES_DEPRECATED_4131 = 4131, 
+        ES_KEYVALUE = 4132, 
+        ES_MSGBUS_TRACER = 4133, 
+        ES_RTMR_TABLET = 4134, 
+        ES_FLAT_EXECUTOR = 4135, 
+        ES_NODE_WHITEBOARD = 4136, 
+        ES_FLAT_TX_SCHEMESHARD = 4137, 
+        ES_PQ = 4138, 
+        ES_YQL_KIKIMR_PROXY = 4139, 
+        ES_PQ_META_CACHE = 4140, 
+        ES_DEPRECATED_4141 = 4141, 
+        ES_PQ_L2_CACHE = 4142, 
+        ES_TOKEN_BUILDER = 4143, 
+        ES_TICKET_PARSER = 4144, 
+        ES_KQP = 4145, 
+        ES_BLACKBOX_VALIDATOR = 4146, 
+        ES_SELF_PING = 4147, 
+        ES_PIPECACHE = 4148, 
+        ES_PQ_PROXY = 4149, 
+        ES_CMS = 4150, 
+        ES_NODE_BROKER = 4151, 
+        ES_TX_ALLOCATOR = 4152, 
         // reserve event space for each RTMR process
-        ES_RTMR_STORAGE,
-        ES_RTMR_PROXY,
-        ES_RTMR_PUSHER,
-        ES_RTMR_HOST,
-        ES_RESOURCE_BROKER,
-        ES_VIEWER,
-        ES_SUB_DOMAIN,
-        ES_GRPC_PROXY_STATUS, //OLD
-        ES_SQS,
-        ES_BLOCKSTORE, //4162
-        ES_RTMR_ICBUS,
-        ES_TENANT_POOL,
-        ES_USER_REGISTRY,
-        ES_TVM_SETTINGS_UPDATER,
-        ES_PQ_CLUSTERS_UPDATER,
-        ES_TENANT_SLOT_BROKER,
-        ES_GRPC_CALLS,
-        ES_CONSOLE,
-        ES_KESUS_PROXY,
-        ES_KESUS,
-        ES_CONFIGS_DISPATCHER,
-        ES_IAM_SERVICE,
-        ES_FOLDER_SERVICE,
-        ES_GRPC_MON,
-        ES_QUOTA, // must be in sync with ydb/core/quoter/public/quoter.h
-        ES_COORDINATED_QUOTA,
-        ES_ACCESS_SERVICE,
-        ES_USER_ACCOUNT_SERVICE,
-        ES_PQ_PROXY_NEW,
-        ES_GRPC_STREAMING,
-        ES_SCHEME_BOARD,
-        ES_FLAT_TX_SCHEMESHARD_PROTECTED,
-        ES_GRPC_REQUEST_PROXY,
-        ES_EXPORT_SERVICE,
-        ES_TX_ALLOCATOR_CLIENT,
-        ES_PQ_CLUSTER_TRACKER,
-        ES_NET_CLASSIFIER,
-        ES_SYSTEM_VIEW,
-        ES_TENANT_NODE_ENUMERATOR,
-        ES_SERVICE_ACCOUNT_SERVICE,
-        ES_INDEX_BUILD,
-        ES_BLOCKSTORE_PRIVATE,
-        ES_YT_WRAPPER,
-        ES_S3_WRAPPER,
-        ES_FILESTORE,
-        ES_FILESTORE_PRIVATE,
-        ES_YDB_METERING,
-        ES_IMPORT_SERVICE, // 4200
-        ES_TX_OLAPSHARD,
-        ES_TX_COLUMNSHARD,
-        ES_CROSSREF,
-        ES_SCHEME_BOARD_MON,
-        ES_YQL_ANALYTICS_PROXY = NFq::TEventIds::ES_YQL_ANALYTICS_PROXY,
-        ES_BLOB_CACHE,
-        ES_LONG_TX_SERVICE,
-        ES_TEST_SHARD,
-        ES_DATASTREAMS_PROXY,
-        ES_IAM_TOKEN_SERVICE,
-        ES_HEALTH_CHECK,
-        ES_DQ = NYql::NDq::TDqEvents::ES_DQ_COMPUTE, // 4212
-        ES_YQ, // 4213
-        ES_CHANGE_EXCHANGE_DATASHARD,
-        ES_DATABASE_SERVICE, //4215
-        ES_SEQUENCESHARD, // 4216
-        ES_SEQUENCEPROXY, // 4217
-        ES_CLOUD_STORAGE,
-        ES_CLOUD_STORAGE_PRIVATE,
-        ES_FOLDER_SERVICE_ADAPTER,
-        ES_PQ_PARTITION_WRITER,
-        ES_YDB_PROXY,
-        ES_REPLICATION_CONTROLLER,
-        ES_HTTP_PROXY,
-        ES_BLOB_DEPOT,
-        ES_DATASHARD_LOAD,
-        ES_METADATA_PROVIDER,
-        ES_INTERNAL_REQUEST,
-        ES_BACKGROUND_TASKS,
-        ES_TIERING,
-        ES_METADATA_INITIALIZER,
-        ES_YDB_AUDIT_LOG,
-        ES_METADATA_MANAGER,
-        ES_METADATA_SECRET,
-        ES_TEST_LOAD,
-        ES_GRPC_CANCELATION,
-        ES_DISCOVERY,
-        ES_EXT_INDEX,
-        ES_CONVEYOR,
-        ES_KQP_SCAN_EXCHANGE,
-        ES_IC_NODE_CACHE,
-        ES_DATA_OPERATIONS,
-        ES_KAFKA,
-        ES_STATISTICS,
-        ES_LDAP_AUTH_PROVIDER,
-        ES_DB_METADATA_CACHE,
-        ES_TABLE_CREATOR,
-        ES_PQ_PARTITION_CHOOSER,
-        ES_GRAPH,
-        ES_REPLICATION_WORKER,
-        ES_CHANGE_EXCHANGE,
-        ES_S3_PROVIDER,
-        ES_NEBIUS_ACCESS_SERVICE,
-        ES_REPLICATION_SERVICE,
-        ES_BACKUP_SERVICE,
-        ES_TX_BACKGROUND,
-        ES_SS_BG_TASKS,
-        ES_LIMITER
+        ES_RTMR_STORAGE = 4153, 
+        ES_RTMR_PROXY = 4154, 
+        ES_RTMR_PUSHER = 4155, 
+        ES_RTMR_HOST = 4156, 
+        ES_RESOURCE_BROKER = 4157, 
+        ES_VIEWER = 4158, 
+        ES_SUB_DOMAIN = 4159, 
+        ES_GRPC_PROXY_STATUS = 4160, 
+        ES_SQS = 4161, 
+        ES_BLOCKSTORE = 4162, 
+        ES_RTMR_ICBUS = 4163, 
+        ES_TENANT_POOL = 4164, 
+        ES_USER_REGISTRY = 4165, 
+        ES_TVM_SETTINGS_UPDATER = 4166, 
+        ES_PQ_CLUSTERS_UPDATER = 4167, 
+        ES_TENANT_SLOT_BROKER = 4168, 
+        ES_GRPC_CALLS = 4169, 
+        ES_CONSOLE = 4170, 
+        ES_KESUS_PROXY = 4171, 
+        ES_KESUS = 4172, 
+        ES_CONFIGS_DISPATCHER = 4173, 
+        ES_IAM_SERVICE = 4174, 
+        ES_FOLDER_SERVICE = 4175, 
+        ES_GRPC_MON = 4176, 
+        ES_QUOTA = 4177, // must be in sync with ydb/core/quoter/public/quoter.h
+        ES_COORDINATED_QUOTA = 4178, 
+        ES_ACCESS_SERVICE = 4179, 
+        ES_USER_ACCOUNT_SERVICE = 4180, 
+        ES_PQ_PROXY_NEW = 4181, 
+        ES_GRPC_STREAMING = 4182, 
+        ES_SCHEME_BOARD = 4183, 
+        ES_FLAT_TX_SCHEMESHARD_PROTECTED = 4184, 
+        ES_GRPC_REQUEST_PROXY = 4185, 
+        ES_EXPORT_SERVICE = 4186, 
+        ES_TX_ALLOCATOR_CLIENT = 4187, 
+        ES_PQ_CLUSTER_TRACKER = 4188, 
+        ES_NET_CLASSIFIER = 4189, 
+        ES_SYSTEM_VIEW = 4190, 
+        ES_TENANT_NODE_ENUMERATOR = 4191, 
+        ES_SERVICE_ACCOUNT_SERVICE = 4192, 
+        ES_INDEX_BUILD = 4193, 
+        ES_BLOCKSTORE_PRIVATE = 4194, 
+        ES_YT_WRAPPER = 4195, 
+        ES_S3_WRAPPER = 4196, 
+        ES_FILESTORE = 4197, 
+        ES_FILESTORE_PRIVATE = 4198, 
+        ES_YDB_METERING = 4199, 
+        ES_IMPORT_SERVICE = 4200, 
+        ES_TX_OLAPSHARD = 4201, 
+        ES_TX_COLUMNSHARD = 4202, 
+        ES_CROSSREF = 4203, 
+        ES_SCHEME_BOARD_MON = 4204, 
+        ES_YQL_ANALYTICS_PROXY = 4205, 
+        ES_BLOB_CACHE = 4206, 
+        ES_LONG_TX_SERVICE = 4207, 
+        ES_TEST_SHARD = 4208, 
+        ES_DATASTREAMS_PROXY = 4209, 
+        ES_IAM_TOKEN_SERVICE = 4210, 
+        ES_HEALTH_CHECK = 4211, 
+        ES_DQ = 4212, 
+        ES_YQ = 4213, 
+        ES_CHANGE_EXCHANGE_DATASHARD = 4214, 
+        ES_DATABASE_SERVICE = 4215, 
+        ES_SEQUENCESHARD = 4216, 
+        ES_SEQUENCEPROXY = 4217, 
+        ES_CLOUD_STORAGE = 4218, 
+        ES_CLOUD_STORAGE_PRIVATE = 4219, 
+        ES_FOLDER_SERVICE_ADAPTER = 4220, 
+        ES_PQ_PARTITION_WRITER = 4221, 
+        ES_YDB_PROXY = 4222, 
+        ES_REPLICATION_CONTROLLER = 4223, 
+        ES_HTTP_PROXY = 4224, 
+        ES_BLOB_DEPOT = 4225, 
+        ES_DATASHARD_LOAD = 4226, 
+        ES_METADATA_PROVIDER = 4227, 
+        ES_INTERNAL_REQUEST = 4228, 
+        ES_BACKGROUND_TASKS = 4229, 
+        ES_TIERING = 4230, 
+        ES_METADATA_INITIALIZER = 4231, 
+        ES_YDB_AUDIT_LOG = 4232, 
+        ES_METADATA_MANAGER = 4233, 
+        ES_METADATA_SECRET = 4234, 
+        ES_TEST_LOAD = 4235, 
+        ES_GRPC_CANCELATION = 4236, 
+        ES_DISCOVERY = 4237, 
+        ES_EXT_INDEX = 4238, 
+        ES_CONVEYOR = 4239, 
+        ES_KQP_SCAN_EXCHANGE = 4240, 
+        ES_IC_NODE_CACHE = 4241, 
+        ES_DATA_OPERATIONS = 4242, 
+        ES_KAFKA = 4243, 
+        ES_STATISTICS = 4244, 
+        ES_LDAP_AUTH_PROVIDER = 4245, 
+        ES_DB_METADATA_CACHE = 4246, 
+        ES_TABLE_CREATOR = 4247, 
+        ES_PQ_PARTITION_CHOOSER = 4248, 
+        ES_GRAPH = 4249, 
+        ES_REPLICATION_WORKER = 4250, 
+        ES_CHANGE_EXCHANGE = 4251, 
+        ES_S3_PROVIDER = 4252, 
+        ES_NEBIUS_ACCESS_SERVICE = 4253, 
+        ES_REPLICATION_SERVICE = 4254, 
+        ES_BACKUP_SERVICE = 4255, 
+        ES_TX_BACKGROUND = 4256, 
+        ES_SS_BG_TASKS = 4257, 
+        ES_LIMITER = 4258, 
+        // 4259 is intentionally skipped: ES_MEMORY = 4259 exists in main.
+        ES_GROUPED_ALLOCATIONS_MANAGER = 4260,
     };
 };
 
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_KQP == (int)NYql::NDq::TDqEvents::ES_DQ_COMPUTE_KQP_COMPATIBLE);
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_DQ == (int)NYql::NDq::TDqEvents::ES_DQ_COMPUTE);
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_YQL_ANALYTICS_PROXY == (int)NFq::TEventIds::ES_YQL_ANALYTICS_PROXY);
+
 }
diff --git a/ydb/core/base/path.h b/ydb/core/base/path.h
index ca7c1403f56b..71263b8c6b79 100644
--- a/ydb/core/base/path.h
+++ b/ydb/core/base/path.h
@@ -37,4 +37,12 @@ inline TVector ChildPath(const TVector& parentPath, const TStr
     return path;
 }
 
+// Returns a copy of parentPath with every component of childPath appended, in order.
+inline TVector ChildPath(const TVector& parentPath, const TVector& childPath) {
+    auto path = parentPath;
+    // Bulk-append the child components rather than pushing them one at a time.
+    path.insert(path.end(), childPath.begin(), childPath.end());
+    return path;
+}
+
 }
diff --git a/ydb/core/base/pool_stats_collector.cpp b/ydb/core/base/pool_stats_collector.cpp
index 0ddce4c08d37..40e57e8eb2ca 100644
--- a/ydb/core/base/pool_stats_collector.cpp
+++ b/ydb/core/base/pool_stats_collector.cpp
@@ -46,12 +46,24 @@ class TStatsCollectingActor : public NActors::TStatsCollectingActor {
     void OnWakeup(const TActorContext &ctx) override {
         MiniKQLPoolStats.Update();
 
-        TVector> pools;
+        auto systemUpdate = std::make_unique(); // single update event: per-pool stats plus the CoresTotal/CoresUsed fields set below
+        ui32 coresTotal = 0;
+        double coresUsed = 0;
         for (const auto& pool : PoolCounters) {
-            pools.emplace_back(pool.Name, pool.Usage, pool.Threads);
+            auto& pb = *systemUpdate->Record.AddPoolStats(); // one PoolStats entry per element of PoolCounters
+            pb.SetName(pool.Name);
+            pb.SetUsage(pool.Usage);
+            pb.SetThreads(static_cast(pool.Threads));
+            pb.SetLimit(static_cast(pool.LimitThreads));
+            if (pool.Name != "IO") { // the "IO" pool does not count towards coresTotal
+                coresTotal += static_cast(pool.DefaultThreads);
+            }
+            coresUsed += pool.Usage * pool.LimitThreads; // NOTE(review): includes the "IO" pool and scales by LimitThreads, while coresTotal sums DefaultThreads of non-IO pools — confirm intended
         }
+        systemUpdate->Record.SetCoresTotal(coresTotal);
+        systemUpdate->Record.SetCoresUsed(coresUsed);
 
-        ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools));
+        ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), systemUpdate.release()); // ownership of the event is released to Send
     }
 
 private:
diff --git a/ydb/core/base/statestorage.h b/ydb/core/base/statestorage.h
index 1c609a702d40..6a1726a3fb3b 100644
--- a/ydb/core/base/statestorage.h
+++ b/ydb/core/base/statestorage.h
@@ -556,7 +556,7 @@ IActor* CreateStateStorageBoardReplica(const TIntrusivePtr &,
 IActor* CreateSchemeBoardReplica(const TIntrusivePtr&, ui32);
 IActor* CreateBoardLookupActor(
     const TString &path, const TActorId &owner, EBoardLookupMode mode,
-    TBoardRetrySettings boardRetrySettings = {});
+    TBoardRetrySettings boardRetrySettings = {}, ui64 cookie = 0); // cookie is optional and defaults to 0, so four-argument calls still compile
 IActor* CreateBoardPublishActor(
     const TString &path, const TString &payload, const TActorId &owner, ui32 ttlMs, bool reg,
     TBoardRetrySettings boardRetrySettings = {});
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
index c082235cc18f..6aba81c8ec94 100644
--- a/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
+++ b/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
@@ -402,8 +402,15 @@ class TBlobStorageGroupProxyMonActor : public TActorBootstrappedRegisterActorPage(proxiesMonPage, path, name, false, TlsActivationContext->ExecutorThread.ActorSystem,
-                SelfId());
+            mon->RegisterActorPage(TMon::TRegisterActorPageFields{
+                .Title = name,
+                .RelPath = path,
+                .ActorSystem = TlsActivationContext->ExecutorThread.ActorSystem,
+                .Index = proxiesMonPage, 
+                .PreTag = false, 
+                .ActorId = SelfId(),
+                .MonServiceName = "dsproxy_mon"
+            });
         }
 
         Become(&TThis::StateOnline);
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
index 19659e6b7759..fbb545409a89 100644
--- a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
+++ b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
@@ -29,10 +29,33 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor FoundParts;
@@ -69,12 +94,16 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor EmptyResponseFlags;
     TStackVec ErrorResponseFlags;
     TStackVec ForceStopFlags;
+    TStackVec SlowFlags;
     TBlobStorageGroupInfo::TVDiskIds VDisks;
 
     bool UseVPatch = false;
     bool IsGoodPatchedBlobId = false;
     bool IsAllowedErasure = false;
     bool IsSecured = false;
+    bool HasSlowVDisk = false;
+    bool IsContinuedVPatch = false;
+    bool IsMovedPatch = false;
 
 #define PATCH_LOG(priority, service, marker, msg, ...)                         \
         STLOG(priority, service, marker, msg,                                  \
@@ -97,6 +126,15 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorActivePatch;
     }
 
+    void ScheduleWakeUp(TInstant startTime, EWakeUpTag tag) {
+        // The wake-up delay equals the time that has already elapsed since startTime.
+        Schedule(TActivationContext::Now() - startTime, new TEvents::TEvWakeup(tag));
+    }
+
+    void ScheduleWakeUp(EWakeUpTag tag) { // convenience overload: elapsed time is measured from StageStart
+        ScheduleWakeUp(StageStart, tag);
+    }
+
     static constexpr ERequestType RequestType() {
         return ERequestType::Patch;
     }
@@ -279,6 +317,12 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorType.ErasureFamily() != TErasureType::ErasureMirror) {
+            if (ReceivedFoundParts == SentStarts / 2 + SentStarts % 2) {
+                ScheduleWakeUp(VPatchStartTag);
+            }
+        }
+
         NKikimrBlobStorage::TEvVPatchFoundParts &record = ev->Get()->Record;
 
         Y_ABORT_UNLESS(record.HasCookie());
@@ -312,6 +356,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor(TStringBuilder() << ReceivedFoundParts << '/' << SentStarts)),
                 (ErrorReason, errorReason));
 
@@ -341,6 +386,13 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorType.ErasureFamily() != TErasureType::ErasureMirror) {
+            if (ReceivedResults == SentVPatchDiff / 2 + SentVPatchDiff % 2) {
+                ScheduleWakeUp(VPatchDiffTag);
+            }
+        }
+
         PullOutStatusFlagsAndFressSpace(record);
         Y_ABORT_UNLESS(record.HasStatus());
         NKikimrProto::EReplyStatus status = record.GetStatus();
@@ -352,6 +404,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor(TStringBuilder() << ReceivedResults << '/' << Info->Type.TotalPartCount())),
                 (ErrorReason, errorReason));
 
@@ -499,6 +552,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor= dataParts, "vdiskIdx# " << vdiskIdx << " partIdx# " << partIdx);
             placements.push_back(TPartPlacement{static_cast(vdiskIdx), static_cast(partIdx + 1)});
+            SentVPatchDiff++;
         }
         SendDiffs(placements);
     }
@@ -537,15 +592,38 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor subgroupIdx = 0;
 
-        ui32 subgroupIdx = 0;
         if (OkVDisksWithParts) {
             ui32 okVDiskIdx = RandomNumber(OkVDisksWithParts.size());
             subgroupIdx = OkVDisksWithParts[okVDiskIdx];
         } else {
+            ui64 worstNs = 0;
+            ui64 nextToWorstNs = 0;
+            i32 worstSubGroubIdx = -1;
+            GetWorstPredictedDelaysNs(NKikimrBlobStorage::EVDiskQueueId::PutAsyncBlob, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
+            if (worstNs > nextToWorstNs * 2) {
+                SlowFlags[worstSubGroubIdx] = true;
+                HasSlowVDisk = true;
+            }
+            if (HasSlowVDisk) {
+                TStackVec goodDisks;
+                for (ui32 idx = 0; idx < VDisks.size(); ++idx) {
+                    if (!SlowFlags[idx] && !ErrorResponseFlags[idx]) {
+                        goodDisks.push_back(idx);
+                    }
+                }
+                if (goodDisks.size()) {
+                    ui32 okVDiskIdx = RandomNumber(goodDisks.size());
+                    subgroupIdx = goodDisks[okVDiskIdx];
+                }
+            }
+        }
+        if (!subgroupIdx) {
             subgroupIdx = RandomNumber(Info->Type.TotalPartCount());
         }
-        TVDiskID vDisk = Info->GetVDiskInSubgroup(subgroupIdx, OriginalId.Hash());
+        TVDiskID vDisk = Info->GetVDiskInSubgroup(*subgroupIdx, OriginalId.Hash());
         TDeque> events;
 
         ui64 cookie = ((ui64)OriginalId.Hash() << 32) | PatchedId.Hash();
@@ -574,7 +652,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPatchesWithFallback->Inc();
-        if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured) {
+        if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured && !IsMovedPatch) {
             PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA05, "Start Moved strategy from fallback");
             StartMovedPatch();
         } else {
@@ -587,20 +665,31 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPickSubgroup(OriginalId.Hash(), &VDisks, nullptr);
         ReceivedResponseFlags.assign(VDisks.size(), false);
         ErrorResponseFlags.assign(VDisks.size(), false);
         EmptyResponseFlags.assign(VDisks.size(), false);
         ForceStopFlags.assign(VDisks.size(), false);
+        SlowFlags.assign(VDisks.size(), false);
+
+        ui64 worstNs = 0;
+        ui64 nextToWorstNs = 0;
+        i32 worstSubGroubIdx = -1;
+        GetWorstPredictedDelaysNs(NKikimrBlobStorage::EVDiskQueueId::GetFastRead, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
+        if (worstNs > nextToWorstNs * 2) {
+            SlowFlags[worstSubGroubIdx] = true;
+            HasSlowVDisk = true;
+        }
 
         TDeque> events;
-
         for (ui32 idx = 0; idx < VDisks.size(); ++idx) {
-            std::unique_ptr ev = std::make_unique(
-                    OriginalId, PatchedId, VDisks[idx], Deadline, idx, true);
-            events.emplace_back(std::move(ev));
-            SentStarts++;
+            if (!SlowFlags[idx]) {
+                std::unique_ptr ev = std::make_unique(
+                        OriginalId, PatchedId, VDisks[idx], Deadline, idx, true);
+                events.emplace_back(std::move(ev));
+                SentStarts++;
+            }
         }
 
         PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA08, "Start VPatch strategy",
@@ -701,6 +790,17 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor nextToWorstNs * 2) {
+            SlowFlags[worstSubGroubIdx] = true;
+            HasSlowVDisk = true;
+        }
 
         if (Info->Type.GetErasure() == TErasureType::ErasureMirror3dc) {
             return ContinueVPatchForMirror3dc();
@@ -713,6 +813,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPickSubgroup(OriginalId.Hash(), &VDisks, nullptr);
         IsSecured = (Info->GetEncryptionMode() != TBlobStorageGroupInfo::EEM_NONE);
 
         IsGoodPatchedBlobId = result;
         IsAllowedErasure = Info->Type.ErasureFamily() == TErasureType::ErasureParityBlock
                 || Info->Type.GetErasure() == TErasureType::ErasureNone
                 || Info->Type.GetErasure() == TErasureType::ErasureMirror3dc;
-        if (IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
+        if (false && IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
             PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA03, "Start VPatch strategy from bootstrap");
             StartVPatch();
         } else {
@@ -825,6 +930,75 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetPredictedDelayNsByOrderNumber(diskIdx, queueId);;
+            if (predictedNs > *outWorstNs) {
+                *outNextToWorstNs = *outWorstNs;
+                *outWorstNs = predictedNs;
+                *outWorstSubgroupIdx = diskIdx;
+            } else if (predictedNs > *outNextToWorstNs) {
+                *outNextToWorstNs = predictedNs;
+            }
+        }
+    }
+
+    void SetSlowDisks() {
+        // A disk is flagged slow when none of its received/empty/error response flags is set.
+        for (ui32 idx = 0; idx < SlowFlags.size(); ++idx) {
+            const bool replied = ReceivedResponseFlags[idx] || EmptyResponseFlags[idx] || ErrorResponseFlags[idx];
+            SlowFlags[idx] = !replied;
+            HasSlowVDisk = HasSlowVDisk || !replied; // sticky: never reset to false here
+        }
+    }
+
+    template 
+    void HandleWakeUp(TEvents::TEvWakeup::TPtr &ev) { // wake-up handler: a wake-up tagged ExpectedTag or NeverTag marks unanswered disks slow and starts the fallback
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA36, "HandleWakeUp",
+                (ExpectedTag, ToString(ExpectedTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == ExpectedTag) { // wake-ups carrying any other tag (except NeverTag below) are only logged
+            SetSlowDisks();
+            StartFallback();
+        }
+        if (ev->Get()->Tag == NeverTag) { // NOTE(review): separate check, not an else-branch — if ExpectedTag can equal NeverTag the fallback is started twice; confirm
+            SetSlowDisks();
+            StartFallback();
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA40, "Found NeverTag wake up", (ExpectedTag, ToString(ExpectedTag)));
+        }
+    }
+
+    void HandleVPatchWakeUp(TEvents::TEvWakeup::TPtr &ev) { // wake-up handler used by the VPatch state
+        ui64 expectedTag = (IsContinuedVPatch ? VPatchDiffTag : VPatchStartTag); // VPatchDiffTag once IsContinuedVPatch is set, VPatchStartTag otherwise
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA37, "HandleWakeUp",
+                (ExpectedTag, ToString(expectedTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == expectedTag) { // a wake-up for the current stage: mark unanswered disks slow and fall back
+            SetSlowDisks();
+            StartFallback();
+        }
+        if (ev->Get()->Tag == NeverTag) { // checked independently of the branch above; takes the same fallback path and logs it
+            SetSlowDisks();
+            StartFallback();
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA41, "Found NeverTag wake up", (ExpectedTag, ToString(expectedTag)));
+        }
+    }
+
+    void HandleNeverTagWakeUp(TEvents::TEvWakeup::TPtr &ev) { // acts only on NeverTag wake-ups; any other tag is just logged
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA42, "HandleWakeUp",
+                (ExpectedTag, ToString(NeverTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == NeverTag) {
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA43, "Found NeverTag wake up in naive state");
+            ReplyAndDie(NKikimrProto::DEADLINE); // the request is answered with DEADLINE status
+        }
+    }
+
     STATEFN(NaiveState) {
         if (ProcessEvent(ev)) {
             return;
@@ -832,9 +1006,14 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvGetResult, Handle);
             hFunc(TEvBlobStorage::TEvPutResult, Handle);
+
+            IgnoreFunc(TEvents::TEvWakeup);
+            //hFunc(TEvents::TEvWakeup, HandleWakeUp);
             IgnoreFunc(TEvBlobStorage::TEvVPatchResult);
+            IgnoreFunc(TEvBlobStorage::TEvVPatchFoundParts);
+            IgnoreFunc(TEvBlobStorage::TEvVMovedPatchResult);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 
@@ -844,9 +1023,11 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvVMovedPatchResult, Handle);
+            hFunc(TEvents::TEvWakeup, HandleWakeUp);
             IgnoreFunc(TEvBlobStorage::TEvVPatchResult);
+            IgnoreFunc(TEvBlobStorage::TEvVPatchFoundParts);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 
@@ -857,8 +1038,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvVPatchFoundParts, Handle);
             hFunc(TEvBlobStorage::TEvVPatchResult, Handle);
+            hFunc(TEvents::TEvWakeup, HandleVPatchWakeUp);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 };
diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
index d6b70cc6f9c2..741d5ce3b875 100644
--- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
+++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
@@ -110,6 +110,18 @@ enum class ENaivePatchCase {
     ErrorOnPut,
 };
 
+#define CASE_TO_RETURN_STRING(cs) \
+    case cs: return #cs \
+// end CASE_TO_RETURN_STRING (the macro body is continued onto this line by the backslash above; #cs stringizes the enumerator as written)
+TString ToString(ENaivePatchCase cs) { // returns the enumerator's spelling as written in the case list, e.g. "ENaivePatchCase::Ok"
+    switch (cs) {
+        CASE_TO_RETURN_STRING(ENaivePatchCase::Ok);
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnGetItem);
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnGet);
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnPut);
+    } // NOTE(review): no return after the switch; a value outside the listed enumerators falls off the end of the function
+}
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(ENaivePatchCase naiveCase) {
     switch (naiveCase) {
     case ENaivePatchCase::Ok:
@@ -156,6 +168,17 @@ enum class EVPatchCase {
     Custom,
 };
 
+TString ToString(EVPatchCase cs) { // returns the enumerator's spelling as written in the case list, e.g. "EVPatchCase::Ok"
+    switch (cs) {
+        CASE_TO_RETURN_STRING(EVPatchCase::Ok);
+        CASE_TO_RETURN_STRING(EVPatchCase::OneErrorAndAllPartExistInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::OnePartLostInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::DeadGroupInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::ErrorDuringVPatchDiff);
+        CASE_TO_RETURN_STRING(EVPatchCase::Custom);
+    } // NOTE(review): no return after the switch; a value outside the listed enumerators falls off the end of the function
+}
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(EVPatchCase vpatchCase) {
     switch (vpatchCase) {
         case EVPatchCase::Ok:
@@ -249,6 +272,15 @@ enum class EMovedPatchCase {
     Error
 };
 
+TString ToString(EMovedPatchCase cs) { // returns the enumerator's spelling as written in the case list, e.g. "EMovedPatchCase::Ok"
+    switch (cs) {
+        CASE_TO_RETURN_STRING(EMovedPatchCase::Ok);
+        CASE_TO_RETURN_STRING(EMovedPatchCase::Error);
+    } // NOTE(review): no return after the switch; a value outside the listed enumerators falls off the end of the function
+}
+
+#undef CASE_TO_RETURN_STRING
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(EMovedPatchCase movedCase) {
     switch (movedCase) {
     case EMovedPatchCase::Ok:
@@ -289,7 +321,7 @@ void ReceivePatchResult(TTestBasicRuntime &runtime, const TTestArgs &args, NKiki
 }
 
 void ConductGet(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
-    CTEST << "ConductGet: Start\n";
+    CTEST << "ConductGet: Start NaiveCase: " << ToString(naiveCase) << "\n";
     NKikimrProto::EReplyStatus resultStatus = GetGetResultStatus(naiveCase);
     TAutoPtr handle;
     TEvBlobStorage::TEvGet *get = runtime.GrabEdgeEventRethrow(handle);
@@ -328,10 +360,10 @@ TString MakePatchedBuffer(const TTestArgs &args) {
 void ConductPut(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
     NKikimrProto::EReplyStatus resultStatus = GetPutResultStatus(naiveCase);
     if (resultStatus == NKikimrProto::UNKNOWN) {
-        CTEST << "ConductPut: Skip\n";
+        CTEST << "ConductPut: Skip NaiveCase: " << ToString(naiveCase) << "\n";
         return;
     }
-    CTEST << "ConductPut: Start\n";
+    CTEST << "ConductPut: Start NaiveCase: " << ToString(naiveCase) << "\n";
     TAutoPtr handle;
     TEvBlobStorage::TEvPut *put = runtime.GrabEdgeEventRethrow(handle);
     UNIT_ASSERT_VALUES_EQUAL(put->Id, args.PatchedId);
@@ -346,7 +378,7 @@ void ConductPut(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCa
 }
 
 void ConductNaivePatch(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
-    CTEST << "ConductNaivePatch: Start\n";
+    CTEST << "ConductNaivePatch: Start NaiveCase: " << ToString(naiveCase) << Endl;
     ConductGet(runtime, args, naiveCase);
     ConductPut(runtime, args, naiveCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(naiveCase);
@@ -354,14 +386,27 @@ void ConductNaivePatch(TTestBasicRuntime &runtime, const TTestArgs &args, ENaive
     CTEST << "ConductNaivePatch: Finish\n";
 }
 
+template <typename T>
+TString ToString(const TVector<T> &lst) { // renders lst as "[a, b, c]" for CTEST log lines
+    TStringBuilder bld;
+    bld << '[';
+    for (ui32 idx = 0; idx < lst.size(); ++idx) {
+        if (idx) { // separator precedes every element except the first
+            bld << ", ";
+        }
+        bld << lst[idx];
+    }
+    bld << ']';
+    return bld;
+}
 
 void ConductVPatchStart(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
-        EVPatchCase naiveCase, TVDiskPointer vdiskPointer)
+        EVPatchCase vpatchCase, TVDiskPointer vdiskPointer)
 {
     auto [vdiskIdx, idxInSubgroup] = vdiskPointer.GetIndecies(env, args.OriginalId.Hash());
-    CTEST << "ConductVPatchStart: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+    CTEST << "ConductVPatchStart: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
     TVDiskID vdisk = env.Info->GetVDiskInSubgroup(idxInSubgroup, args.OriginalId.Hash());
-    auto [status, parts] = GetVPatchFoundPartsStatus(env, args, naiveCase, vdiskPointer);
+    auto [status, parts] = GetVPatchFoundPartsStatus(env, args, vpatchCase, vdiskPointer);
 
     auto start = runtime.GrabEdgeEventRethrow({env.VDisks[vdiskIdx]});
     auto &startRecord = start->Get()->Record;
@@ -376,21 +421,22 @@ void ConductVPatchStart(TTestBasicRuntime &runtime, const TDSProxyEnv &env, cons
     for (auto partId : parts) {
         foundParts->AddPart(partId);
     }
+    CTEST << "ConductVPatchStart: Send FoundParts vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " parts# " << ToString(parts) << "\n";
     SendByHandle(runtime, start, std::move(foundParts));
     CTEST << "ConductVPatchStart: Finish vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
 }
 
 void ConductVPatchDiff(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
-        EVPatchCase naiveCase, TVDiskPointer vdiskPointer)
+        EVPatchCase vpatchCase, TVDiskPointer vdiskPointer)
 {
     auto [vdiskIdx, idxInSubgroup] = vdiskPointer.GetIndecies(env, args.PatchedId.Hash());
     TVDiskID vdisk = env.Info->GetVDiskInSubgroup(idxInSubgroup, args.PatchedId.Hash());
-    NKikimrProto::EReplyStatus resultStatus = GetVPatchResultStatus(env, args, naiveCase, vdiskPointer);
+    NKikimrProto::EReplyStatus resultStatus = GetVPatchResultStatus(env, args, vpatchCase, vdiskPointer);
     if (resultStatus == NKikimrProto::UNKNOWN) {
-        CTEST << "ConductVPatchDiff: Skip vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+        CTEST << "ConductVPatchDiff: Skip vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
         return;
     }
-    CTEST << "ConductVPatchDiff: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+    CTEST << "ConductVPatchDiff: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
 
     auto diffEv = runtime.GrabEdgeEventRethrow({env.VDisks[vdiskIdx]});
     auto &diffRecord = diffEv->Get()->Record;
@@ -415,6 +461,7 @@ void ConductVPatchDiff(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const
 }
 
 void ConductFailedVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args) {
+    return; // disabled vpatch
     CTEST << "ConductFailedVPatch: Start\n";
     for (ui32 idxInSubgroup = 0; idxInSubgroup < args.GType.BlobSubgroupSize(); ++idxInSubgroup) {
         TVDiskPointer vdisk = TVDiskPointer::GetVDiskIdx(idxInSubgroup);
@@ -429,7 +476,7 @@ void ConductFailedVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, con
 
 
 void ConductVMovedPatch(TTestBasicRuntime &runtime, const TTestArgs &args, EMovedPatchCase movedCase) {
-    CTEST << "ConductVMovedPatch: Start\n";
+    CTEST << "ConductVMovedPatch: Start MovedPatchCase: " << ToString(movedCase) << Endl;
     NKikimrProto::EReplyStatus resultStatus = GetVMovedPatchResultStatus(movedCase);
     TAutoPtr handle;
     TEvBlobStorage::TEvVMovedPatch *vPatch = runtime.GrabEdgeEventRethrow(handle);
@@ -459,7 +506,7 @@ void ConductVMovedPatch(TTestBasicRuntime &runtime, const TTestArgs &args, EMove
 void ConductMovedPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EMovedPatchCase movedCase)
 {
-    CTEST << "ConductMovedPatch: Start\n";
+    CTEST << "ConductMovedPatch: Start MovedPatchCase: " << ToString(movedCase) << Endl;
     ConductFailedVPatch(runtime, env, args);
     ConductVMovedPatch(runtime, args, movedCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(movedCase);
@@ -481,7 +528,8 @@ void ConductFallbackPatch(TTestBasicRuntime &runtime, const TTestArgs &args) {
 void ConductVPatchEvents(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EVPatchCase vpatchCase)
 {
-    CTEST << "ConductVPatchEvents: Start\n";
+    return; // disabled vpatch
+    CTEST << "ConductVPatchEvents: Start VPatchCase: " << ToString(vpatchCase) << Endl;
     for (ui32 idxInSubgroup = 0; idxInSubgroup < args.GType.BlobSubgroupSize(); ++idxInSubgroup) {
         TVDiskPointer vdisk = TVDiskPointer::GetVDiskIdx(idxInSubgroup);
         ConductVPatchStart(runtime, env, args, vpatchCase, vdisk);
@@ -496,7 +544,7 @@ void ConductVPatchEvents(TTestBasicRuntime &runtime, const TDSProxyEnv &env, con
 void ConductVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EVPatchCase vpatchCase)
 {
-    CTEST << "ConductFallbackPatch: Start\n";
+    CTEST << "ConductVPatch: Start VPatchCase: " << ToString(vpatchCase) << Endl;
     ConductVPatchEvents(runtime, env, args, vpatchCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(vpatchCase);
     if (resultStatus == NKikimrProto::UNKNOWN) {
@@ -620,17 +668,18 @@ void RunGeneralTest(void(*runner)(TTestBasicRuntime &runtime, const TTestArgs &a
     Y_UNIT_TEST_NAIVE(ErrorOnPut, erasure) \
     Y_UNIT_TEST_MOVED(Ok, erasure) \
     Y_UNIT_TEST_MOVED(Error, erasure) \
-    Y_UNIT_TEST_VPATCH(Ok, erasure) \
-    Y_UNIT_TEST_VPATCH(OneErrorAndAllPartExistInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(OnePartLostInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(DeadGroupInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(ErrorDuringVPatchDiff, erasure) \
     Y_UNIT_TEST_SECURED(Ok, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnGetItem, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnGet, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnPut, erasure) \
 // end Y_UNIT_TEST_PATCH_PACK
 
+//    Y_UNIT_TEST_VPATCH(Ok, erasure)
+//    Y_UNIT_TEST_VPATCH(OneErrorAndAllPartExistInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(OnePartLostInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(DeadGroupInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(ErrorDuringVPatchDiff, erasure) 
+
     Y_UNIT_TEST_PATCH_PACK(ErasureNone)
     Y_UNIT_TEST_PATCH_PACK(Erasure4Plus2Block)
     Y_UNIT_TEST_PATCH_PACK(ErasureMirror3dc)
@@ -712,6 +761,7 @@ EFaultToleranceCase GetFaultToleranceCaseForBlock4Plus2(const TDSProxyEnv &env,
             }
         }
     }
+    return EFaultToleranceCase::Fallback; // disabled vpatch
     if (layout.CountEffectiveReplicas(env.Info->Type) == env.Info->Type.TotalPartCount()) {
         return EFaultToleranceCase::Ok;
     } else {
@@ -736,6 +786,7 @@ EFaultToleranceCase GetFaultToleranceCaseForMirror3dc(const TDSProxyEnv &env, co
     for (ui32 dcIdx = 0; dcIdx < dcCnt; ++dcIdx) {
         x2cnt += (replInDc[dcIdx] >= 2);
     }
+    return EFaultToleranceCase::Fallback; // disabled vpatch
     if ((replInDc[0] && replInDc[1] && replInDc[2]) || x2cnt >= 2) {
         return EFaultToleranceCase::Ok;
     } else {
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
index 754d0924d1b9..f969813d5b57 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
@@ -90,6 +90,10 @@ void TNodeWarden::RemoveDrivesWithBadSerialsAndReport(TVector TNodeWarden::ListLocalDrives() {
+    if (!AppData()->FeatureFlags.GetEnableDriveSerialsDiscovery()) {
+        return {};
+    }
+
     TStringStream details;
     TVector drives = ListDevicesWithPartlabel(details);
 
@@ -179,6 +183,7 @@ void TNodeWarden::Bootstrap() {
         icb->RegisterSharedControl(EnableSyncLogChunkCompressionSSD, "VDiskControls.EnableSyncLogChunkCompressionSSD");
         icb->RegisterSharedControl(MaxSyncLogChunksInFlightHDD, "VDiskControls.MaxSyncLogChunksInFlightHDD");
         icb->RegisterSharedControl(MaxSyncLogChunksInFlightSSD, "VDiskControls.MaxSyncLogChunksInFlightSSD");
+        icb->RegisterSharedControl(DefaultHugeGarbagePerMille, "VDiskControls.DefaultHugeGarbagePerMille");
 
         icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_ROT].BurstThresholdNs,
                 "VDiskControls.BurstThresholdNsHDD");
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
index 3d77ae4f1c7e..3a141f3b113c 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
@@ -139,6 +139,7 @@ namespace NKikimr::NStorage {
         TControlWrapper EnableSyncLogChunkCompressionSSD;
         TControlWrapper MaxSyncLogChunksInFlightHDD;
         TControlWrapper MaxSyncLogChunksInFlightSSD;
+        TControlWrapper DefaultHugeGarbagePerMille;
 
         TReplQuoter::TPtr ReplNodeRequestQuoter;
         TReplQuoter::TPtr ReplNodeResponseQuoter;
@@ -162,6 +163,7 @@ namespace NKikimr::NStorage {
             , EnableSyncLogChunkCompressionSSD(0, 0, 1)
             , MaxSyncLogChunksInFlightHDD(10, 1, 1024)
             , MaxSyncLogChunksInFlightSSD(10, 1, 1024)
+            , DefaultHugeGarbagePerMille(300, 1, 1000)
             , CostMetricsParametersByMedia({
                 TCostMetricsParameters{200},
                 TCostMetricsParameters{50},
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
index 0497406a1332..75061a6bcc90 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
@@ -179,6 +179,7 @@ namespace NKikimr::NStorage {
         vdiskConfig->EnableVDiskCooldownTimeout = Cfg->EnableVDiskCooldownTimeout;
         vdiskConfig->ReplPausedAtStart = Cfg->VDiskReplPausedAtStart;
         vdiskConfig->EnableVPatch = EnableVPatch;
+        vdiskConfig->DefaultHugeGarbagePerMille = DefaultHugeGarbagePerMille;
 
         vdiskConfig->EnableLocalSyncLogDataCutting = EnableLocalSyncLogDataCutting;
         if (deviceType == NPDisk::EDeviceType::DEVICE_TYPE_ROT) {
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
index 7f2792c639de..75a8429bee87 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
@@ -401,16 +401,15 @@ class TRealBlockDevice : public IBlockDevice {
                 }
                 EndOffset = op->GetOffset() + opSize;
 
-                ui64 duration = HPNow() - completionAction->SubmitTime;
-                ui64 durationMs = HPMilliSecondsFloat(duration);
+                double duration = HPMilliSecondsFloat(HPNow() - completionAction->SubmitTime);
                 if (op->GetType() == IAsyncIoOperation::EType::PRead) {
                     NSan::Unpoison(op->GetData(), opSize);
                     REQUEST_VALGRIND_MAKE_MEM_DEFINED(op->GetData(), opSize);
-                    Device.Mon.DeviceReadDuration.Increment(durationMs);
-                    LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize);
+                    Device.Mon.DeviceReadDuration.Increment(duration);
+                    LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), duration, opSize);
                 } else {
-                    Device.Mon.DeviceWriteDuration.Increment(durationMs);
-                    LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize);
+                    Device.Mon.DeviceWriteDuration.Increment(duration);
+                    LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), duration, opSize);
                 }
                 if (completionAction->FlushAction) {
                     ui64 idx = completionAction->FlushAction->OperationIdx;
@@ -668,8 +667,8 @@ class TRealBlockDevice : public IBlockDevice {
                             Device.IsTrimEnabled = Device.IoContext->DoTrim(op);
                             NHPTimer::STime endTime = HPNow();
                             Device.IdleCounter.Decrement();
-                            const ui64 durationUs = HPMicroSeconds(endTime - beginTime);
-                            Device.Mon.DeviceTrimDuration.Increment(durationUs);
+                            const double duration = HPMilliSecondsFloat(endTime - beginTime);
+                            Device.Mon.DeviceTrimDuration.Increment(duration);
                             *Device.Mon.DeviceEstimatedCostNs += completion->CostNs;
                             if (Device.ActorSystem && Device.IsTrimEnabled) {
                                 LOG_DEBUG_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE,
@@ -680,7 +679,7 @@ class TRealBlockDevice : public IBlockDevice {
                                         << "\" offset# " << op->GetOffset()
                                         << " size# " << op->GetSize());
                                 LWPROBE(PDiskDeviceTrimDuration, Device.GetPDiskId(),
-                                        HPMilliSecondsFloat(endTime - beginTime), op->GetOffset());
+                                        duration, op->GetOffset());
                             }
                         }
                         completion->SetResult(EIoResult::Ok);
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
index 43fe42c0d353..96542ae28416 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
@@ -163,7 +163,7 @@ class TPerOwnerQuotaTracker {
         str << "";
     }
 
-    void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder) {
+    void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder, double *borderOccupancy) {
         str << "
";
         str << "ColorLimits#\n";
         ColorLimits.Print(str);
@@ -171,8 +171,12 @@ class TPerOwnerQuotaTracker {
         str << "\nExpectedOwnerCount# " << ExpectedOwnerCount;
         str << "\nActiveOwners# " << ActiveOwnerIds.size();
         if (colorBorder) {
-            str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder) << "\n";
+            str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder);
         }
+        if (borderOccupancy) {
+            str << "\nColorBorderOccupancy# " << *borderOccupancy;
+        }
+        str << "\n";
         str << "
"; str << ""; str << R"_( @@ -531,9 +535,9 @@ using TColor = NKikimrBlobStorage::TPDiskSpaceColor; void PrintHTML(IOutputStream &str) { str << "

GlobalQuota

"; - GlobalQuota->PrintHTML(str, nullptr, nullptr); + GlobalQuota->PrintHTML(str, nullptr, nullptr, nullptr); str << "

OwnerQuota

"; - OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder); + OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder, &ColorBorderOccupancy); } ui32 ColorFlagLimit(TOwner owner, NKikimrBlobStorage::TPDiskSpaceColor::E color) { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h index d0d58b4b1157..0337910644da 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h @@ -26,7 +26,7 @@ struct TDiskColor { } double CalculateOccupancy(i64 total) const { - return (double)CalculateQuota(total) / total; + return 1 - (double)CalculateQuota(total) / total; } }; @@ -92,15 +92,15 @@ struct TColorLimits { double GetOccupancyForColor(NKikimrBlobStorage::TPDiskSpaceColor::E color, i64 total) { switch (color) { - case NKikimrBlobStorage::TPDiskSpaceColor::GREEN: return Cyan.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::CYAN: return LightYellow.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_YELLOW: return Yellow.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::YELLOW: return LightOrange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_ORANGE: return PreOrange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::PRE_ORANGE: return Orange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::ORANGE: return Red.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::RED: return Black.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::BLACK: return 1.0; + case NKikimrBlobStorage::TPDiskSpaceColor::GREEN: return 0.0; + case NKikimrBlobStorage::TPDiskSpaceColor::CYAN: return Cyan.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_YELLOW: return LightYellow.CalculateOccupancy(total); + case 
NKikimrBlobStorage::TPDiskSpaceColor::YELLOW: return Yellow.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_ORANGE: return LightOrange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::PRE_ORANGE: return PreOrange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::ORANGE: return Orange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::RED: return Red.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::BLACK: return Black.CalculateOccupancy(total); case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h index d221cfba4b9f..3b51e4cab2c5 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h @@ -298,6 +298,7 @@ struct TPDiskConfig : public TThrRefBase { str << " OrangeLogChunksMultiplier# " << OrangeLogChunksMultiplier << x; str << " WarningLogChunksMultiplier# " << WarningLogChunksMultiplier << x; str << " YellowLogChunksMultiplier# " << YellowLogChunksMultiplier << x; + str << " SpaceColorBorder# " << SpaceColorBorder << x; str << "}"; return str.Str(); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp index fc0c597a99ca..2ac80067655a 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp @@ -1477,7 +1477,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { TGuard guard(StateMutex); const ui64 totalSize = Format.DiskSize; const ui64 availableSize = (ui64)Format.ChunkSize * Keeper.GetFreeChunkCount(); - + if (*Mon.PDiskBriefState != TPDiskMon::TPDisk::Error) { 
*Mon.FreeSpaceBytes = availableSize; *Mon.UsedSpaceBytes = totalSize - availableSize; @@ -1487,7 +1487,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { *Mon.UsedSpaceBytes = 32_KB; *Mon.TotalSpaceBytes = 32_KB; } - + NKikimrWhiteboard::TPDiskStateInfo& pdiskState = reportResult->PDiskState->Record; pdiskState.SetPDiskId(PDiskId); pdiskState.SetPath(Cfg->GetDevicePath()); @@ -1499,6 +1499,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { pdiskState.SetSystemSize(Format.ChunkSize * (Keeper.GetOwnerHardLimit(OwnerSystemLog) + Keeper.GetOwnerHardLimit(OwnerSystemReserve))); pdiskState.SetLogUsedSize(Format.ChunkSize * (Keeper.GetOwnerHardLimit(OwnerCommonStaticLog) - Keeper.GetOwnerFree(OwnerCommonStaticLog))); pdiskState.SetLogTotalSize(Format.ChunkSize * Keeper.GetOwnerHardLimit(OwnerCommonStaticLog)); + pdiskState.SetNumActiveSlots(TotalOwners); if (ExpectedSlotCount) { pdiskState.SetExpectedSlotCount(ExpectedSlotCount); } diff --git a/ydb/core/blobstorage/ut_blobstorage/gc.cpp b/ydb/core/blobstorage/ut_blobstorage/gc.cpp new file mode 100644 index 000000000000..80c779093259 --- /dev/null +++ b/ydb/core/blobstorage/ut_blobstorage/gc.cpp @@ -0,0 +1,22 @@ +#include + +Y_UNIT_TEST_SUITE(GarbageCollection) { + Y_UNIT_TEST(EmptyGcCmd) { + TEnvironmentSetup env({ + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block, + }); + auto& runtime = env.Runtime; + + env.CreateBoxAndPool(1, 1); + auto info = env.GetGroupInfo(env.GetGroups().front()); + + auto ev = std::make_unique(1u, 1u, 1u, 0u, false, 0u, 0u, nullptr, nullptr, + TInstant::Max(), true); + const TActorId edge = runtime->AllocateEdgeActor(1, __FILE__, __LINE__); + runtime->WrapInActorContext(edge, [&] { + SendToBSProxy(edge, info->GroupID, ev.release()); + }); + auto res = env.WaitForEdgeActorEvent(edge); + UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::ERROR); + } +} diff --git a/ydb/core/blobstorage/ut_blobstorage/ya.make 
b/ydb/core/blobstorage/ut_blobstorage/ya.make index 0edf9906ed95..ba965e9e83ca 100644 --- a/ydb/core/blobstorage/ut_blobstorage/ya.make +++ b/ydb/core/blobstorage/ut_blobstorage/ya.make @@ -22,6 +22,7 @@ SRCS( ds_proxy_lwtrace.cpp encryption.cpp extra_block_checks.cpp + gc.cpp gc_quorum_3dc.cpp get.cpp group_reconfiguration.cpp diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h index 31e30788eeb1..73b1d27daaa1 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h @@ -218,6 +218,7 @@ namespace NKikimr { TDuration WhiteboardUpdateInterval; bool EnableVDiskCooldownTimeout; TControlWrapper EnableVPatch = true; + TControlWrapper DefaultHugeGarbagePerMille; ///////////// COST METRICS SETTINGS //////////////// bool UseCostTracker = true; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_events.h b/ydb/core/blobstorage/vdisk/common/vdisk_events.h index f31c02db3bcf..98473e08648b 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_events.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_events.h @@ -1585,7 +1585,7 @@ namespace NKikimr { if (deadline != TInstant::Max()) { this->Record.MutableMsgQoS()->SetDeadlineSeconds((ui32)deadline.Seconds()); } - this->Record.MutableMsgQoS()->SetExtQueueId(HandleClassToQueueId(NKikimrBlobStorage::AsyncBlob)); + this->Record.MutableMsgQoS()->SetExtQueueId(NKikimrBlobStorage::PutAsyncBlob); } bool GetIgnoreBlock() const { @@ -1965,6 +1965,25 @@ namespace NKikimr { } Record.MutableMsgQoS()->SetExtQueueId(NKikimrBlobStorage::EVDiskQueueId::GetFastRead); } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchStart &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + str << "{TEvVPatchStart"; + str << " 
OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; struct TEvBlobStorage::TEvVPatchFoundParts @@ -2010,6 +2029,25 @@ namespace NKikimr { Record.SetStatus(status); } + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchFoundParts &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + str << "{TEvVPatchFoundParts"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } + void MakeError(NKikimrProto::EReplyStatus status, const TString& errorReason, const NKikimrBlobStorage::TEvVPatchStart &request) { Record.SetErrorReason(errorReason); @@ -2099,6 +2137,25 @@ namespace NKikimr { } return result; } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchDiff &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalPartBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedPartBlobId()); + str << "{TEvVPatchDiff"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; @@ -2144,6 +2201,25 @@ namespace NKikimr { } return result; } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const 
NKikimrBlobStorage::TEvVPatchXorDiff &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalPartBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedPartBlobId()); + str << "{TEvVPatchXorDiff"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; struct TEvBlobStorage::TEvVPatchXorDiffResult diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp index fa7014b64837..54a7d2cfaf53 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp @@ -16,12 +16,14 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// TDefragCtx::TDefragCtx( const TIntrusivePtr &vctx, + const TIntrusivePtr &vconfig, const std::shared_ptr &hugeBlobCtx, const TPDiskCtxPtr &pdiskCtx, const TActorId &skeletonId, const TActorId &hugeKeeperId, bool runDefrageBySchedule) : VCtx(vctx) + , VCfg(vconfig) , HugeBlobCtx(hugeBlobCtx) , PDiskCtx(pdiskCtx) , SkeletonId(skeletonId) @@ -48,7 +50,8 @@ namespace NKikimr { bool HugeHeapDefragmentationRequired( const TOutOfSpaceState& oos, ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks) { + ui32 hugeTotalChunks, + double defaultPercent) { if (hugeCanBeFreedChunks < 10) return false; @@ -56,11 +59,14 @@ namespace NKikimr { double percentOfGarbage = static_cast(hugeCanBeFreedChunks) / hugeTotalChunks; if (oos.GetLocalColor() > TSpaceColor::CYAN) { - return percentOfGarbage >= 0.02; + // For anything worse than CYAN + return percentOfGarbage >= Min(0.02, defaultPercent); } else if (oos.GetLocalColor() > TSpaceColor::GREEN) { - return percentOfGarbage >= 0.15; + // For CYAN + return percentOfGarbage >= Min(0.15, 
defaultPercent); } else { - return percentOfGarbage >= 0.30; + // For GREEN + return percentOfGarbage >= Min(0.30, defaultPercent); } } @@ -113,7 +119,8 @@ namespace NKikimr { const auto& oos = DCtx->VCtx->GetOutOfSpaceState(); Y_ABORT_UNLESS(usefulChunks <= totalChunks); const ui32 canBeFreedChunks = totalChunks - usefulChunks; - if (HugeHeapDefragmentationRequired(oos, canBeFreedChunks, totalChunks)) { + double defaultPercent = DCtx->VCfg->DefaultHugeGarbagePerMille / 1000.0; + if (HugeHeapDefragmentationRequired(oos, canBeFreedChunks, totalChunks, defaultPercent)) { TChunksToDefrag chunksToDefrag = calcStat.GetChunksToDefrag(DCtx->MaxChunksToDefrag); Y_ABORT_UNLESS(chunksToDefrag); STLOG(PRI_INFO, BS_VDISK_DEFRAG, BSVDD03, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan finished"), diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h index f59ecee374c6..08c451b094e7 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h @@ -18,6 +18,7 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// struct TDefragCtx { const TIntrusivePtr VCtx; + const TIntrusivePtr VCfg; const std::shared_ptr HugeBlobCtx; const TPDiskCtxPtr PDiskCtx; const TActorId SkeletonId; @@ -30,6 +31,7 @@ namespace NKikimr { TDefragCtx( const TIntrusivePtr &vctx, + const TIntrusivePtr &vconfig, const std::shared_ptr &hugeBlobCtx, const TPDiskCtxPtr &pdiskCtx, const TActorId &skeletonId, @@ -45,7 +47,8 @@ namespace NKikimr { bool HugeHeapDefragmentationRequired( const TOutOfSpaceState& oos, ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks); + ui32 hugeTotalChunks, + double defaultPercent); //////////////////////////////////////////////////////////////////////////// // VDISK DEFRAG ACTOR CREATOR diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp index 
ebcfec56195b..fa059a1d2c8e 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp @@ -15,21 +15,21 @@ namespace NKikimr { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 9; ui32 hugeUsedChunks = 20; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(!defrag); } { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 200; ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(!defrag); } { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 301; ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(defrag); } } diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp index 1e1753bf6a99..9f3cb4006e1b 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp @@ -95,7 +95,7 @@ namespace NKikimr { Compact(); auto hugeStat = GetHugeStat(); - Y_ABORT_UNLESS(hugeStat.LockedChunks.size() < 100); + Y_DEBUG_ABORT_UNLESS(hugeStat.LockedChunks.size() < 100); } Send(ParentActorId, new TEvDefragQuantumResult(std::move(stat))); diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp index fedaca042103..a1eeb6af94fb 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp @@ -413,6 +413,13 @@ namespace NKikimr { if (!CheckGC(ctx, record)) 
return {NKikimrProto::ERROR, 0, false}; // record has duplicates + if (!collect && !record.KeepSize() && !record.DoNotKeepSize()) { + LOG_ERROR_S(ctx, NKikimrServices::BS_HULLRECS, HullDs->HullCtx->VCtx->VDiskLogPrefix + << "Db# Barriers ValidateGCCmd: empty garbage collection command" + << " TabletId# " << tabletID); + return {NKikimrProto::ERROR, "empty garbage collection command"}; + } + auto blockStatus = THullDbRecovery::IsBlocked(record); switch (blockStatus.Status) { case TBlocksCache::EStatus::OK: diff --git a/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp b/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp index bdec548c93e9..b1b4e794e86e 100644 --- a/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp +++ b/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp @@ -174,6 +174,8 @@ namespace NKikimr { TEvResumeForce *ResumeForceToken = nullptr; TInstant ReplicationEndTime; bool UnrecoveredNonphantomBlobs = false; + bool RequestedReplicationToken = false; + bool HoldingReplicationToken = false; TWatchdogTimer ReplProgressWatchdog; @@ -287,6 +289,12 @@ namespace NKikimr { case Plan: // this is a first quantum of replication, so we have to register it in the broker State = AwaitToken; + Y_DEBUG_ABORT_UNLESS(!RequestedReplicationToken); + if (RequestedReplicationToken) { + STLOG(PRI_CRIT, BS_REPL, BSVR38, ReplCtx->VCtx->VDiskLogPrefix << "excessive replication token requested"); + break; + } + RequestedReplicationToken = true; if (!Send(MakeBlobStorageReplBrokerID(), new TEvQueryReplToken(ReplCtx->VDiskCfg->BaseInfo.PDiskId))) { HandleReplToken(); } @@ -303,6 +311,10 @@ namespace NKikimr { } void HandleReplToken() { + Y_ABORT_UNLESS(RequestedReplicationToken); + RequestedReplicationToken = false; + HoldingReplicationToken = true; + // switch to replication state Transition(AwaitToken, Replication); if (!ResumeIfReady()) { @@ -408,6 +420,9 @@ namespace NKikimr { if (State == WaitQueues || State == Replication) { // release token as we have finished 
replicating Send(MakeBlobStorageReplBrokerID(), new TEvReleaseReplToken); + Y_DEBUG_ABORT_UNLESS(!RequestedReplicationToken); + Y_DEBUG_ABORT_UNLESS(HoldingReplicationToken); + HoldingReplicationToken = false; } ResetReplProgressTimer(true); @@ -425,8 +440,8 @@ namespace NKikimr { // no more blobs to replicate; replication will not resume State = Finished; ReplCtx->MonGroup.ReplUnreplicatedVDisks() = 0; - ReplCtx->MonGroup.ReplUnreplicatedPhantoms() = 1; - ReplCtx->MonGroup.ReplUnreplicatedNonPhantoms() = 1; + ReplCtx->MonGroup.ReplUnreplicatedPhantoms() = 0; + ReplCtx->MonGroup.ReplUnreplicatedNonPhantoms() = 0; ReplCtx->MonGroup.ReplWorkUnitsRemaining() = 0; ReplCtx->MonGroup.ReplWorkUnitsDone() = 0; ReplCtx->MonGroup.ReplItemsRemaining() = 0; @@ -635,7 +650,15 @@ namespace NKikimr { // return replication token if we have one if (State == AwaitToken || State == WaitQueues || State == Replication) { - Send(MakeBlobStorageReplBrokerID(), new TEvReleaseReplToken); + Y_DEBUG_ABORT_UNLESS(RequestedReplicationToken || HoldingReplicationToken); + if (RequestedReplicationToken || HoldingReplicationToken) { + Send(MakeBlobStorageReplBrokerID(), new TEvReleaseReplToken); + } + } else { + Y_DEBUG_ABORT_UNLESS(!RequestedReplicationToken && !HoldingReplicationToken); + if (RequestedReplicationToken || HoldingReplicationToken) { + STLOG(PRI_CRIT, BS_REPL, BSVR37, ReplCtx->VCtx->VDiskLogPrefix << "stuck replication token"); + } } if (ReplJobActorId) { diff --git a/ydb/core/blobstorage/vdisk/repl/blobstorage_replproxy.cpp b/ydb/core/blobstorage/vdisk/repl/blobstorage_replproxy.cpp index b37ce712e0c5..0a8569e1241f 100644 --- a/ydb/core/blobstorage/vdisk/repl/blobstorage_replproxy.cpp +++ b/ydb/core/blobstorage/vdisk/repl/blobstorage_replproxy.cpp @@ -129,6 +129,7 @@ namespace NKikimr { ui64 NextReceiveCookie; TResultQueue ResultQueue; std::shared_ptr Tracker = std::make_shared(); + bool Terminated = false; TQueue> SchedulerRequestQ; THashMap RequestTokens; @@ -227,9 +228,7 @@ 
namespace NKikimr { PrefetchDataSize = 0; RequestFromVDiskProxyPending = false; if (Finished) { - Send(MakeBlobStorageReplBrokerID(), new TEvPruneQueue); - RequestTokens.clear(); - return PassAway(); // TODO(alexvru): check correctness of invocations + return PassAway(); } } // send request(s) if prefetch queue is not full @@ -297,6 +296,9 @@ namespace NKikimr { if (msg->Record.GetCookie() == NextReceiveCookie) { ui64 cookie = NextReceiveCookie; ProcessResult(msg); + if (Terminated) { + return; + } ReleaseMemToken(cookie); while (!ResultQueue.empty()) { const TQueueItem& top = ResultQueue.top(); @@ -305,6 +307,9 @@ namespace NKikimr { } ui64 cookie = NextReceiveCookie; ProcessResult(top.get()); + if (Terminated) { + return; + } ReleaseMemToken(cookie); ResultQueue.pop(); } @@ -314,6 +319,7 @@ namespace NKikimr { } void ReleaseMemToken(ui64 cookie) { + Y_ABORT_UNLESS(!Terminated); if (RequestTokens) { auto it = RequestTokens.find(cookie); Y_ABORT_UNLESS(it != RequestTokens.end()); @@ -428,6 +434,13 @@ namespace NKikimr { } } + void PassAway() override { + Y_ABORT_UNLESS(!Terminated); + Terminated = true; + Send(MakeBlobStorageReplBrokerID(), new TEvPruneQueue); + TActorBootstrapped::PassAway(); + } + STRICT_STFUNC(StateFunc, hFunc(TEvReplProxyNext, Handle) hFunc(TEvReplMemToken, Handle) @@ -446,8 +459,7 @@ namespace NKikimr { TTrackableVector&& ids, const TVDiskID& vdiskId, const TActorId& serviceId) - : TActorBootstrapped() - , ReplCtx(std::move(replCtx)) + : ReplCtx(std::move(replCtx)) , GType(ReplCtx->VCtx->Top->GType) , Ids(std::move(ids)) , VDiskId(vdiskId) diff --git a/ydb/core/blobstorage/vdisk/repl/query_donor.h b/ydb/core/blobstorage/vdisk/repl/query_donor.h index ffd59a535d3b..dbdc02b8a880 100644 --- a/ydb/core/blobstorage/vdisk/repl/query_donor.h +++ b/ydb/core/blobstorage/vdisk/repl/query_donor.h @@ -68,10 +68,9 @@ namespace NKikimr { } if (action) { - const TActorId temp(actorId); LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::BS_VDISK_GET, 
SelfId() << " sending " << query->ToString() - << " to " << temp); - Send(actorId, query.release()); + << " to " << actorId); + Send(actorId, query.release(), IEventHandle::FlagTrackDelivery); } else { PassAway(); } @@ -116,6 +115,7 @@ namespace NKikimr { STRICT_STFUNC(StateFunc, hFunc(TEvBlobStorage::TEvVGetResult, Handle); + cFunc(TEvents::TSystem::Undelivered, Step); cFunc(TEvents::TSystem::Poison, PassAway); ) }; diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp index 043b5300e308..c7a7ff970a4c 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp @@ -225,12 +225,19 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////// void Handle(TEvBlobStorage::TEvVMovedPatch::TPtr &ev, const TActorContext &ctx) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: receive request;" + << " Event# " << ev->Get()->ToString()); if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: is not allowed;" + << " Event# " << ev->Get()->ToString()); return; } const bool postpone = OverloadHandler->PostponeEvent(ev); if (!postpone) { PrivateHandle(ev, ctx); + } else { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: is postponned;" + << " Event# " << ev->Get()->ToString()); } } @@ -270,11 +277,16 @@ namespace NKikimr { void Handle(TEvBlobStorage::TEvVPatchStart::TPtr &ev, const TActorContext &ctx) { if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatchStart: receive request;" + << " Event# " << ev->Get()->ToString()); return; } const bool postpone = OverloadHandler->PostponeEvent(ev); if (!postpone) { PrivateHandle(ev, ctx); + } else { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatchStart: 
postponned;" + << " Event# " << ev->Get()->ToString()); } } @@ -307,24 +319,32 @@ namespace NKikimr { template void HandleVPatchDiffResending(TEvDiffPtr &ev, const TActorContext &ctx) { if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: is not allowed;" + << " Event# " << ev->Get()->ToString()); return; } if constexpr (std::is_same_v) { LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: recieve diff;" << " Event# " << ev->Get()->ToString()); IFaceMonGroup->PatchDiffMsgs()++; - } - if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: recieve xor diff;" << " Event# " << ev->Get()->ToString()); IFaceMonGroup->PatchXorDiffMsgs()++; + } else { + LOG_ERROR_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: UNKNOWN diff;" + << " Event# " << ev->Get()->ToString()); } TLogoBlobID patchedBlobId = LogoBlobIDFromLogoBlobID(ev->Get()->Record.GetPatchedPartBlobId()).FullID(); auto it = VPatchActors.find(patchedBlobId); if (it != VPatchActors.end()) { TActivationContext::Send(ev->Forward(it->second)); + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: diff sent to actor;" + << " Event# " << ev->Get()->ToString()); } else { ReplyError(NKikimrProto::ERROR, "VPatchActor doesn't exist", ev, ctx, TAppData::TimeProvider->Now()); + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: diff didn't send to actor; actor didn't exist" + << " Event# " << ev->Get()->ToString()); } } @@ -1789,7 +1809,7 @@ namespace NKikimr { } void StartDefrag(const TActorContext &ctx) { - auto defragCtx = std::make_shared(VCtx, HugeBlobCtx, PDiskCtx, ctx.SelfID, + auto defragCtx = std::make_shared(VCtx, Config, HugeBlobCtx, PDiskCtx, ctx.SelfID, Db->HugeKeeperID, true); DefragId = ctx.Register(CreateDefragActor(defragCtx, GInfo)); ActiveActors.Insert(DefragId, __FILE__, __LINE__, ctx, 
NKikimrServices::BLOBSTORAGE); // keep forever diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp index db94713be486..a0b22acc05a0 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp @@ -9,8 +9,6 @@ namespace NKikimr { class TVMovedPatchActor : public TActorBootstrapped { friend TActorBootstrapped; - static constexpr ui64 SubRequestDurationMs = 1000; - ui32 OriginalGroupId; ui32 PatchedGroupId; TLogoBlobID OriginalId; @@ -29,6 +27,7 @@ namespace NKikimr { TActorId LeaderId; TOutOfSpaceStatus OOSStatus; + TInstant Deadline = TInstant::Zero(); NLWTrace::TOrbit Orbit; @@ -58,6 +57,10 @@ namespace NKikimr { OriginalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); Y_ABORT_UNLESS(record.HasPatchedBlobId()); PatchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + Deadline = TInstant::Seconds(record.GetMsgQoS().HasDeadlineSeconds()); + if (record.HasMsgQoS() && record.GetMsgQoS().HasDeadlineSeconds()) { + Deadline = TInstant::Seconds(record.GetMsgQoS().HasDeadlineSeconds()); + } DiffCount = record.DiffsSize(); Diffs.reset(new TEvBlobStorage::TEvPatch::TDiff[DiffCount]); @@ -96,6 +99,12 @@ namespace NKikimr { << " ErrorReason# " << ErrorReason << " Marker# BSVSP01"); } + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send result TEvVMovedPatch: " << errorSubMsg << ';' + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId + << " ErrorReason# " << ErrorReason + << " Marker# BSVSP01"); SendVDiskResponse(ctx, Event->Sender, vMovedPatchResult.release(), Event->Cookie, VCtx); PassAway(); } @@ -108,6 +117,10 @@ namespace NKikimr { } void Handle(TEvBlobStorage::TEvGetResult::TPtr &ev, const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Receive Get ub 
TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); TEvBlobStorage::TEvGetResult *result = ev->Get(); Orbit = std::move(result->Orbit); @@ -138,15 +151,18 @@ namespace NKikimr { Buffer = result->Responses[0].Buffer.ConvertToString(); ApplyDiffs(); - TInstant deadline = TActivationContext::Now() + TDuration::MilliSeconds(SubRequestDurationMs); // We have chosen UserData as PutHandleClass on purpose. // If VMovedPatch and Put were AsyncWrite, it would become a deadlock // because the put subrequest may not send and the moved patch request will end by timeout. - std::unique_ptr put = std::make_unique(PatchedId, Buffer, deadline, + std::unique_ptr put = std::make_unique(PatchedId, Buffer, Deadline, NKikimrBlobStorage::UserData, TEvBlobStorage::TEvPut::TacticDefault); put->Orbit = std::move(Orbit); + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send Put ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); SendToBSProxy(SelfId(), PatchedGroupId, put.release(), OriginalId.Hash()); } @@ -156,6 +172,11 @@ namespace NKikimr { ui32 originalIdHash = OriginalId.Hash(); + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Receive Put ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); + constexpr auto errorSubMsg = "failed on VPut"; if (ev->Cookie != originalIdHash) { ErrorReason = "Couldn't put the patched blob; Received TEvPutResult with wrong cookie"; @@ -173,11 +194,20 @@ namespace NKikimr { } void Bootstrap() { - TInstant deadline = TActivationContext::Now() + TDuration::MilliSeconds(SubRequestDurationMs); + if (Deadline && Deadline < TActivationContext::Now()) { + SendResponseAndDie(TActivationContext::AsActorContext(), NKikimrProto::DEADLINE); + return; + } + std::unique_ptr get = std::make_unique(OriginalId, 0, - OriginalId.BlobSize(), deadline, NKikimrBlobStorage::AsyncRead); + 
OriginalId.BlobSize(), Deadline, NKikimrBlobStorage::AsyncRead); get->Orbit = std::move(Event->Get()->Orbit); + LOG_DEBUG_S(TActivationContext::AsActorContext(), NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send Get ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); + SendToBSProxy(SelfId(), OriginalGroupId, get.release(), PatchedId.Hash()); Become(&TThis::StateWait); } diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp index 5c4325e85ced..05f6f035277a 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp @@ -306,7 +306,7 @@ namespace NKikimr::NPrivate { void SendVPatchResult(NKikimrProto::EReplyStatus status, bool forceEnd = false) { STLOG(PRI_INFO, BS_VDISK_PATCH, BSVSP07, - VDiskLogPrefix << " TEvVPatch: send patch result;", + VDiskLogPrefix << " TEvVPatch: " << (forceEnd ? 
"received force end;" : "send patch result;"), (OriginalBlobId, OriginalBlobId), (PatchedBlobId, PatchedBlobId), (OriginalPartId, (ui32)OriginalPartId), diff --git a/ydb/core/change_exchange/change_sender_common_ops.h b/ydb/core/change_exchange/change_sender_common_ops.h index 8c9f45a1c698..f2c02c0b595b 100644 --- a/ydb/core/change_exchange/change_sender_common_ops.h +++ b/ydb/core/change_exchange/change_sender_common_ops.h @@ -336,7 +336,7 @@ class TBaseChangeSender { Y_ABORT_UNLESS(it != Broadcasting.end()); auto& broadcast = it->second; - if (broadcast.Partitions.contains(partitionId)) { + if (broadcast.CompletedPartitions.contains(partitionId)) { return false; } @@ -413,8 +413,10 @@ class TBaseChangeSender { } TActorId GetChangeServer() const { return ChangeServer; } - void CreateSenders(const TVector& partitionIds, bool partitioningChanged = true) { - if (partitioningChanged) { + +private: + void CreateSendersImpl(const TVector& partitionIds) { + if (partitionIds) { CreateMissingSenders(partitionIds); } else { RecreateSenders(GonePartitions); @@ -427,6 +429,16 @@ class TBaseChangeSender { } } +protected: + void CreateSenders(const TVector& partitionIds) { + Y_ABORT_UNLESS(partitionIds); + CreateSendersImpl(partitionIds); + } + + void CreateSenders() { + CreateSendersImpl({}); + } + void KillSenders() { for (const auto& [_, sender] : std::exchange(Senders, {})) { if (sender.ActorId) { diff --git a/ydb/core/change_exchange/util.cpp b/ydb/core/change_exchange/util.cpp new file mode 100644 index 000000000000..c4c0516e0e94 --- /dev/null +++ b/ydb/core/change_exchange/util.cpp @@ -0,0 +1,15 @@ +#include "util.h" + +namespace NKikimr::NChangeExchange { + +TVector MakePartitionIds(const TVector& partitions) { + TVector result(::Reserve(partitions.size())); + + for (const auto& partition : partitions) { + result.push_back(partition.ShardId); + } + + return result; +} + +} diff --git a/ydb/core/change_exchange/util.h b/ydb/core/change_exchange/util.h new file mode 
100644 index 000000000000..f8ba146fdeaf --- /dev/null +++ b/ydb/core/change_exchange/util.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NKikimr::NChangeExchange { + +TVector MakePartitionIds(const TVector& partitions); + +} diff --git a/ydb/core/change_exchange/ya.make b/ydb/core/change_exchange/ya.make index b95ab2178442..680c246118ea 100644 --- a/ydb/core/change_exchange/ya.make +++ b/ydb/core/change_exchange/ya.make @@ -4,6 +4,7 @@ SRCS( change_exchange.cpp change_record.cpp change_sender_monitoring.cpp + util.cpp ) GENERATE_ENUM_SERIALIZATION(change_record.h) diff --git a/ydb/core/client/server/msgbus_server_pq_metacache.cpp b/ydb/core/client/server/msgbus_server_pq_metacache.cpp index 619fc4577981..78d316db4a1c 100644 --- a/ydb/core/client/server/msgbus_server_pq_metacache.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metacache.cpp @@ -218,6 +218,7 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedRecord.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); req->Record.MutableRequest()->SetKeepSession(false); req->Record.MutableRequest()->SetDatabase(NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig)); + req->Record.MutableRequest()->SetUsePublicResponseDataFormat(true); req->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); req->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); @@ -274,9 +275,14 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedGet()->Record.GetRef(); - Y_ABORT_UNLESS(record.GetResponse().GetResults().size() == 1); - const auto& rr = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - ui64 newVersion = rr.ListSize() == 0 ? 
0 : rr.GetList(0).GetStruct(0).GetOptional().GetInt64(); + Y_VERIFY(record.GetResponse().YdbResultsSize() == 1); + NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + + ui64 newVersion = 0; + if (parser.RowsCount() != 0) { + parser.TryNextRow(); + newVersion = *parser.ColumnParser(0).GetOptionalInt64(); + } LastVersionUpdate = ctx.Now(); if (newVersion > CurrentTopicsVersion || CurrentTopicsVersion == 0 || SkipVersionCheck) { @@ -293,17 +299,18 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedGet()->Record.GetRef(); - Y_ABORT_UNLESS(record.GetResponse().GetResults().size() == 1); + Y_VERIFY(record.GetResponse().YdbResultsSize() == 1); TString path, dc; - const auto& rr = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - for (const auto& row : rr.GetList()) { - - path = row.GetStruct(0).GetOptional().GetText(); - dc = row.GetStruct(1).GetOptional().GetText(); + NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + const ui32 rowCount = parser.RowsCount(); + while (parser.TryNextRow()) { + path = *parser.ColumnParser(0).GetOptionalUtf8(); + dc = *parser.ColumnParser(1).GetOptionalUtf8(); NewTopics.emplace_back(decltype(NewTopics)::value_type{path, dc}); } - if (rr.ListSize() > 0) { + + if (rowCount > 0) { LastTopicKey = {path, dc}; return RunQuery(EQueryType::EGetTopics, ctx); } else { @@ -710,7 +717,7 @@ class TPersQueueMetaCacheActor : public TActorBootstrapped); + DynamicNodesMapping.reset(new THashMap); } while(!NodesMappingWaiters.empty()) { ctx.Send(NodesMappingWaiters.front(), diff --git a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp index c0f92246a02a..c9c8c77ce265 100644 --- a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp @@ -169,7 +169,7 @@ class TMessageBusServerPersQueueRequestTestBase: public TTestBase { static int version = 0; ++version; - 
THolder request(new TEvPersQueue::TEvUpdateConfig()); + auto request = MakeHolder(); for (size_t i : partitions) { request->Record.MutableTabletConfig()->AddPartitionIds(i); } diff --git a/ydb/core/cms/api_adapters.cpp b/ydb/core/cms/api_adapters.cpp index 3e141a741122..2b2d37258423 100644 --- a/ydb/core/cms/api_adapters.cpp +++ b/ydb/core/cms/api_adapters.cpp @@ -54,11 +54,33 @@ namespace { } } + Ydb::Maintenance::ActionState::ActionReason ConvertReason(NKikimrCms::TAction::TIssue::EType cmsActionIssueType) { + using EIssueType = NKikimrCms::TAction::TIssue; + switch (cmsActionIssueType) { + case EIssueType::UNKNOWN: + return Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED; + case EIssueType::GENERIC: + return Ydb::Maintenance::ActionState::ACTION_REASON_GENERIC; + case EIssueType::TOO_MANY_UNAVAILABLE_VDISKS: + return Ydb::Maintenance::ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; + case EIssueType::TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS: + return Ydb::Maintenance::ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS; + case EIssueType::DISABLED_NODES_LIMIT_REACHED: + return Ydb::Maintenance::ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED; + case EIssueType::TENANT_DISABLED_NODES_LIMIT_REACHED: + return Ydb::Maintenance::ActionState::ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED; + case EIssueType::SYS_TABLETS_NODE_LIMIT_REACHED: + return Ydb::Maintenance::ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED; + } + return Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED; + } + void ConvertAction(const NKikimrCms::TAction& cmsAction, Ydb::Maintenance::ActionState& actionState) { ConvertAction(cmsAction, *actionState.mutable_action()->mutable_lock_action()); // FIXME: specify action_uid actionState.set_status(Ydb::Maintenance::ActionState::ACTION_STATUS_PENDING); - actionState.set_reason(Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED); // FIXME: specify + 
actionState.set_reason(ConvertReason(cmsAction.GetIssue().GetType())); + actionState.set_reason_details(cmsAction.GetIssue().GetMessage()); } void ConvertActionUid(const TString& taskUid, const TString& permissionId, diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index de2c85473518..19fea924fe1a 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -37,13 +37,6 @@ using TClusterInfoPtr = TIntrusivePtr; struct TCmsState; using TCmsStatePtr = TIntrusivePtr; -struct TErrorInfo { - NKikimrCms::TStatus::ECode Code = NKikimrCms::TStatus::ALLOW; - TString Reason; - TInstant Deadline; - ui64 RollbackPoint = 0; -}; - /** * Structure to hold info about issued permission. A set of * all issued permissions is a part of CMS persistent state. diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 5c1cf97ab814..2b459c6925cb 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -36,6 +36,38 @@ namespace NKikimr::NCms { using namespace NNodeWhiteboard; using namespace NKikimrCms; +namespace { + +constexpr size_t MAX_ISSUES_TO_STORE = 100; + +TAction::TIssue ConvertIssue(const TReason& reason) { + TAction::TIssue issue; + switch (reason.GetType()) { + case TReason::EType::Generic: + issue.SetType(TAction::TIssue::GENERIC); + break; + case TReason::EType::TooManyUnavailableVDisks: + issue.SetType(TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + break; + case TReason::EType::TooManyUnavailableStateStorageRings: + issue.SetType(TAction::TIssue::TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS); + break; + case TReason::EType::DisabledNodesLimitReached: + issue.SetType(TAction::TIssue::DISABLED_NODES_LIMIT_REACHED); + break; + case TReason::EType::TenantDisabledNodesLimitReached: + issue.SetType(TAction::TIssue::TENANT_DISABLED_NODES_LIMIT_REACHED); + break; + case TReason::EType::SysTabletsNodeLimitReached: + issue.SetType(TAction::TIssue::SYS_TABLETS_NODE_LIMIT_REACHED); + break; + } + issue.SetMessage(reason.GetMessage()); + 
return issue; +} + +} // anonymous namespace + void TCms::DefaultSignalTabletActive(const TActorContext &) { // must be empty @@ -326,6 +358,8 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, }; auto point = ClusterInfo->PushRollbackPoint(); + size_t storedIssues = 0; + size_t processedActions = 0; for (const auto &action : request.GetActions()) { TDuration permissionDuration = State->Config.DefaultPermissionDuration; if (request.HasDuration()) @@ -352,28 +386,40 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, auto *permission = response.AddPermissions(); permission->MutableAction()->CopyFrom(action); + permission->MutableAction()->ClearIssue(); permission->SetDeadline(error.Deadline.GetValue()); AddPermissionExtensions(action, *permission); ClusterInfo->AddTempLocks(action, &ctx); } else { LOG_DEBUG(ctx, NKikimrServices::CMS, "Result: %s (reason: %s)", - ToString(error.Code).data(), error.Reason.data()); + ToString(error.Code).data(), error.Reason.GetMessage().data()); if (CodesRate[response.GetStatus().GetCode()] > CodesRate[error.Code]) { response.MutableStatus()->SetCode(error.Code); - response.MutableStatus()->SetReason(error.Reason); + response.MutableStatus()->SetReason(error.Reason.GetMessage()); if (error.Code == TStatus::DISALLOW_TEMP || error.Code == TStatus::ERROR_TEMP) response.SetDeadline(error.Deadline.GetValue()); } + if (schedule) { + auto *scheduledAction = scheduled.AddActions(); + scheduledAction->CopyFrom(action); + + // Limit stored issues to avoid overloading the local database + if (storedIssues < MAX_ISSUES_TO_STORE) { + *scheduledAction->MutableIssue() = ConvertIssue(error.Reason); + ++storedIssues; + } else { + scheduledAction->ClearIssue(); + } + } + if (!allowPartial) break; - - if (schedule) - scheduled.AddActions()->CopyFrom(action); } + ++processedActions; } ClusterInfo->RollbackLocks(point); @@ -396,9 +442,21 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, if 
(schedule && response.GetStatus().GetCode() != TStatus::ALLOW_PARTIAL) { if (response.GetStatus().GetCode() == TStatus::DISALLOW_TEMP || response.GetStatus().GetCode() == TStatus::ERROR_TEMP) - scheduled.MutableActions()->CopyFrom(request.GetActions()); - else + { + if (!allowPartial) { + // Only the first problem action was scheduled during + // the actions check loop. Merge it with rest actions. + Y_ABORT_UNLESS(scheduled.ActionsSize() == 1); + TAction::TIssue issue = std::move(*scheduled.MutableActions()->begin()->MutableIssue()); + scheduled.MutableActions()->CopyFrom(request.GetActions()); + for (auto &action : *scheduled.MutableActions()) { + action.ClearIssue(); + } + *scheduled.MutableActions(processedActions)->MutableIssue() = std::move(issue); + } + } else { scheduled.ClearActions(); + } } return response.GetStatus().GetCode() == TStatus::ALLOW @@ -505,6 +563,12 @@ bool TCms::CheckEvictVDisks(const TAction &action, TErrorInfo &error) const { return false; } + if (State->Config.SentinelConfig.EvictVDisksStatus.Empty()) { + error.Code = TStatus::ERROR; + error.Reason = "Evict vdisks is disabled in Sentinel (self heal)"; + return false; + } + switch (action.GetType()) { case TAction::RESTART_SERVICES: case TAction::SHUTDOWN_HOST: @@ -701,12 +765,15 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action, case MODE_MAX_AVAILABILITY: if (restartRings + lockedRings > 1) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many unavailable state storage rings" - << ". Restarting rings: " - << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) - << ". Temporary (for a 2 minutes) locked rings: " - << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) - << ". Maximum allowed number of unavailable rings for this mode: " << 1; + error.Reason = TReason( + TStringBuilder() << "Too many unavailable state storage rings" + << ". 
Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". Maximum allowed number of unavailable rings for this mode: " << 1, + TReason::EType::TooManyUnavailableStateStorageRings + ); error.Deadline = defaultDeadline; return false; } @@ -714,13 +781,16 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action, case MODE_KEEP_AVAILABLE: if (restartRings + lockedRings + disabledRings > (nToSelect - 1) / 2) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many unavailable state storage rings" - << ". Restarting rings: " - << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) - << ". Temporary (for a 2 minutes) locked rings: " - << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) - << ". Disabled rings: " << disabledRings - << ". Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2; + error.Reason = TReason( + TStringBuilder() << "Too many unavailable state storage rings" + << ". Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". Disabled rings: " << disabledRings + << ". 
Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2, + TReason::EType::TooManyUnavailableStateStorageRings + ); error.Deadline = defaultDeadline; return false; } @@ -1484,6 +1554,13 @@ void TCms::CheckAndEnqueueRequest(TEvCms::TEvPermissionRequest::TPtr &ev, const ev, TStatus::WRONG_REQUEST, "Priority value is out of range", ctx); } + for (const auto &action : rec.GetActions()) { + if (action.HasIssue()) { + return ReplyWithError( + ev, TStatus::WRONG_REQUEST, TStringBuilder() << "Action issue is read-only", ctx); + } + } + EnqueueRequest(ev.Release(), ctx); } diff --git a/ydb/core/cms/cms_maintenance_api_ut.cpp b/ydb/core/cms/cms_maintenance_api_ut.cpp index a1f49f8616b8..151461525408 100644 --- a/ydb/core/cms/cms_maintenance_api_ut.cpp +++ b/ydb/core/cms/cms_maintenance_api_ut.cpp @@ -69,6 +69,32 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) { ) ); } + + Y_UNIT_TEST(ActionReason) { + TCmsTestEnv env(8); + + auto response = env.CheckMaintenanceTaskCreate("task-1", Ydb::StatusIds::SUCCESS, + MakeActionGroup( + MakeLockAction(env.GetNodeId(0), TDuration::Minutes(10)) + ), + MakeActionGroup( + MakeLockAction(env.GetNodeId(1), TDuration::Minutes(10)) + ) + ); + + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states(0).action_states().size(), 1); + const auto &a1 = response.action_group_states(0).action_states(0); + UNIT_ASSERT_VALUES_EQUAL(a1.status(), ActionState::ACTION_STATUS_PERFORMED); + UNIT_ASSERT_VALUES_EQUAL(a1.reason(), ActionState::ACTION_REASON_OK); + UNIT_ASSERT(a1.reason_details().empty()); + + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states(1).action_states().size(), 1); + const auto &a2 = response.action_group_states(1).action_states(0); + UNIT_ASSERT_VALUES_EQUAL(a2.status(), ActionState::ACTION_STATUS_PENDING); + UNIT_ASSERT_VALUES_EQUAL(a2.reason(), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); + 
UNIT_ASSERT(a2.reason_details().Contains("too many unavailable vdisks")); + } } } // namespace NKikimr::NCmsTest diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp index 54279556431d..d9050fd8eb7e 100644 --- a/ydb/core/cms/cms_ut.cpp +++ b/ydb/core/cms/cms_ut.cpp @@ -631,6 +631,104 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckListRequests("user1", 0); } + Y_UNIT_TEST(ActionIssue) + { + TCmsTestEnv env(16); + + // Acquire lock on one node + auto rec = env.CheckPermissionRequest + ("user", false, false, true, true, TStatus::ALLOW, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + auto pid = rec.GetPermissions(0).GetId(); + + // Schedule request + rec = env.CheckPermissionRequest + ("user", false, false, true, true, TStatus::DISALLOW_TEMP, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(9), 60000000), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 0); + + auto rid = rec.GetRequestId(); + + // Get scheduled request + auto scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 2); + auto action1 = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT(!action1.HasIssue()); + auto action2 = scheduledRec.GetRequests(0).GetActions(1); + UNIT_ASSERT(action2.HasIssue()); + UNIT_ASSERT_VALUES_EQUAL(action2.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Try to check request + env.CheckRequest("user", rid, false, TStatus::DISALLOW_TEMP); + + // Get scheduled request + scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 2); + action1 = scheduledRec.GetRequests(0).GetActions(0); + 
UNIT_ASSERT(!action1.HasIssue()); + action2 = scheduledRec.GetRequests(0).GetActions(1); + UNIT_ASSERT(action2.HasIssue()); + UNIT_ASSERT_VALUES_EQUAL(action2.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Done with permission + env.CheckDonePermission("user", pid); + + // Try to check request + rec = env.CheckRequest("user", rid, false, TStatus::ALLOW, 2); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + UNIT_ASSERT(!rec.GetPermissions(1).GetAction().HasIssue()); + + env.CheckGetRequest("user", rid, false, TStatus::WRONG_REQUEST); + } + + Y_UNIT_TEST(ActionIssuePartialPermissions) + { + TCmsTestEnv env(8); + + // Schedule request + auto rec = env.CheckPermissionRequest + ("user", true, false, true, true, TStatus::ALLOW_PARTIAL, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + auto pid = rec.GetPermissions(0).GetId(); + auto rid = rec.GetRequestId(); + + // Get scheduled request + auto scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 1); + auto action = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT_VALUES_EQUAL(action.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Try to check request + env.CheckRequest("user", rid, false, TStatus::DISALLOW_TEMP); + + // Get scheduled request + scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 1); + action = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT_VALUES_EQUAL(action.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Done with permission + 
env.CheckDonePermission("user", pid); + + // Try to check request + rec = env.CheckRequest("user", rid, false, TStatus::ALLOW, 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + env.CheckGetRequest("user", rid, false, TStatus::WRONG_REQUEST); + } + Y_UNIT_TEST(WalleTasks) { TCmsTestEnv env(24, 4); @@ -1808,9 +1906,94 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckRejectRequest("user", request3.GetRequestId()); } + Y_UNIT_TEST(AllVDisksEvictionInRack) + { + auto opts = TTestEnvOpts(8) + .WithSentinel() + .WithNodeLocationCallback([](ui32 nodeId) { + NActorsInterconnect::TNodeLocation location; + location.SetRack(ToString(nodeId / 2 + 1)); + return TNodeLocation(location); // Node = [0, 1, 2, 3, 4, 5, 6, 7] + // Rack = [1, 1, 2, 2, 3, 3, 4, 4] + }); + TCmsTestEnv env(opts); + env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); + + // Evict all VDisks from rack 1 + auto request1 = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + auto request2 = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + + // Check that FAULTY BSC requests are sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0), env.GetNodeId(1) }, NKikimrBlobStorage::FAULTY); + + // "Move" VDisks from rack 1 + auto& node1 = TFakeNodeWhiteboardService::Info[env.GetNodeId(0)]; + node1.VDisksMoved = true; + node1.VDiskStateInfo.clear(); + auto& node2 = TFakeNodeWhiteboardService::Info[env.GetNodeId(1)]; + node2.VDisksMoved = true; + node2.VDiskStateInfo.clear(); + env.RegenerateBSConfig(TFakeNodeWhiteboardService::Config.MutableResponse()->MutableStatus(0)->MutableBaseConfig(), opts); + + auto permission1 = 
env.CheckRequest("user", request1.GetRequestId(), false, TStatus::ALLOW, 1); + auto permission2 = env.CheckRequest("user", request2.GetRequestId(), false, TStatus::ALLOW, 1); + env.CheckDonePermission("user", permission1.GetPermissions(0).GetId()); + env.CheckDonePermission("user", permission2.GetPermissions(0).GetId()); + } + + Y_UNIT_TEST(DisabledEvictVDisks) + { + auto opts = TTestEnvOpts(8).WithSentinel(); + TCmsTestEnv env(opts); + env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); + + // Make transition faster for tests purposes + auto cmsConfig = env.GetCmsConfig(); + cmsConfig.MutableSentinelConfig()->SetDefaultStateLimit(1); + env.SetCmsConfig(cmsConfig); + + // Evict VDisks + auto request = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + + // Check that FAULTY BSC request is sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY); + + // Disable VDisks eviction + cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::DISABLED); + env.SetCmsConfig(cmsConfig); + + // Check that ACTIVE BSC request is sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::ACTIVE); + + // Check that CMS returns ERROR when VDisks eviction is disabled + env.CheckRequest("user", request.GetRequestId(), false, TStatus::ERROR, 0); + + // Enable VDisks eviction again + cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::FAULTY); + env.SetCmsConfig(cmsConfig); + + // Check that FAULTY BSC request is sent again + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY); + } + Y_UNIT_TEST(EmergencyDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling 
restart auto rollingRestart = env.CheckPermissionRequest @@ -1839,7 +2022,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(ScheduledEmergencyDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1871,7 +2054,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(WalleRequestDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1899,7 +2082,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(ScheduledWalleRequestDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1930,7 +2113,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(EnableCMSRequestPrioritiesFeatureFlag) { - TCmsTestEnv env(8); + TCmsTestEnv env(TTestEnvOpts(8).WithoutEnableCMSRequestPriorities()); // Start rolling restart with specified priority auto rollingRestart = env.CheckPermissionRequest ("user", true, false, true, true, -80, TStatus::WRONG_REQUEST, @@ -1943,7 +2126,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(SamePriorityRequest) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1973,7 +2156,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(SamePriorityRequest2) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -2003,7 +2186,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(PriorityRange) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); const TString expectedReason = "Priority value is out of range"; @@ -2024,7 +2207,7 @@ 
Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(WalleTasksDifferentPriorities) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Without node limits NKikimrCms::TCmsConfig config; diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index b57c54dcc01c..4a0f6715e431 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -29,6 +29,17 @@ const bool ENABLE_DETAILED_CMS_LOG = true; const bool ENABLE_DETAILED_CMS_LOG = false; #endif +#define COMMA , +Y_DECLARE_OUT_SPEC(, std::map>, o, value) { + std::vector pairs; + for (const auto& [status, nodes] : value) { + pairs.push_back( + TStringBuilder() << status << "=" << '[' << JoinSeq(',', nodes) << ']' + ); + } + o << '[' << JoinSeq(',', pairs) << ']'; +}; + namespace NKikimr { namespace NCmsTest { @@ -391,7 +402,7 @@ static NKikimrConfig::TBootstrap GenerateBootstrapConfig(TTestActorRuntime &runt return res; } -static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &options) { +static void SetupServices(TTestBasicRuntime &runtime, const TTestEnvOpts &options) { const ui32 domainsNum = 1; const ui32 disksInDomain = 1; @@ -503,6 +514,7 @@ static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &option ), 0); + runtime.LocationCallback = options.NodeLocationCallback; runtime.Initialize(app.Unwrap()); auto dnsConfig = new TDynamicNameserviceConfig(); dnsConfig->MaxStaticNodeId = 1000; @@ -868,6 +880,39 @@ TCmsTestEnv::CheckRequest(const TString &user, return rec; } +void TCmsTestEnv::CheckBSCUpdateRequests(std::set expectedNodes, + NKikimrBlobStorage::EDriveStatus expectedStatus) +{ + using TBSCRequests = std::map>; + + TBSCRequests expectedRequests = { {expectedStatus, expectedNodes} }; + TBSCRequests actualRequests; + + TDispatchOptions options; + options.FinalEvents.emplace_back([&](IEventHandle& ev) { + if (ev.GetTypeRewrite() == TEvBlobStorage::TEvControllerConfigRequest::EventType) { + const 
auto& request = ev.Get()->Record; + bool foundUpdateDriveCommand = false; + for (const auto& command : request.GetRequest().GetCommand()) { + if (command.HasUpdateDriveStatus()) { + foundUpdateDriveCommand = true; + const auto& update = command.GetUpdateDriveStatus(); + actualRequests[update.GetStatus()].insert(update.GetHostKey().GetNodeId()); + } + } + return foundUpdateDriveCommand; + } + return false; + }); + DispatchEvents(options, TDuration::Minutes(1)); + + UNIT_ASSERT_C( + actualRequests == expectedRequests, + TStringBuilder() << "Sentinel sent wrong update requests to BSC: " + << "expected# " << expectedRequests + << ", actual# " << actualRequests + ); +} void TCmsTestEnv::CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask* req) { diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h index eafa2624ba97..c719133702f5 100644 --- a/ydb/core/cms/cms_ut_common.h +++ b/ydb/core/cms/cms_ut_common.h @@ -92,6 +92,9 @@ struct TTestEnvOpts { bool EnableCMSRequestPriorities; bool EnableSingleCompositeActionGroup; + using TNodeLocationCallback = std::function; + TNodeLocationCallback NodeLocationCallback; + TTestEnvOpts() = default; TTestEnvOpts(ui32 nodeCount, @@ -107,7 +110,7 @@ struct TTestEnvOpts { , UseMirror3dcErasure(false) , AdvanceCurrentTime(false) , EnableSentinel(false) - , EnableCMSRequestPriorities(false) + , EnableCMSRequestPriorities(true) , EnableSingleCompositeActionGroup(true) { } @@ -122,10 +125,16 @@ struct TTestEnvOpts { return *this; } - TTestEnvOpts& WithEnableCMSRequestPriorities() { - EnableCMSRequestPriorities = true; + TTestEnvOpts& WithoutEnableCMSRequestPriorities() { + EnableCMSRequestPriorities = false; + return *this; + } + + TTestEnvOpts& WithNodeLocationCallback(TNodeLocationCallback nodeLocationCallback) { + NodeLocationCallback = nodeLocationCallback; return *this; } + }; class TCmsTestEnv : public TTestBasicRuntime { @@ -323,6 +332,8 @@ class TCmsTestEnv : public TTestBasicRuntime { return 
CheckRequest(user, id, dry, NKikimrCms::MODE_MAX_AVAILABILITY, res, count); } + void CheckBSCUpdateRequests(std::set expectedNodes, NKikimrBlobStorage::EDriveStatus expectedStatus); + void CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask *req); template diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index 6eeb5407111a..4f2f6a7259b9 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -1,12 +1,14 @@ #pragma once #include "pdisk_state.h" +#include "pdisk_status.h" #include #include #include #include +#include namespace NKikimr::NCms { @@ -30,6 +32,8 @@ struct TCmsSentinelConfig { ui32 RoomRatio; ui32 RackRatio; + TMaybeFail EvictVDisksStatus; + void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { config.SetEnable(Enable); config.SetDryRun(DryRun); @@ -45,6 +49,7 @@ struct TCmsSentinelConfig { config.SetRackRatio(RackRatio); SaveStateLimits(config); + SaveEvictVDisksStatus(config); } void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { @@ -63,6 +68,8 @@ struct TCmsSentinelConfig { auto newStateLimits = LoadStateLimits(config); StateLimits.swap(newStateLimits); + + EvictVDisksStatus = LoadEvictVDisksStatus(config); } void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { @@ -129,6 +136,31 @@ struct TCmsSentinelConfig { return stateLimits; } + + static TMaybeFail LoadEvictVDisksStatus(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { + using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig; + switch (config.GetEvictVDisksStatus()) { + case EEvictVDisksStatus::UNKNOWN: + case EEvictVDisksStatus::FAULTY: + return EPDiskStatus::FAULTY; + case EEvictVDisksStatus::DISABLED: + return Nothing(); + } + return EPDiskStatus::FAULTY; + } + + void SaveEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { + using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig; + + if (EvictVDisksStatus.Empty()) { + 
config.SetEvictVDisksStatus(EEvictVDisksStatus::DISABLED); + return; + } + + if (*EvictVDisksStatus == EPDiskStatus::FAULTY) { + config.SetEvictVDisksStatus(EEvictVDisksStatus::FAULTY); + } + } }; struct TCmsLogConfig { diff --git a/ydb/core/cms/console/configs_dispatcher.cpp b/ydb/core/cms/console/configs_dispatcher.cpp index 9688829f0f9b..3c0cc91dcb00 100644 --- a/ydb/core/cms/console/configs_dispatcher.cpp +++ b/ydb/core/cms/console/configs_dispatcher.cpp @@ -927,7 +927,7 @@ void TConfigsDispatcher::Handle(TEvConsole::TEvConfigSubscriptionNotification::T if (subscription->Yaml && YamlConfigEnabled) { ReplaceConfigItems(YamlProtoConfig, trunc, FilterKinds(subscription->Kinds), BaseConfig); } else { - Y_FOR_EACH_BIT(kind, kinds) { + Y_FOR_EACH_BIT(kind, FilterKinds(kinds)) { if (affectedKinds.contains(kind)) { hasAffectedKinds = true; } @@ -941,15 +941,15 @@ void TConfigsDispatcher::Handle(TEvConsole::TEvConfigSubscriptionNotification::T ReplaceConfigItems(ev->Get()->Record.GetConfig(), trunc, FilterKinds(kinds), BaseConfig); } - if (hasAffectedKinds || !CompareConfigs(subscription->CurrentConfig.Config, trunc) || CurrentStateFunc() == &TThis::StateInit) { + if (hasAffectedKinds || !CompareConfigs(subscription->CurrentConfig.Config, trunc, FilterKinds(kinds)) || CurrentStateFunc() == &TThis::StateInit) { subscription->UpdateInProcess = MakeHolder(); subscription->UpdateInProcess->Record.MutableConfig()->CopyFrom(trunc); subscription->UpdateInProcess->Record.SetLocal(true); - Y_FOR_EACH_BIT(kind, kinds) { + Y_FOR_EACH_BIT(kind, FilterKinds(kinds)) { subscription->UpdateInProcess->Record.AddItemKinds(kind); } subscription->UpdateInProcessCookie = ++NextRequestCookie; - subscription->UpdateInProcessConfigVersion = FilterVersion(ev->Get()->Record.GetConfig().GetVersion(), kinds); + subscription->UpdateInProcessConfigVersion = FilterVersion(ev->Get()->Record.GetConfig().GetVersion(), FilterKinds(kinds)); if (YamlConfigEnabled) { UpdateYamlVersion(subscription); @@ 
-1046,6 +1046,12 @@ void TConfigsDispatcher::Handle(TEvConfigsDispatcher::TEvSetConfigSubscriptionRe "SetConfigSubscriptionRequest handler"); Y_UNUSED(nonYamlKinds); auto kinds = KindsToBitMap(ev->Get()->ConfigItemKinds); + + auto truncKinds = FilterKinds(kinds); + if (truncKinds.Empty() && !kinds.Empty()) { + return; + } + auto subscriberActor = ev->Get()->Subscriber ? ev->Get()->Subscriber : ev->Sender; auto subscription = FindSubscription(kinds); diff --git a/ydb/core/cms/console/console__replace_yaml_config.cpp b/ydb/core/cms/console/console__replace_yaml_config.cpp index 1d6840692492..1e510997c812 100644 --- a/ydb/core/cms/console/console__replace_yaml_config.cpp +++ b/ydb/core/cms/console/console__replace_yaml_config.cpp @@ -1,5 +1,6 @@ #include "console_configs_manager.h" #include "console_configs_provider.h" +#include "console_audit.h" #include #include @@ -16,6 +17,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseGet()->Record.GetRequest().config()) + , Peer(ev->Get()->Record.GetPeerName()) , Sender(ev->Sender) , UserSID(NACLib::TUserToken(ev->Get()->Record.GetUserToken()).GetUserSID()) , Force(force) @@ -146,6 +148,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseRecord.AddIssues(); issue->set_severity(NYql::TSeverityIds::S_ERROR); issue->set_message(ex.what()); + ErrorReason = ex.what(); Response = MakeHolder(Sender, ctx.SelfID, ev.Release()); } @@ -159,6 +162,14 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseYamlConfig, + /* newConfig = */ Config, + /* reason = */ {}, + /* success = */ true); + Self->YamlVersion = Version + 1; Self->YamlConfig = UpdatedConfig; Self->YamlDropped = false; @@ -167,6 +178,14 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBase(Self->YamlConfig); ctx.Send(Self->ConfigsProvider, resp.Release()); + } else if (Error && !DryRun) { + AuditLogReplaceConfigTransaction( + /* peer = */ Peer, + /* userSID = */ UserSID, + /* oldConfig = */ 
Self->YamlConfig, + /* newConfig = */ Config, + /* reason = */ ErrorReason, + /* success = */ false); } Self->TxProcessor->TxCompleted(this, ctx); @@ -174,6 +193,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBase Response; bool Error = false; + TString ErrorReason; bool Modify = false; TSimpleSharedPtr UnknownFieldsCollector = nullptr; ui32 Version; diff --git a/ydb/core/cms/console/console_audit.cpp b/ydb/core/cms/console/console_audit.cpp new file mode 100644 index 000000000000..8ea7d2e6b9cb --- /dev/null +++ b/ydb/core/cms/console/console_audit.cpp @@ -0,0 +1,34 @@ +#include "console_audit.h" + +#include +#include + +namespace NKikimr::NConsole { + +void AuditLogReplaceConfigTransaction( + const TString& peer, + const TString& userSID, + const TString& oldConfig, + const TString& newConfig, + const TString& reason, + bool success) +{ + static const TString COMPONENT_NAME = "console"; + + static const TString EMPTY_VALUE = "{none}"; + + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(peer); + + AUDIT_LOG( + AUDIT_PART("component", COMPONENT_NAME) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EMPTY_VALUE)) + AUDIT_PART("subject", (!userSID.empty() ? userSID : EMPTY_VALUE)) + AUDIT_PART("status", TString(success ? 
"SUCCESS" : "ERROR")) + AUDIT_PART("reason", reason, !reason.empty()) + AUDIT_PART("operation", TString("REPLACE DYNCONFIG")) + AUDIT_PART("old_config", oldConfig) + AUDIT_PART("new_config", newConfig) + ); +} + +} // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_audit.h b/ydb/core/cms/console/console_audit.h new file mode 100644 index 000000000000..028176090129 --- /dev/null +++ b/ydb/core/cms/console/console_audit.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace NKikimr::NConsole { + +void AuditLogReplaceConfigTransaction( + const TString& peer, + const TString& userSID, + const TString& oldConfig, + const TString& newConfig, + const TString& reason, + bool success); + +} // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_configs_manager.cpp b/ydb/core/cms/console/console_configs_manager.cpp index b9495819e6f6..6906c6b307cf 100644 --- a/ydb/core/cms/console/console_configs_manager.cpp +++ b/ydb/core/cms/console/console_configs_manager.cpp @@ -1,6 +1,7 @@ #include "console_configs_manager.h" #include "configs_dispatcher.h" +#include "console_audit.h" #include "console_configs_provider.h" #include "console_impl.h" #include "http.h" @@ -974,4 +975,24 @@ void TConfigsManager::ScheduleLogCleanup(const TActorContext &ctx) LogCleanupTimerCookieHolder.Get()); } +void TConfigsManager::HandleUnauthorized(TEvConsole::TEvReplaceYamlConfigRequest::TPtr &ev, const TActorContext &) { + AuditLogReplaceConfigTransaction( + /* peer = */ ev->Get()->Record.GetPeerName(), + /* userSID = */ ev->Get()->Record.GetUserToken(), + /* oldConfig = */ YamlConfig, + /* newConfig = */ ev->Get()->Record.GetRequest().config(), + /* reason = */ "Unauthorized.", + /* success = */ false); +} + +void TConfigsManager::HandleUnauthorized(TEvConsole::TEvSetYamlConfigRequest::TPtr &ev, const TActorContext &) { + AuditLogReplaceConfigTransaction( + /* peer = */ ev->Get()->Record.GetPeerName(), + /* userSID = */ ev->Get()->Record.GetUserToken(), + /* 
oldConfig = */ YamlConfig, + /* newConfig = */ ev->Get()->Record.GetRequest().config(), + /* reason = */ "Unauthorized.", + /* success = */ false); +} + } // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_configs_manager.h b/ydb/core/cms/console/console_configs_manager.h index 63feebe774f7..7884c6eb5341 100644 --- a/ydb/core/cms/console/console_configs_manager.h +++ b/ydb/core/cms/console/console_configs_manager.h @@ -152,6 +152,8 @@ class TConfigsManager : public TActorBootstrapped { void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx); void Handle(TEvConsole::TEvReplaceYamlConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvConsole::TEvSetYamlConfigRequest::TPtr & ev, const TActorContext & ctx); + void HandleUnauthorized(TEvConsole::TEvReplaceYamlConfigRequest::TPtr & ev, const TActorContext & ctx); + void HandleUnauthorized(TEvConsole::TEvSetYamlConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvConsole::TEvDropConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvPrivate::TEvStateLoaded::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvCleanupSubscriptions::TPtr &ev, const TActorContext &ctx); @@ -160,9 +162,16 @@ class TConfigsManager : public TActorBootstrapped { template void HandleWithRights(T &ev, const TActorContext &ctx) { + constexpr bool HasHandleUnauthorized = requires(T &ev) { + HandleUnauthorized(ev, ctx); + }; + if (CheckRights(ev->Get()->Record.GetUserToken())) { Handle(ev, ctx); } else { + if constexpr (HasHandleUnauthorized) { + HandleUnauthorized(ev, ctx); + } auto req = MakeHolder(); ctx.Send(ev->Sender, req.Release()); } diff --git a/ydb/core/cms/console/ya.make b/ydb/core/cms/console/ya.make index 6fecc0620508..57092fca8ea4 100644 --- a/ydb/core/cms/console/ya.make +++ b/ydb/core/cms/console/ya.make @@ -11,6 +11,8 @@ SRCS( configs_dispatcher.h console.cpp console.h + console_audit.cpp + console_audit.h console_configs_manager.cpp 
console_configs_manager.h console_configs_provider.cpp diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp index 8d1b257ec1a2..1d753ba32cf0 100644 --- a/ydb/core/cms/erasure_checkers.cpp +++ b/ydb/core/cms/erasure_checkers.cpp @@ -77,10 +77,13 @@ bool TErasureCounterBase::CheckForMaxAvailability(TClusterInfoPtr info, TErrorIn } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } @@ -150,10 +153,13 @@ bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErr } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } @@ -191,20 +197,26 @@ bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInf if (DataCenterDisabledNodes.size() > 2) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Number of data centers with unavailable vdisks: " << DataCenterDisabledNodes.size() << ". 
Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; diff --git a/ydb/core/cms/error_info.h b/ydb/core/cms/error_info.h new file mode 100644 index 000000000000..c6346f133330 --- /dev/null +++ b/ydb/core/cms/error_info.h @@ -0,0 +1,59 @@ +#pragma once + +#include "defs.h" + +namespace NKikimr::NCms { + +class TReason { +public: + // Must be sync with proto enum + enum class EType { + Generic, + TooManyUnavailableVDisks, + TooManyUnavailableStateStorageRings, + DisabledNodesLimitReached, + TenantDisabledNodesLimitReached, + SysTabletsNodeLimitReached, + }; + + TReason(const TString &message, EType type = EType::Generic) + : Message(message) + , Type(type) + {} + + TReason(const char* message, EType type = EType::Generic) + : Message(message) + , Type(type) + {} + + TReason() = default; + + operator TString() const { + return Message; + } + + const TString& GetMessage() const { + return Message; + } + + EType GetType() const { + return Type; + } + +private: + TString Message; + EType Type = EType::Generic; +}; + +struct TErrorInfo { + NKikimrCms::TStatus::ECode Code = NKikimrCms::TStatus::ALLOW; + TReason Reason; + TInstant Deadline; + ui64 RollbackPoint = 0; +}; + +} // namespace NKikimr::NCms + +Y_DECLARE_OUT_SPEC(inline, NKikimr::NCms::TReason, stream, value) { + stream << value.GetMessage(); 
+} diff --git a/ydb/core/cms/json_proxy_proto.h b/ydb/core/cms/json_proxy_proto.h index 8079eb6971e2..d65d419f4495 100644 --- a/ydb/core/cms/json_proxy_proto.h +++ b/ydb/core/cms/json_proxy_proto.h @@ -80,6 +80,8 @@ class TJsonProxyProto : public TActorBootstrapped { return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TVDiskControls::descriptor(), ctx); else if (name == ".NKikimrConfig.TImmediateControlsConfig.TTabletControls") return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TTabletControls::descriptor(), ctx); + else if (name == ".NKikimrConfig.TImmediateControlsConfig.TBlobStorageControllerControls") + return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TBlobStorageControllerControls::descriptor(), ctx); } ctx.Send(RequestEvent->Sender, diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp index 51306ab3efb6..b478063a6d98 100644 --- a/ydb/core/cms/node_checkers.cpp +++ b/ydb/core/cms/node_checkers.cpp @@ -87,7 +87,7 @@ const THashMap& TNodesCounterBase::GetNodeToSta return NodeToState; } -bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const { +bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const { Y_ABORT_UNLESS(NodeToState.contains(nodeId)); auto nodeState = NodeToState.at(nodeId); @@ -126,28 +126,34 @@ bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili const auto disabledNodes = LockedNodesCount + DownNodesCount + 1; if (DisabledNodesLimit > 0 && disabledNodes > DisabledNodesLimit) { - reason = TStringBuilder() << ReasonPrefix(nodeId) + reason = TReason( + TStringBuilder() << ReasonPrefix(nodeId) << ": too many unavailable nodes." 
<< " Locked: " << LockedNodesCount << ", down: " << DownNodesCount - << ", limit: " << DisabledNodesLimit; + << ", limit: " << DisabledNodesLimit, + DisabledNodesLimitReachedReasonType() + ); return false; } if (DisabledNodesRatioLimit > 0 && (disabledNodes * 100 > NodeToState.size() * DisabledNodesRatioLimit)) { - reason = TStringBuilder() << ReasonPrefix(nodeId) + reason = TReason( + TStringBuilder() << ReasonPrefix(nodeId) << ": too many unavailable nodes." << " Locked: " << LockedNodesCount << ", down: " << DownNodesCount << ", total: " << NodeToState.size() - << ", limit: " << DisabledNodesRatioLimit << "%"; + << ", limit: " << DisabledNodesRatioLimit << "%", + DisabledNodesLimitReachedReasonType() + ); return false; } return true; } -bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const { +bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const { Y_ABORT_UNLESS(NodeToState.contains(nodeId)); auto nodeState = NodeToState.at(nodeId); @@ -198,12 +204,15 @@ bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili Y_ABORT("Unknown availability mode"); } - reason = TStringBuilder() << "Cannot lock node '" << nodeId << "'" + reason = TReason( + TStringBuilder() << "Cannot lock node '" << nodeId << "'" << ": tablet '" << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) << "'" << " has too many unavailable nodes." 
<< " Locked: " << LockedNodesCount << ", down: " << DownNodesCount - << ", limit: " << limit; + << ", limit: " << limit, + TReason::EType::SysTabletsNodeLimitReached + ); return false; } diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h index a71066cfe2df..55b606e5ce58 100644 --- a/ydb/core/cms/node_checkers.h +++ b/ydb/core/cms/node_checkers.h @@ -1,6 +1,7 @@ #pragma once #include "defs.h" +#include "error_info.h" #include #include @@ -39,7 +40,7 @@ class INodesChecker { virtual void LockNode(ui32 nodeId) = 0; virtual void UnlockNode(ui32 nodeId) = 0; - virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const = 0; + virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const = 0; }; /** @@ -80,7 +81,13 @@ class TNodesLimitsCounterBase : public TNodesCounterBase { ui32 DisabledNodesLimit; ui32 DisabledNodesRatioLimit; - virtual TString ReasonPrefix(ui32 nodeId) const = 0; + virtual TString ReasonPrefix(ui32 nodeId) const { + return TStringBuilder() << "Cannot lock node '" << nodeId << "'"; + } + + virtual TReason::EType DisabledNodesLimitReachedReasonType() const { + return TReason::EType::DisabledNodesLimitReached; + }; public: explicit TNodesLimitsCounterBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) @@ -94,7 +101,7 @@ class TNodesLimitsCounterBase : public TNodesCounterBase { DisabledNodesRatioLimit = ratioLimit; } - bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const override final; }; class TTenantLimitsCounter : public TNodesLimitsCounterBase { @@ -106,6 +113,10 @@ class TTenantLimitsCounter : public TNodesLimitsCounterBase { return TStringBuilder() << "Cannot lock node '" << nodeId << "' of tenant '" << TenantName << "'"; } + TReason::EType DisabledNodesLimitReachedReasonType() const override final 
{ + return TReason::EType::TenantDisabledNodesLimitReached; + } + public: explicit TTenantLimitsCounter(const TString& tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) @@ -115,11 +126,6 @@ class TTenantLimitsCounter : public TNodesLimitsCounterBase { }; class TClusterLimitsCounter : public TNodesLimitsCounterBase { -protected: - TString ReasonPrefix(ui32 nodeId) const override final { - return TStringBuilder() << "Cannot lock node '" << nodeId << "'"; - } - public: explicit TClusterLimitsCounter(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) @@ -143,7 +149,7 @@ class TSysTabletsNodesCounter : public TNodesCounterBase { { } - bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const override final; }; } // namespace NKikimr::NCms diff --git a/ydb/core/cms/pdisk_status.h b/ydb/core/cms/pdisk_status.h new file mode 100644 index 000000000000..b36a86cec46c --- /dev/null +++ b/ydb/core/cms/pdisk_status.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NKikimr::NCms { + +using EPDiskStatus = NKikimrBlobStorage::EDriveStatus; + +} // namespace NKikimr::NCms diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index 3ab0d3cf883d..c99e236d4a16 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -125,6 +125,10 @@ void TPDiskStatusComputer::SetForcedStatus(EPDiskStatus status) { ForcedStatus = status; } +bool TPDiskStatusComputer::HasForcedStatus() const { + return ForcedStatus.Defined(); +} + void TPDiskStatusComputer::ResetForcedStatus() { ForcedStatus.Clear(); } @@ -196,6 +200,7 @@ void TPDiskStatus::DisallowChanging() { TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& 
stateLimits) : TPDiskStatus(initialStatus, defaultStateLimit, stateLimits) + , ActualStatus(initialStatus) { Touch(); } @@ -890,15 +895,15 @@ class TSentinel: public TActorBootstrapped { continue; } - if (it->second.HasFaultyMarker()) { - info.SetForcedStatus(EPDiskStatus::FAULTY); + if (it->second.HasFaultyMarker() && Config.EvictVDisksStatus.Defined()) { + info.SetForcedStatus(*Config.EvictVDisksStatus); } else { info.ResetForcedStatus(); } all.AddPDisk(id); if (info.IsChanged()) { - if (info.IsNewStatusGood()) { + if (info.IsNewStatusGood() || info.HasForcedStatus()) { alwaysAllowed.insert(id); } else { changed.AddPDisk(id); diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 8622050ff1d8..52cc9d8af030 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -3,8 +3,7 @@ #include "defs.h" #include "pdiskid.h" #include "pdisk_state.h" - -#include +#include "pdisk_status.h" #include #include @@ -12,7 +11,6 @@ namespace NKikimr::NCms::NSentinel { -using EPDiskStatus = NKikimrBlobStorage::EDriveStatus; using TLimitsMap = TMap; class TPDiskStatusComputer { @@ -29,6 +27,7 @@ class TPDiskStatusComputer { void Reset(); void SetForcedStatus(EPDiskStatus status); + bool HasForcedStatus() const; void ResetForcedStatus(); private: @@ -84,7 +83,7 @@ struct TPDiskInfo using EIgnoreReason = NKikimrCms::TPDiskInfo::EIgnoreReason; EPDiskStatus ActualStatus = EPDiskStatus::ACTIVE; - EPDiskStatus PrevStatus = EPDiskStatus::ACTIVE; + EPDiskStatus PrevStatus = EPDiskStatus::UNKNOWN; TInstant LastStatusChange; bool StatusChangeFailed = false; // means that this pdisk status change last time was the reason of whole request failure diff --git a/ydb/core/cms/ya.make b/ydb/core/cms/ya.make index d3edcc7908e7..1d4c9845dc5f 100644 --- a/ydb/core/cms/ya.make +++ b/ydb/core/cms/ya.make @@ -30,6 +30,7 @@ SRCS( downtime.cpp erasure_checkers.h erasure_checkers.cpp + error_info.h http.cpp http.h info_collector.cpp diff --git 
a/ydb/core/config/init/init.cpp b/ydb/core/config/init/init.cpp index 9890ccabc480..10dcde834b5d 100644 --- a/ydb/core/config/init/init.cpp +++ b/ydb/core/config/init/init.cpp @@ -48,8 +48,8 @@ class TDefaultProtoConfigFileProvider static bool IsFileReadable(const fs::path& p) { std::error_code ec; // For noexcept overload usage. auto perms = fs::status(p, ec).permissions(); - if ((perms & fs::perms::owner_read) != fs::perms::none && - (perms & fs::perms::group_read) != fs::perms::none && + if ((perms & fs::perms::owner_read) != fs::perms::none || + (perms & fs::perms::group_read) != fs::perms::none || (perms & fs::perms::others_read) != fs::perms::none ) { return true; @@ -228,6 +228,7 @@ class TDefaultNodeBrokerClient const TGrpcSslSettings& grpcSettings, const TString addr, const NYdb::NDiscovery::TNodeRegistrationSettings& settings, + const TString& nodeRegistrationToken, const IEnv& env) { TCommandConfig::TServerEndpoint endpoint = TCommandConfig::ParseServerAddress(addr); @@ -242,7 +243,9 @@ class TDefaultNodeBrokerClient config.UseClientCertificate(certificate.c_str(), privateKey.c_str()); } } - config.SetAuthToken(BUILTIN_ACL_ROOT); + if (nodeRegistrationToken) { + config.SetAuthToken(nodeRegistrationToken); + } config.SetEndpoint(endpoint.Address); auto connection = NYdb::TDriver(config); @@ -313,6 +316,7 @@ class TDefaultNodeBrokerClient const TGrpcSslSettings& grpcSettings, const TVector& addrs, const NYdb::NDiscovery::TNodeRegistrationSettings& settings, + const TString& nodeRegistrationToken, const IEnv& env, IInitLogger& logger) { @@ -326,6 +330,7 @@ class TDefaultNodeBrokerClient grpcSettings, addr, settings, + nodeRegistrationToken, env); if (result.IsSuccess()) { logger.Out() << "Success. 
Registered via discovery service as " << result.GetNodeId() << Endl; @@ -387,6 +392,7 @@ class TDefaultNodeBrokerClient grpcSettings, addrs, newRegSettings, + regSettings.NodeRegistrationToken, env, logger); diff --git a/ydb/core/config/init/init.h b/ydb/core/config/init/init.h index bb81c5461369..815bce93b8d0 100644 --- a/ydb/core/config/init/init.h +++ b/ydb/core/config/init/init.h @@ -118,6 +118,7 @@ struct TNodeRegistrationSettings { bool FixedNodeID; ui32 InterconnectPort; NActors::TNodeLocation Location; + TString NodeRegistrationToken; }; class INodeRegistrationResult { diff --git a/ydb/core/config/init/init_impl.h b/ydb/core/config/init/init_impl.h index c2a8a9fee2fd..bbe418c82e93 100644 --- a/ydb/core/config/init/init_impl.h +++ b/ydb/core/config/init/init_impl.h @@ -1119,7 +1119,7 @@ class TInitialConfiguratorImpl } void FillData(const NConfig::TCommonAppOptions& cf) { - if (cf.TenantName && ScopeId.IsEmpty()) { + if (!cf.TenantName && ScopeId.IsEmpty()) { const TString myDomain = DeduceNodeDomain(cf, AppConfig); for (const auto& domain : AppConfig.GetDomainsConfig().GetDomain()) { if (domain.GetName() == myDomain) { @@ -1247,6 +1247,7 @@ class TInitialConfiguratorImpl cf.FixedNodeID, cf.InterconnectPort, cf.CreateNodeLocation(), + AppConfig.GetAuthConfig().GetNodeRegistrationToken(), }; auto result = NodeBrokerClient.RegisterDynamicNode(cf.GrpcSslSettings, addrs, settings, Env, Logger); diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index f482af16e987..94c5764d0c1c 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -7,7 +7,7 @@ #include #include -#include "ydb/core/audit/audit_log.h" +#include "ydb/core/audit/audit_log_service.h" #include #include @@ -123,7 +123,7 @@ #include #include -#include +#include #include #include @@ -186,6 +186,9 @@ #include #include +#include +#include + #include 
#include @@ -240,6 +243,29 @@ #include +#ifndef KIKIMR_DISABLE_S3_OPS +#include +#endif + +namespace { + +#ifndef KIKIMR_DISABLE_S3_OPS +struct TAwsApiGuard { + TAwsApiGuard() { + Aws::InitAPI(Options); + } + + ~TAwsApiGuard() { + Aws::ShutdownAPI(Options); + } + +private: + Aws::SDKOptions Options; +}; +#endif + +} + namespace NKikimr { namespace NKikimrServicesInitializers { @@ -303,6 +329,7 @@ void AddExecutorPool( TBasicExecutorPoolConfig basic; basic.PoolId = poolId; basic.PoolName = poolConfig.GetName(); + basic.UseRingQueue = systemConfig.HasUseRingQueue() && systemConfig.GetUseRingQueue(); if (poolConfig.HasMaxAvgPingDeviation()) { auto poolGroup = counters->GetSubgroup("execpool", basic.PoolName); auto &poolInfo = cpuManager.PingInfoByPool[poolId]; @@ -1146,6 +1173,8 @@ void TSharedCacheInitializer::InitializeServices( config->TotalAsyncQueueInFlyLimit = cfg.GetAsyncQueueInFlyLimit(); config->TotalScanQueueInFlyLimit = cfg.GetScanQueueInFlyLimit(); + config->ReplacementPolicy = cfg.GetReplacementPolicy(); + config->LimitBytes = cfg.GetMemoryLimit(); if (cfg.HasActivePagesReservationPercent()) { config->ActivePagesReservationPercent = cfg.GetActivePagesReservationPercent(); @@ -1157,10 +1186,6 @@ void TSharedCacheInitializer::InitializeServices( TIntrusivePtr<::NMonitoring::TDynamicCounters> tabletGroup = GetServiceCounters(appData->Counters, "tablets"); TIntrusivePtr<::NMonitoring::TDynamicCounters> sausageGroup = tabletGroup->GetSubgroup("type", "S_CACHE"); - config->CacheConfig = new TCacheCacheConfig(cfg.GetMemoryLimit(), - sausageGroup->GetCounter("fresh"), - sausageGroup->GetCounter("staging"), - sausageGroup->GetCounter("warm")); config->Counters = new TSharedPageCacheCounters(sausageGroup); setup->LocalServices.push_back(std::pair(MakeSharedPageCacheId(0), @@ -1645,7 +1670,7 @@ void TSecurityServicesInitializer::InitializeServices(NActors::TActorSystemSetup .AuthConfig = Config.GetAuthConfig(), .CertificateAuthValues = { 
.ClientCertificateAuthorization = Config.GetClientCertificateAuthorization(), - .ServerCertificateFilePath = grpcConfig.GetCert(), + .ServerCertificateFilePath = grpcConfig.HasPathToCertificateFile() ? grpcConfig.GetPathToCertificateFile() : grpcConfig.GetCert(), .Domain = Config.GetAuthConfig().GetCertificateAuthenticationDomain() } }; @@ -1981,7 +2006,8 @@ TPersQueueL2CacheInitializer::TPersQueueL2CacheInitializer(const TKikimrRunConfi {} void TPersQueueL2CacheInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { - static const ui64 DEFAULT_PQ_L2_MAX_SIZE_MB = 8 * 1024; + static const ui64 DEFAULT_PQ_L2_MAX_SIZE_MB = + NKikimrNodeLimits::TNodeLimitsConfig_TPersQueueNodeConfig::default_instance().GetSharedCacheSizeMb(); static const TDuration DEFAULT_PQ_L2_KEEP_TIMEOUT = TDuration::Seconds(10); NPQ::TCacheL2Parameters params; @@ -2149,7 +2175,7 @@ void TKqpServiceInitializer::InitializeServices(NActors::TActorSystemSetup* setu // Create resource manager auto rm = NKqp::CreateKqpResourceManagerActor(Config.GetTableServiceConfig().GetResourceManager(), nullptr, - {}, kqpProxySharedResources); + {}, kqpProxySharedResources, NodeId); setup->LocalServices.push_back(std::make_pair( NKqp::MakeKqpRmServiceID(NodeId), TActorSetupCmd(rm, TMailboxType::HTSwap, appData->UserPoolId))); @@ -2179,6 +2205,26 @@ void TKqpServiceInitializer::InitializeServices(NActors::TActorSystemSetup* setu } } +TGroupedMemoryLimiterInitializer::TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TGroupedMemoryLimiterInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + NOlap::NGroupedMemoryManager::TConfig serviceConfig; + Y_ABORT_UNLESS(serviceConfig.DeserializeFromProto(Config.GetGroupedMemoryLimiterConfig())); + + if (serviceConfig.IsEnabled()) { + TIntrusivePtr<::NMonitoring::TDynamicCounters> tabletGroup = 
GetServiceCounters(appData->Counters, "tablets"); + TIntrusivePtr<::NMonitoring::TDynamicCounters> countersGroup = tabletGroup->GetSubgroup("type", "TX_GROUPED_MEMORY_LIMITER"); + + auto service = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(serviceConfig, countersGroup); + + setup->LocalServices.push_back(std::make_pair( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(NodeId), + TActorSetupCmd(service, TMailboxType::HTSwap, appData->UserPoolId))); + } +} + TCompDiskLimiterInitializer::TCompDiskLimiterInitializer(const TKikimrRunConfig& runConfig) : IKikimrServicesInitializer(runConfig) { } @@ -2792,5 +2838,18 @@ void TGraphServiceInitializer::InitializeServices(NActors::TActorSystemSetup* se TActorSetupCmd(NGraph::CreateGraphService(appData->TenantName), TMailboxType::HTSwap, appData->UserPoolId)); } +#ifndef KIKIMR_DISABLE_S3_OPS +TAwsApiInitializer::TAwsApiInitializer(IGlobalObjectStorage& globalObjects) + : GlobalObjects(globalObjects) +{ +} + +void TAwsApiInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + Y_UNUSED(setup); + Y_UNUSED(appData); + GlobalObjects.AddGlobalObject(std::make_shared()); +} +#endif + } // namespace NKikimrServicesInitializers } // namespace NKikimr diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h index cd2060c52aec..04f30522186f 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.h +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h @@ -397,6 +397,12 @@ class TCompDiskLimiterInitializer: public IKikimrServicesInitializer { void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; }; +class TGroupedMemoryLimiterInitializer: public IKikimrServicesInitializer { +public: + TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup* 
setup, const NKikimr::TAppData* appData) override; +}; + class TCompConveyorInitializer: public IKikimrServicesInitializer { public: TCompConveyorInitializer(const TKikimrRunConfig& runConfig); @@ -612,5 +618,16 @@ class TGraphServiceInitializer : public IKikimrServicesInitializer { void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; }; +#ifndef KIKIMR_DISABLE_S3_OPS +class TAwsApiInitializer : public IServiceInitializer { + IGlobalObjectStorage& GlobalObjects; + +public: + TAwsApiInitializer(IGlobalObjectStorage& globalObjects); + + void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; +}; +#endif + } // namespace NKikimrServicesInitializers } // namespace NKikimr diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 5996acf0166d..2f2a61a50517 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -122,6 +122,7 @@ #include #include #include +#include #include @@ -495,8 +496,11 @@ static TString ReadFile(const TString& fileName) { } void TKikimrRunner::InitializeGracefulShutdown(const TKikimrRunConfig& runConfig) { - Y_UNUSED(runConfig); GracefulShutdownSupported = true; + const auto& config = runConfig.AppConfig.GetShutdownConfig(); + if (config.HasMinDelayBeforeShutdownSeconds()) { + MinDelayBeforeShutdown = TDuration::Seconds(config.GetMinDelayBeforeShutdownSeconds()); + } } void TKikimrRunner::InitializeKqpController(const TKikimrRunConfig& runConfig) { @@ -597,6 +601,8 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { names["keyvalue"] = &hasKeyValue; TServiceCfg hasReplication = services.empty(); names["replication"] = &hasReplication; + TServiceCfg hasView = services.empty(); + names["view"] = &hasView; std::unordered_set enabled; for (const auto& name : services) { @@ -872,6 +878,11 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { 
grpcRequestProxies[0], hasReplication.IsRlAllowed())); } + if (hasView) { + server.AddService(new NGRpcService::TGRpcViewService(ActorSystem.Get(), Counters, + grpcRequestProxies[0], hasView.IsRlAllowed())); + } + if (ModuleFactories) { for (const auto& service : ModuleFactories->GrpcServiceFactory.Create(enabled, disabled, ActorSystem.Get(), Counters, grpcRequestProxies[0])) { server.AddService(service); @@ -1126,6 +1137,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->MetadataCacheConfig.CopyFrom(runConfig.AppConfig.GetMetadataCacheConfig()); } + if (runConfig.AppConfig.HasReplicationConfig()) { + AppData->ReplicationConfig = runConfig.AppConfig.GetReplicationConfig(); + } + // setup resource profiles AppData->ResourceProfiles = new TResourceProfiles; if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize()) @@ -1183,6 +1198,8 @@ void TKikimrRunner::InitializeLogSettings(const TKikimrRunConfig& runConfig) ); LogSettings->ClusterName = logConfig.HasClusterName() ? 
logConfig.GetClusterName() : ""; + LogSettings->TenantName = runConfig.TenantName; + LogSettings->NodeId = runConfig.NodeId; if (logConfig.GetFormat() == "full") { LogSettings->Format = NLog::TSettings::PLAIN_FULL_FORMAT; @@ -1550,6 +1567,10 @@ TIntrusivePtr TKikimrRunner::CreateServiceInitializers sil->AddServiceInitializer(new TCompDiskLimiterInitializer(runConfig)); } + if (serviceMask.EnableGroupedMemoryLimiter) { + sil->AddServiceInitializer(new TGroupedMemoryLimiterInitializer(runConfig)); + } + if (serviceMask.EnableScanConveyor) { sil->AddServiceInitializer(new TScanConveyorInitializer(runConfig)); } @@ -1647,6 +1668,12 @@ TIntrusivePtr TKikimrRunner::CreateServiceInitializers sil->AddServiceInitializer(new TGraphServiceInitializer(runConfig)); } +#ifndef KIKIMR_DISABLE_S3_OPS + if (serviceMask.EnableAwsService) { + sil->AddServiceInitializer(new TAwsApiInitializer(*this)); + } +#endif + return sil; } @@ -1694,6 +1721,7 @@ void TKikimrRunner::KikimrStop(bool graceful) { ActorSystem->Send(new IEventHandle(NGRpcService::CreateGrpcPublisherServiceActorId(), {}, new TEvents::TEvPoisonPill)); } + THPTimer timer; TIntrusivePtr drainProgress(new TDrainProgress()); if (AppData->FeatureFlags.GetEnableDrainOnShutdown() && GracefulShutdownSupported && ActorSystem) { drainProgress->OnSend(); @@ -1727,6 +1755,12 @@ void TKikimrRunner::KikimrStop(bool graceful) { } } + // Wait for a minimum delay to make sure that clients forget about this node + auto passedTime = TDuration::Seconds(timer.Passed()); + if (MinDelayBeforeShutdown > passedTime) { + Sleep(MinDelayBeforeShutdown - passedTime); + } + if (ActorSystem) { ActorSystem->BroadcastToProxies([](const TActorId& proxyId) { return new IEventHandle(proxyId, {}, new TEvInterconnect::TEvTerminate); diff --git a/ydb/core/driver_lib/run/run.h b/ydb/core/driver_lib/run/run.h index a6c9283fc43d..eaa4ff1f91a6 100644 --- a/ydb/core/driver_lib/run/run.h +++ b/ydb/core/driver_lib/run/run.h @@ -42,6 +42,7 @@ class TKikimrRunner : 
public virtual TThrRefBase, private IGlobalObjectStorage { bool EnabledGrpcService = false; bool GracefulShutdownSupported = false; + TDuration MinDelayBeforeShutdown; THolder SqsHttp; THolder YdbDriver; diff --git a/ydb/core/driver_lib/run/service_mask.h b/ydb/core/driver_lib/run/service_mask.h index b99356bd3cff..044557229c6b 100644 --- a/ydb/core/driver_lib/run/service_mask.h +++ b/ydb/core/driver_lib/run/service_mask.h @@ -78,6 +78,8 @@ union TBasicKikimrServicesMask { bool EnableDatabaseMetadataCache:1; bool EnableGraphService:1; bool EnableCompDiskLimiter:1; + bool EnableGroupedMemoryLimiter:1; + bool EnableAwsService:1; }; struct { diff --git a/ydb/core/driver_lib/run/ya.make b/ydb/core/driver_lib/run/ya.make index 2433ec8d5b2d..e479bf5b7b7e 100644 --- a/ydb/core/driver_lib/run/ya.make +++ b/ydb/core/driver_lib/run/ya.make @@ -1,5 +1,15 @@ LIBRARY(run) +IF (OS_WINDOWS) + CFLAGS( + -DKIKIMR_DISABLE_S3_OPS + ) +ELSE() + PEERDIR( + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core + ) +ENDIF() + SRCS( auto_config_initializer.cpp config.cpp @@ -98,8 +108,8 @@ PEERDIR( ydb/core/scheme_types ydb/core/security ydb/core/security/ldap_auth_provider - ydb/core/statistics ydb/core/statistics/aggregator + ydb/core/statistics/service ydb/core/sys_view/processor ydb/core/sys_view/service ydb/core/tablet @@ -111,6 +121,7 @@ PEERDIR( ydb/core/tx/coordinator ydb/core/tx/conveyor/service ydb/core/tx/limiter/service + ydb/core/tx/limiter/grouped_memory/usage ydb/core/tx/datashard ydb/core/tx/long_tx_service ydb/core/tx/long_tx_service/public @@ -161,6 +172,7 @@ PEERDIR( ydb/services/persqueue_v1 ydb/services/rate_limiter ydb/services/replication + ydb/services/view ydb/services/ydb ) diff --git a/ydb/core/driver_lib/version/version.cpp b/ydb/core/driver_lib/version/version.cpp index adc22dfeeca6..85effd8a57d0 100644 --- a/ydb/core/driver_lib/version/version.cpp +++ b/ydb/core/driver_lib/version/version.cpp @@ -26,7 +26,8 @@ TCompatibilityInfo::TCompatibilityInfo() { auto current = 
MakeCurrent(); - // bool success = CompleteFromTag(current); + bool success = CompleteFromTag(current); + Y_UNUSED(success); // Y_ABORT_UNLESS(success); CurrentCompatibilityInfo.CopyFrom(current); @@ -72,12 +73,13 @@ const TStored* TCompatibilityInfo::GetDefault(TComponentId componentId) const { // obsolete version control TMaybe VERSION = NActors::TInterconnectProxyCommon::TVersionInfo{ // version of this binary - "trunk", + "stable-24-3", // compatible versions; must include all compatible old ones, including this one; version verification occurs on both // peers and connection is accepted if at least one of peers accepts the version of the other peer { - "trunk" + "stable-24-2", + "stable-24-3" } }; diff --git a/ydb/core/engine/mkql_keys.cpp b/ydb/core/engine/mkql_keys.cpp index 93d70dcfcbb3..d282ccf5f40f 100644 --- a/ydb/core/engine/mkql_keys.cpp +++ b/ydb/core/engine/mkql_keys.cpp @@ -51,14 +51,6 @@ NScheme::TTypeInfo UnpackTypeInfo(NKikimr::NMiniKQL::TType *type, bool &isOption } } - -template -TCell MakeCell(const NUdf::TUnboxedValuePod& value) { - static_assert(TCell::CanInline(sizeof(T)), "Can't inline data in cell."); - const auto v = value.Get(); - return TCell(reinterpret_cast(&v), sizeof(v)); -} - THolder ExtractKeyTuple(const TTableId& tableId, TTupleLiteral* tuple, const TVector& columns, TKeyDesc::ERowOperation rowOperation, bool requireStaticKey, const TTypeEnvironment& env) { diff --git a/ydb/core/engine/mkql_keys.h b/ydb/core/engine/mkql_keys.h index 517120748a77..b51e789d0182 100644 --- a/ydb/core/engine/mkql_keys.h +++ b/ydb/core/engine/mkql_keys.h @@ -45,6 +45,13 @@ THolder ExtractTableKey(TCallable& callable, const TTableStrings& stri TVector> ExtractTableKeys(TExploringNodeVisitor& explorer, const TTypeEnvironment& env); TTableId ExtractTableId(const TRuntimeNode& node); +template +TCell MakeCell(const NUdf::TUnboxedValuePod& value) { + static_assert(TCell::CanInline(sizeof(T)), "Can't inline data in cell."); + const auto v = value.Get(); + 
return TCell(reinterpret_cast(&v), sizeof(v)); +} + TCell MakeCell(NScheme::TTypeInfo type, const NUdf::TUnboxedValuePod& value, const TTypeEnvironment& env, bool copy = true, i32 typmod = -1, TMaybe* error = {}); diff --git a/ydb/core/engine/mkql_proto.cpp b/ydb/core/engine/mkql_proto.cpp index 98c622608f7d..4dd266740a83 100644 --- a/ydb/core/engine/mkql_proto.cpp +++ b/ydb/core/engine/mkql_proto.cpp @@ -223,6 +223,19 @@ bool CellsFromTuple(const NKikimrMiniKQL::TType* tupleType, } break; } + case NScheme::NTypeIds::Decimal: + { + if (v.HasLow128() && v.HasHi128()) { + NYql::NDecimal::TInt128 int128 = NYql::NDecimal::FromProto(v); + auto &data = memoryOwner.emplace_back(); + data.resize(sizeof(NYql::NDecimal::TInt128)); + std::memcpy(data.Detach(), &int128, sizeof(NYql::NDecimal::TInt128)); + c = TCell(data); + } else { + CHECK_OR_RETURN_ERROR(false, Sprintf("Cannot parse value of type Decimal in tuple at position %" PRIu32, i)); + } + break; + } default: CHECK_OR_RETURN_ERROR(false, Sprintf("Unsupported typeId %" PRIu16 " at index %" PRIu32, typeId, i)); break; @@ -328,6 +341,13 @@ bool CellToValue(NScheme::TTypeInfo type, const TCell& c, NKikimrMiniKQL::TValue val.MutableOptional()->SetText(c.Data(), c.Size()); break; + case NScheme::NTypeIds::Decimal: { + const auto loHi = c.AsValue>(); + val.MutableOptional()->SetLow128(loHi.first); + val.MutableOptional()->SetHi128(loHi.second); + break; + } + case NScheme::NTypeIds::Pg: { auto convert = NPg::PgNativeTextFromNativeBinary(c.AsBuf(), type.GetTypeDesc()); if (convert.Error) { diff --git a/ydb/core/external_sources/external_data_source.cpp b/ydb/core/external_sources/external_data_source.cpp index e11c7cce2446..313da24f1910 100644 --- a/ydb/core/external_sources/external_data_source.cpp +++ b/ydb/core/external_sources/external_data_source.cpp @@ -36,6 +36,10 @@ struct TExternalDataSource : public IExternalSource { ythrow TExternalSourceException() << "Only external table supports parameters"; } + bool 
DataSourceMustHaveDataBaseName(const TProtoStringType& sourceType) const { + return IsIn({"Greenplum", "PostgreSQL", "MySQL", "MsSQLServer", "ClickHouse"}, sourceType); + } + virtual void ValidateExternalDataSource(const TString& externalDataSourceDescription) const override { NKikimrSchemeOp::TExternalDataSourceDescription proto; if (!proto.ParseFromString(externalDataSourceDescription)) { @@ -49,6 +53,10 @@ struct TExternalDataSource : public IExternalSource { ythrow TExternalSourceException() << "Unsupported property: " << key; } + if (DataSourceMustHaveDataBaseName(proto.GetSourceType()) && !proto.GetProperties().GetProperties().contains("database_name")) { + ythrow TExternalSourceException() << proto.GetSourceType() << " source must provide database_name"; + } + ValidateHostname(HostnamePatterns, proto.GetLocation()); } diff --git a/ydb/core/external_sources/external_source_factory.cpp b/ydb/core/external_sources/external_source_factory.cpp index c0be11d62eab..13fb2e479f84 100644 --- a/ydb/core/external_sources/external_source_factory.cpp +++ b/ydb/core/external_sources/external_source_factory.cpp @@ -36,12 +36,13 @@ IExternalSourceFactory::TPtr CreateExternalSourceFactory(const std::vector credentialsFactory, - bool enableInfer) { + bool enableInfer, + bool allowLocalFiles) { std::vector hostnamePatternsRegEx(hostnamePatterns.begin(), hostnamePatterns.end()); return MakeIntrusive(TMap{ { ToString(NYql::EDatabaseType::ObjectStorage), - CreateObjectStorageExternalSource(hostnamePatternsRegEx, actorSystem, pathsLimit, std::move(credentialsFactory), enableInfer) + CreateObjectStorageExternalSource(hostnamePatternsRegEx, actorSystem, pathsLimit, std::move(credentialsFactory), enableInfer, allowLocalFiles) }, { ToString(NYql::EDatabaseType::ClickHouse), @@ -70,7 +71,16 @@ IExternalSourceFactory::TPtr CreateExternalSourceFactory(const std::vector credentialsFactory = nullptr, - bool enableInfer = false); + bool enableInfer = false, + bool allowLocalFiles = false); } 
diff --git a/ydb/core/external_sources/hive_metastore/ut/common.cpp b/ydb/core/external_sources/hive_metastore/ut/common.cpp index cb99adf37a1d..94433fce74b5 100644 --- a/ydb/core/external_sources/hive_metastore/ut/common.cpp +++ b/ydb/core/external_sources/hive_metastore/ut/common.cpp @@ -25,7 +25,7 @@ TString Exec(const TString& cmd) { TString GetExternalPort(const TString& service, const TString& port) { auto dockerComposeBin = BinaryPath("library/recipes/docker_compose/bin/docker-compose"); - auto composeFileYml = ArcadiaSourceRoot() + "/ydb/core/external_sources/hive_metastore/ut/docker-compose.yml"; + auto composeFileYml = ArcadiaFromCurrentLocation(__SOURCE_FILE__, "docker-compose.yml"); auto result = StringSplitter(Exec(dockerComposeBin + " -f " + composeFileYml + " port " + service + " " + port)).Split(':').ToList(); return result ? Strip(result.back()) : TString{}; } diff --git a/ydb/core/external_sources/hive_metastore/ut/ya.make b/ydb/core/external_sources/hive_metastore/ut/ya.make index 9fe9ad0ae516..49cb3844651a 100644 --- a/ydb/core/external_sources/hive_metastore/ut/ya.make +++ b/ydb/core/external_sources/hive_metastore/ut/ya.make @@ -29,13 +29,19 @@ IF (AUTOCHECK) ) ENDIF() +ENV(COMPOSE_HTTP_TIMEOUT=1200) # during parallel tests execution there could be huge disk io, which triggers timeouts in docker-compose INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) IF (OPENSOURCE) - # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, - # which makes it impossible to run them during precommit checks on Github CI. - # Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. - SIZE(MEDIUM) + IF (SANITIZER_TYPE) + # Too huge for precommit check with sanitizers + SIZE(LARGE) + ELSE() + # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, + # which makes it impossible to run them during precommit checks on Github CI. 
+ # Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. + SIZE(MEDIUM) + ENDIF() SET(TEST_TAGS_VALUE) SET(TEST_REQUIREMENTS_VALUE) diff --git a/ydb/core/external_sources/object_storage.cpp b/ydb/core/external_sources/object_storage.cpp index 5c679a07b9d8..e7b8760b0fa2 100644 --- a/ydb/core/external_sources/object_storage.cpp +++ b/ydb/core/external_sources/object_storage.cpp @@ -3,20 +3,30 @@ #include "validation_functions.h" #include "object_storage/s3_fetcher.h" +#include #include #include +#include #include #include #include #include #include +#include #include #include +#include #include +#include +#include #include #include #include +#include +#include +#include +#include #include @@ -31,12 +41,14 @@ struct TObjectStorageExternalSource : public IExternalSource { NActors::TActorSystem* actorSystem, size_t pathsLimit, std::shared_ptr credentialsFactory, - bool enableInfer) + bool enableInfer, + bool allowLocalFiles) : HostnamePatterns(hostnamePatterns) , PathsLimit(pathsLimit) , ActorSystem(actorSystem) , CredentialsFactory(std::move(credentialsFactory)) , EnableInfer(enableInfer) + , AllowLocalFiles(allowLocalFiles) {} virtual TString Pack(const NKikimrExternalSources::TSchema& schema, @@ -55,14 +67,14 @@ struct TObjectStorageExternalSource : public IExternalSource { for (const auto& column: json.GetArray()) { *objectStorage.add_partitioned_by() = column; } - } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, lowerKey)) { + } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "data.date.format"sv, "csv_delimiter"sv}, lowerKey)) { objectStorage.mutable_format_setting()->insert({lowerKey, value}); } else { ythrow TExternalSourceException() << "Unknown attribute " 
<< key; } } - if (auto issues = Validate(schema, objectStorage, PathsLimit)) { + if (auto issues = Validate(schema, objectStorage, PathsLimit, general.location())) { ythrow TExternalSourceException() << issues.ToString(); } @@ -131,11 +143,20 @@ struct TObjectStorageExternalSource : public IExternalSource { } template - static NYql::TIssues Validate(const TScheme& schema, const TObjectStorage& objectStorage, size_t pathsLimit) { + static NYql::TIssues Validate(const TScheme& schema, const TObjectStorage& objectStorage, size_t pathsLimit, const TString& location) { NYql::TIssues issues; - issues.AddIssues(ValidateFormatSetting(objectStorage.format(), objectStorage.format_setting())); + if (TString errorString = NYql::NS3::ValidateWildcards(location)) { + issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, TStringBuilder() << "Location '" << location << "' contains invalid wildcard: " << errorString)); + } + const bool hasPartitioning = objectStorage.projection_size() || objectStorage.partitioned_by_size(); + issues.AddIssues(ValidateFormatSetting(objectStorage.format(), objectStorage.format_setting(), location, hasPartitioning)); + issues.AddIssues(ValidateSchema(schema)); + issues.AddIssues(ValidateJsonListFormat(objectStorage.format(), schema, objectStorage.partitioned_by())); issues.AddIssues(ValidateRawFormat(objectStorage.format(), schema, objectStorage.partitioned_by())); - if (objectStorage.projection_size() || objectStorage.partitioned_by_size()) { + if (hasPartitioning) { + if (NYql::NS3::HasWildcards(location)) { + issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, TStringBuilder() << "Location '" << location << "' contains wildcards")); + } try { TVector partitionedBy{objectStorage.partitioned_by().begin(), objectStorage.partitioned_by().end()}; issues.AddIssues(ValidateProjectionColumns(schema, partitionedBy)); @@ -155,11 +176,17 @@ struct TObjectStorageExternalSource : public IExternalSource { return issues; } - static NYql::TIssues 
ValidateFormatSetting(const TString& format, const google::protobuf::Map& formatSetting) { + static NYql::TIssues ValidateFormatSetting(const TString& format, const google::protobuf::Map& formatSetting, const TString& location, bool hasPartitioning) { NYql::TIssues issues; issues.AddIssues(ValidateDateFormatSetting(formatSetting)); for (const auto& [key, value]: formatSetting) { if (key == "file_pattern"sv) { + if (TString errorString = NYql::NS3::ValidateWildcards(value)) { + issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, TStringBuilder() << "File pattern '" << value << "' contains invalid wildcard: " << errorString)); + } + if (value && !hasPartitioning && !location.EndsWith("/")) { + issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, "Path pattern cannot be used with file_pattern")); + } continue; } @@ -170,7 +197,7 @@ struct TObjectStorageExternalSource : public IExternalSource { continue; } - if (IsIn({ "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv}, key)) { + if (IsIn({ "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "data.date.format"sv}, key)) { continue; } @@ -231,6 +258,10 @@ struct TObjectStorageExternalSource : public IExternalSource { continue; } + if (key == "data.date.format"sv) { + continue; + } + if (matchAllSettings) { issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, "unknown format setting " + key)); } @@ -238,6 +269,46 @@ struct TObjectStorageExternalSource : public IExternalSource { return issues; } + template + static NYql::TIssues ValidateSchema(const TScheme& schema) { + NYql::TIssues issues; + for (const auto& column: schema.column()) { + const auto type = column.type(); + if (type.has_optional_type() && type.optional_type().item().has_optional_type()) { + issues.AddIssue(MakeErrorIssue( + Ydb::StatusIds::BAD_REQUEST, + TStringBuilder{} << "Double optional types are not 
supported (you have '" + << column.name() << " " << NYdb::TType(column.type()).ToString() << "' field)")); + } + } + + return issues; + } + + template + static NYql::TIssues ValidateJsonListFormat(const TString& format, const TScheme& schema, const google::protobuf::RepeatedPtrField& partitionedBy) { + NYql::TIssues issues; + if (format != "json_list"sv) { + return issues; + } + + TSet partitionedBySet{partitionedBy.begin(), partitionedBy.end()}; + + for (const auto& column: schema.column()) { + if (partitionedBySet.contains(column.name())) { + continue; + } + if (ValidateDateOrTimeType(column.type())) { + issues.AddIssue(MakeErrorIssue( + Ydb::StatusIds::BAD_REQUEST, + TStringBuilder{} << "Date, Timestamp and Interval types are not allowed in json_list format (you have '" + << column.name() << " " << NYdb::TType(column.type()).ToString() << "' field)")); + } + } + + return issues; + } + template static NYql::TIssues ValidateRawFormat(const TString& format, const TScheme& schema, const google::protobuf::RepeatedPtrField& partitionedBy) { NYql::TIssues issues; @@ -274,7 +345,6 @@ struct TObjectStorageExternalSource : public IExternalSource { }; virtual NThreading::TFuture> LoadDynamicMetadata(std::shared_ptr meta) override { - Y_UNUSED(ActorSystem); auto format = meta->Attributes.FindPtr("format"); if (!format || !meta->Attributes.contains("withinfer")) { return NThreading::MakeFuture(std::move(meta)); @@ -284,12 +354,13 @@ struct TObjectStorageExternalSource : public IExternalSource { return NThreading::MakeFuture(std::move(meta)); } - NYql::TS3Credentials::TAuthInfo authInfo{}; + NYql::TStructuredTokenBuilder structuredTokenBuilder; if (std::holds_alternative(meta->Auth)) { auto& awsAuth = std::get(meta->Auth); - authInfo.AwsAccessKey = awsAuth.AccessKey; - authInfo.AwsAccessSecret = awsAuth.SecretAccessKey; - authInfo.AwsRegion = awsAuth.Region; + NYql::NS3::TAwsParams params; + params.SetAwsAccessKey(awsAuth.AccessKey); + params.SetAwsRegion(awsAuth.Region); + 
structuredTokenBuilder.SetBasicAuth(params.SerializeAsString(), awsAuth.SecretAccessKey); } else if (std::holds_alternative(meta->Auth)) { if (!CredentialsFactory) { try { @@ -299,31 +370,96 @@ struct TObjectStorageExternalSource : public IExternalSource { } } auto& saAuth = std::get(meta->Auth); - NYql::GetAuthInfo(CredentialsFactory, ""); - authInfo.Token = CredentialsFactory->Create(saAuth.ServiceAccountId, saAuth.ServiceAccountIdSignature)->CreateProvider()->GetAuthInfo(); + structuredTokenBuilder.SetServiceAccountIdAuth(saAuth.ServiceAccountId, saAuth.ServiceAccountIdSignature); + } else { + structuredTokenBuilder.SetNoAuth(); } - auto httpGateway = NYql::IHTTPGateway::Make(); - auto s3Lister = NYql::NS3Lister::MakeS3Lister(httpGateway, NYql::NS3Lister::TListingRequest{ + const NYql::TS3Credentials credentials(CredentialsFactory, structuredTokenBuilder.ToJson()); + + const TString path = meta->TableLocation; + const TString filePattern = meta->Attributes.Value("filepattern", TString{}); + const TString projection = meta->Attributes.Value("projection", TString{}); + const TVector partitionedBy = GetPartitionedByConfig(meta); + + NYql::NPathGenerator::TPathGeneratorPtr pathGenerator; + + bool shouldInferPartitions = !partitionedBy.empty() && !projection; + bool ignoreEmptyListings = !projection.empty(); + + NYql::NS3Lister::TListingRequest request { .Url = meta->DataSourceLocation, - .AuthInfo = authInfo, - .Pattern = meta->TableLocation, - }, Nothing(), false); - auto afterListing = s3Lister->Next().Apply([path = meta->TableLocation](const NThreading::TFuture& listResFut) { - auto& listRes = listResFut.GetValue(); - if (std::holds_alternative(listRes)) { - auto& error = std::get(listRes); - throw yexception() << error.Issues.ToString(); - } - auto& entries = std::get(listRes); - if (entries.Objects.empty()) { - throw yexception() << "couldn't find files at " << path; - } - for (const auto& entry : entries.Objects) { - if (entry.Size > 0) { - return entry.Path; 
+ .Credentials = credentials + }; + TVector requests; + + if (!projection) { + auto error = NYql::NS3::BuildS3FilePattern(path, filePattern, partitionedBy, request); + if (error) { + throw yexception() << *error; + } + requests.push_back(request); + } else { + if (NYql::NS3::HasWildcards(path)) { + throw yexception() << "Path prefix: '" << path << "' contains wildcards"; + } + + pathGenerator = NYql::NPathGenerator::CreatePathGenerator(projection, partitionedBy); + for (const auto& rule : pathGenerator->GetRules()) { + YQL_ENSURE(rule.ColumnValues.size() == partitionedBy.size()); + + request.Pattern = NYql::NS3::NormalizePath(TStringBuilder() << path << "/" << rule.Path << "/*"); + request.PatternType = NYql::NS3Lister::ES3PatternType::Wildcard; + request.Prefix = request.Pattern.substr(0, NYql::NS3::GetFirstWildcardPos(request.Pattern)); + + requests.push_back(request); + } + } + + auto partByData = std::make_shared(); + if (shouldInferPartitions) { + *partByData << JoinSeq(",", partitionedBy); + } + + TVector> futures; + auto httpGateway = NYql::IHTTPGateway::Make(); + auto httpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); + for (const auto& req : requests) { + auto s3Lister = NYql::NS3Lister::MakeS3Lister(httpGateway, httpRetryPolicy, req, Nothing(), AllowLocalFiles, ActorSystem); + futures.push_back(s3Lister->Next()); + } + + auto allFuture = NThreading::WaitExceptionOrAll(futures); + auto afterListing = allFuture.Apply([partByData, shouldInferPartitions, ignoreEmptyListings, futures = std::move(futures), requests = std::move(requests)](const NThreading::TFuture& result) { + result.GetValue(); + for (size_t i = 0; i < futures.size(); ++i) { + auto& listRes = futures[i].GetValue(); + if (std::holds_alternative(listRes)) { + auto& error = std::get(listRes); + throw yexception() << error.Issues.ToString(); + } + auto& entries = std::get(listRes); + if (entries.Objects.empty() && 
!ignoreEmptyListings) { + throw yexception() << "couldn't find files at " << requests[i].Pattern; + } + + if (shouldInferPartitions) { + for (const auto& entry : entries.Objects) { + *partByData << Endl << JoinSeq(",", entry.MatchedGlobs); + } + } + + for (const auto& entry : entries.Objects) { + if (entry.Size > 0) { + return entry; + } + } + + if (!ignoreEmptyListings) { + throw yexception() << "couldn't find any files for type inference, please check that the right path is provided"; } } + throw yexception() << "couldn't find any files for type inference, please check that the right path is provided"; }); @@ -331,18 +467,21 @@ struct TObjectStorageExternalSource : public IExternalSource { meta->DataSourceLocation, httpGateway, NYql::IHTTPGateway::TRetryPolicy::GetNoRetryPolicy(), - std::move(authInfo) + credentials )); meta->Attributes.erase("withinfer"); - auto fileFormat = NObjectStorage::NInference::ConvertFileFormat(*format); - auto arrowFetcherId = ActorSystem->Register(NObjectStorage::NInference::CreateArrowFetchingActor(s3FetcherId, fileFormat)); - auto arrowInferencinatorId = ActorSystem->Register(NObjectStorage::NInference::CreateArrowInferencinator(arrowFetcherId, fileFormat, meta->Attributes)); + auto arrowFetcherId = ActorSystem->Register(NObjectStorage::NInference::CreateArrowFetchingActor(s3FetcherId, meta->Attributes)); + auto arrowInferencinatorId = ActorSystem->Register(NObjectStorage::NInference::CreateArrowInferencinator(arrowFetcherId)); - return afterListing.Apply([arrowInferencinatorId, meta, actorSystem = ActorSystem](const NThreading::TFuture& pathFut) { + return afterListing.Apply([arrowInferencinatorId, meta, actorSystem = ActorSystem](const NThreading::TFuture& entryFut) { auto promise = NThreading::NewPromise(); auto schemaToMetadata = [meta](NThreading::TPromise metaPromise, NObjectStorage::TEvInferredFileSchema&& response) { + if (!response.Status.IsSuccess()) { + 
metaPromise.SetValue(NYql::NCommon::ResultFromError(response.Status.GetIssues())); + return; + } meta->Changed = true; meta->Schema.clear_column(); for (const auto& column : response.Fields) { @@ -354,14 +493,54 @@ struct TObjectStorageExternalSource : public IExternalSource { result.Metadata = meta; metaPromise.SetValue(std::move(result)); }; + auto [path, size, _] = entryFut.GetValue(); actorSystem->Register(new NKqp::TActorRequestHandler( arrowInferencinatorId, - new NObjectStorage::TEvInferFileSchema(TString{pathFut.GetValue()}), + new NObjectStorage::TEvInferFileSchema(std::move(path), size), promise, std::move(schemaToMetadata) )); return promise.GetFuture(); + }).Apply([arrowInferencinatorId, meta, partByData, partitionedBy, pathGenerator, this](const NThreading::TFuture& result) { + auto& value = result.GetValue(); + if (!value.Success()) { + return result; + } + + auto meta = value.Metadata; + if (pathGenerator) { + for (const auto& rule : pathGenerator->GetConfig().Rules) { + auto& destColumn = *meta->Schema.add_column(); + destColumn.mutable_name()->assign(rule.Name); + switch (rule.Type) { + case NYql::NPathGenerator::IPathGenerator::EType::INTEGER: + destColumn.mutable_type()->set_type_id(Ydb::Type::INT64); + break; + + case NYql::NPathGenerator::IPathGenerator::EType::DATE: + destColumn.mutable_type()->set_type_id(Ydb::Type::DATE); + break; + + case NYql::NPathGenerator::IPathGenerator::EType::ENUM: + default: + destColumn.mutable_type()->set_type_id(Ydb::Type::STRING); + break; + } + } + } else { + for (const auto& partitionName : partitionedBy) { + auto& destColumn = *meta->Schema.add_column(); + destColumn.mutable_name()->assign(partitionName); + destColumn.mutable_type()->set_type_id(Ydb::Type::UTF8); + } + } + + if (!partitionedBy.empty() && !pathGenerator) { + return InferPartitionedColumnsTypes(arrowInferencinatorId, partByData, result); + } + + return result; }).Apply([](const NThreading::TFuture& result) { auto& value = result.GetValue(); if 
(value.Success()) { @@ -376,6 +555,82 @@ struct TObjectStorageExternalSource : public IExternalSource { } private: + NThreading::TFuture InferPartitionedColumnsTypes( + NActors::TActorId arrowInferencinatorId, + std::shared_ptr partByData, + const NThreading::TFuture& result) const { + + auto& value = result.GetValue(); + auto meta = value.Metadata; + + arrow::BufferBuilder builder; + auto partitionBuffer = std::make_shared(nullptr, 0); + auto buildStatus = builder.Append(partByData->data(), partByData->size()); + auto finishStatus = builder.Finish(&partitionBuffer); + + if (!buildStatus.ok() || !finishStatus.ok()) { + return result; + } + + auto promise = NThreading::NewPromise(); + auto partitionsToMetadata = [meta](NThreading::TPromise metaPromise, NObjectStorage::TEvInferredFileSchema&& response){ + if (response.Status.IsSuccess()) { + THashMap inferredTypes; + for (const auto& column : response.Fields) { + if (ValidateCommonProjectionType(column.type(), column.name()).Empty()) { + inferredTypes[column.name()] = column.type(); + } + } + + for (auto& destColumn : *meta->Schema.mutable_column()) { + if (auto type = inferredTypes.FindPtr(destColumn.name()); type) { + destColumn.mutable_type()->set_type_id(type->type_id()); + } + } + } + TMetadataResult result; + result.SetSuccess(); + result.Metadata = meta; + metaPromise.SetValue(std::move(result)); + }; + + auto bufferReader = std::make_shared(std::move(partitionBuffer)); + auto file = std::dynamic_pointer_cast(bufferReader); + auto config = NObjectStorage::NInference::MakeFormatConfig({{ "format", "csv_with_names" }}); + config->ShouldMakeOptional = false; + ActorSystem->Register(new NKqp::TActorRequestHandler( + arrowInferencinatorId, + new NObjectStorage::TEvArrowFile(config, std::move(file), ""), + promise, + std::move(partitionsToMetadata) + )); + + return promise.GetFuture(); + } + + static TVector GetPartitionedByConfig(std::shared_ptr meta) { + THashSet columns; + if (auto partitioned = 
meta->Attributes.FindPtr("partitionedby"); partitioned) { + NJson::TJsonValue values; + auto successful = NJson::ReadJsonTree(*partitioned, &values); + if (!successful) { + columns.insert(*partitioned); + } else { + Y_ENSURE(values.GetType() == NJson::JSON_ARRAY); + + for (const auto& value : values.GetArray()) { + Y_ENSURE(value.GetType() == NJson::JSON_STRING); + if (columns.contains(value.GetString())) { + throw yexception() << "invalid partitioned_by parameter, column " << value.GetString() << "mentioned twice"; + } + columns.insert(value.GetString()); + } + } + } + + return TVector{columns.begin(), columns.end()}; + } + static bool IsValidIntervalUnit(const TString& unit) { static constexpr std::array IntervalUnits = { "MICROSECONDS"sv, @@ -591,12 +846,57 @@ struct TObjectStorageExternalSource : public IExternalSource { return FindIf(availableTypes, [&columnType](const auto& availableType) { return NYdb::TypesEqual(availableType, columnType); }) != availableTypes.end(); } + static std::vector GetDateOrTimeTypes() { + NYdb::TType dateType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Date).Build(); + NYdb::TType datetimeType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Datetime).Build(); + NYdb::TType timestampType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Timestamp).Build(); + NYdb::TType intervalType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Interval).Build(); + NYdb::TType date32Type = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Date32).Build(); + NYdb::TType datetime64Type = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Datetime64).Build(); + NYdb::TType timestamp64Type = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Timestamp64).Build(); + NYdb::TType interval64Type = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::Interval64).Build(); + NYdb::TType tzdateType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::TzDate).Build(); + NYdb::TType tzdatetimeType = 
NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::TzDatetime).Build(); + NYdb::TType tztimestampType = NYdb::TTypeBuilder{}.Primitive(NYdb::EPrimitiveType::TzTimestamp).Build(); + const std::vector result { + dateType, + datetimeType, + timestampType, + intervalType, + date32Type, + datetime64Type, + timestamp64Type, + interval64Type, + tzdateType, + tzdatetimeType, + tztimestampType, + NYdb::TTypeBuilder{}.Optional(dateType).Build(), + NYdb::TTypeBuilder{}.Optional(datetimeType).Build(), + NYdb::TTypeBuilder{}.Optional(timestampType).Build(), + NYdb::TTypeBuilder{}.Optional(intervalType).Build(), + NYdb::TTypeBuilder{}.Optional(date32Type).Build(), + NYdb::TTypeBuilder{}.Optional(datetime64Type).Build(), + NYdb::TTypeBuilder{}.Optional(timestamp64Type).Build(), + NYdb::TTypeBuilder{}.Optional(interval64Type).Build(), + NYdb::TTypeBuilder{}.Optional(tzdateType).Build(), + NYdb::TTypeBuilder{}.Optional(tzdatetimeType).Build(), + NYdb::TTypeBuilder{}.Optional(tztimestampType).Build() + }; + return result; + } + + static bool ValidateDateOrTimeType(const NYdb::TType& columnType) { + static const std::vector availableTypes = GetDateOrTimeTypes(); + return FindIf(availableTypes, [&columnType](const auto& availableType) { return NYdb::TypesEqual(availableType, columnType); }) != availableTypes.end(); + } + private: const std::vector HostnamePatterns; const size_t PathsLimit; NActors::TActorSystem* ActorSystem = nullptr; std::shared_ptr CredentialsFactory; const bool EnableInfer = false; + const bool AllowLocalFiles; }; } @@ -606,12 +906,13 @@ IExternalSource::TPtr CreateObjectStorageExternalSource(const std::vector credentialsFactory, - bool enableInfer) { - return MakeIntrusive(hostnamePatterns, actorSystem, pathsLimit, std::move(credentialsFactory), enableInfer); + bool enableInfer, + bool allowLocalFiles) { + return MakeIntrusive(hostnamePatterns, actorSystem, pathsLimit, std::move(credentialsFactory), enableInfer, allowLocalFiles); } -NYql::TIssues Validate(const 
FederatedQuery::Schema& schema, const FederatedQuery::ObjectStorageBinding::Subset& objectStorage, size_t pathsLimit) { - return TObjectStorageExternalSource::Validate(schema, objectStorage, pathsLimit); +NYql::TIssues Validate(const FederatedQuery::Schema& schema, const FederatedQuery::ObjectStorageBinding::Subset& objectStorage, size_t pathsLimit, const TString& location) { + return TObjectStorageExternalSource::Validate(schema, objectStorage, pathsLimit, location); } NYql::TIssues ValidateDateFormatSetting(const google::protobuf::Map& formatSetting, bool matchAllSettings) { diff --git a/ydb/core/external_sources/object_storage.h b/ydb/core/external_sources/object_storage.h index 6e08d8e65181..74de7a69eb87 100644 --- a/ydb/core/external_sources/object_storage.h +++ b/ydb/core/external_sources/object_storage.h @@ -13,9 +13,10 @@ IExternalSource::TPtr CreateObjectStorageExternalSource(const std::vector credentialsFactory, - bool enableInfer); + bool enableInfer, + bool allowLocalFiles); -NYql::TIssues Validate(const FederatedQuery::Schema& schema, const FederatedQuery::ObjectStorageBinding::Subset& objectStorage, size_t pathsLimit); +NYql::TIssues Validate(const FederatedQuery::Schema& schema, const FederatedQuery::ObjectStorageBinding::Subset& objectStorage, size_t pathsLimit, const TString& location); NYql::TIssues ValidateDateFormatSetting(const google::protobuf::Map& formatSetting, bool matchAllSettings = false); diff --git a/ydb/core/external_sources/object_storage/events.h b/ydb/core/external_sources/object_storage/events.h index 1e1d0b03a78d..a327644fe1a7 100644 --- a/ydb/core/external_sources/object_storage/events.h +++ b/ydb/core/external_sources/object_storage/events.h @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include namespace NKikimr::NExternalSource::NObjectStorage { @@ -108,30 +110,43 @@ struct TEvS3RangeError : public NActors::TEventLocal { - TEvArrowFile(std::shared_ptr file, TString path) - : File{std::move(file)} + 
TEvArrowFile( + std::shared_ptr config, + std::shared_ptr file, + TString path) + : Config{std::move(config)} + , File{std::move(file)} , Path{std::move(path)} {} + std::shared_ptr Config; std::shared_ptr File; TString Path; }; struct TEvInferFileSchema : public NActors::TEventLocal { - explicit TEvInferFileSchema(TString&& path) + explicit TEvInferFileSchema(TString&& path, ui64 size) : Path{std::move(path)} + , Size{size} {} TString Path; + ui64 Size = 0; }; struct TEvInferredFileSchema : public NActors::TEventLocal { TEvInferredFileSchema(TString path, std::vector&& fields) : Path{std::move(path)} + , Status{NYdb::EStatus::SUCCESS, {}} , Fields{std::move(fields)} {} + TEvInferredFileSchema(TString path, NYql::TIssues&& issues) + : Path{std::move(path)} + , Status{NYdb::EStatus::INTERNAL_ERROR, std::move(issues)} + {} TString Path; + NYdb::TStatus Status; std::vector Fields; }; diff --git a/ydb/core/external_sources/object_storage/inference/arrow_fetcher.cpp b/ydb/core/external_sources/object_storage/inference/arrow_fetcher.cpp index 09a1eb92c2da..cc0dfdaa24f1 100644 --- a/ydb/core/external_sources/object_storage/inference/arrow_fetcher.cpp +++ b/ydb/core/external_sources/object_storage/inference/arrow_fetcher.cpp @@ -1,11 +1,15 @@ #include "arrow_fetcher.h" #include "arrow_inferencinator.h" +#include "infer_config.h" #include #include #include #include +#include +#include #include +#include #include #include @@ -14,16 +18,26 @@ #include #include +#if defined(_linux_) || defined(_darwin_) +#include +#include +#endif + namespace NKikimr::NExternalSource::NObjectStorage::NInference { class TArrowFileFetcher : public NActors::TActorBootstrapped { static constexpr uint64_t PrefixSize = 10_MB; public: - TArrowFileFetcher(NActors::TActorId s3FetcherId, EFileFormat format) + TArrowFileFetcher(NActors::TActorId s3FetcherId, const THashMap& params) : S3FetcherId_{s3FetcherId} - , Format_{format} + , Config_{MakeFormatConfig(params)} { - 
Y_ABORT_UNLESS(IsArrowInferredFormat(Format_)); + Y_ABORT_UNLESS(IsArrowInferredFormat(Config_->Format)); + + auto decompression = params.FindPtr("compression"); + if (decompression) { + DecompressionFormat_ = *decompression; + } } void Bootstrap() { @@ -40,19 +54,30 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped const auto& request = *ev->Get(); TRequest localRequest{ .Path = request.Path, - .RequestId = {}, + .RequestId = TGUID::Create(), .Requester = ev->Sender, + .MetadataRequest = false, }; - CreateGuid(&localRequest.RequestId); - switch (Format_) { + switch (Config_->Format) { case EFileFormat::CsvWithNames: - case EFileFormat::TsvWithNames: { - HandleAsPrefixFile(std::move(localRequest), ctx); + case EFileFormat::TsvWithNames: + case EFileFormat::JsonEachRow: + case EFileFormat::JsonList: { + RequestPartialFile(std::move(localRequest), ctx, 0, 10_MB); + break; + } + case EFileFormat::Parquet: { + localRequest.MetadataRequest = true; + RequestPartialFile(std::move(localRequest), ctx, request.Size - 8, request.Size - 4); break; } default: { - ctx.Send(localRequest.Requester, MakeError(localRequest.Path, NFq::TIssuesIds::UNSUPPORTED, TStringBuilder{} << "unsupported format for inference: " << ConvertFileFormat(Format_))); + ctx.Send(localRequest.Requester, MakeError( + localRequest.Path, + NFq::TIssuesIds::UNSUPPORTED, + TStringBuilder{} << "unsupported format for inference: " << ConvertFileFormat(Config_->Format)) + ); return; } case EFileFormat::Undefined: @@ -67,17 +92,36 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped const auto& request = requestIt->second; + TString data = std::move(response.Data); + if (DecompressionFormat_) { + auto decompressedData = DecompressFile(data, request, ctx); + if (!decompressedData) { + return; + } + data = std::move(*decompressedData); + } + std::shared_ptr file; - switch (Format_) { + switch (Config_->Format) { case EFileFormat::CsvWithNames: case EFileFormat::TsvWithNames: { - // TODO: 
obtain from request - arrow::csv::ParseOptions options; - if (Format_ == EFileFormat::TsvWithNames) { - options.delimiter = '\t'; + file = CleanupCsvFile(data, request, std::dynamic_pointer_cast(Config_)->ParseOpts, ctx); + ctx.Send(request.Requester, new TEvArrowFile(Config_, std::move(file), request.Path)); + break; + } + case EFileFormat::Parquet: { + if (request.MetadataRequest) { + HandleMetadataSizeRequest(data, request, ctx); + return; } - file = CleanupCsvFile(response.Data, request, options, ctx); - ctx.Send(request.Requester, new TEvArrowFile(std::move(file), request.Path)); + file = BuildParquetFileFromMetadata(data, request, ctx); + ctx.Send(request.Requester, new TEvArrowFile(Config_, std::move(file), request.Path)); + break; + } + case EFileFormat::JsonEachRow: + case EFileFormat::JsonList: { + file = CleanupJsonFile(data, request, std::dynamic_pointer_cast(Config_)->ParseOpts, ctx); + ctx.Send(request.Requester, new TEvArrowFile(Config_, std::move(file), request.Path)); break; } case EFileFormat::Undefined: @@ -104,14 +148,15 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped uint64_t From = 0; uint64_t To = 0; NActors::TActorId Requester; + bool MetadataRequest; }; // Reading file - void HandleAsPrefixFile(TRequest&& insertedRequest, const NActors::TActorContext& ctx) { + void RequestPartialFile(TRequest&& insertedRequest, const NActors::TActorContext& ctx, uint64_t from, uint64_t to) { auto path = insertedRequest.Path; - insertedRequest.From = 0; - insertedRequest.To = 10_MB; + insertedRequest.From = from; + insertedRequest.To = to; auto it = InflightRequests_.try_emplace(path, std::move(insertedRequest)); Y_ABORT_UNLESS(it.second, "couldn't insert request for path: %s", path.c_str()); @@ -126,42 +171,141 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped } void HandleAsRAFile(TRequest&& insertedRequest, const NActors::TActorContext& ctx) { + auto format = Config_->Format; auto error = MakeError( insertedRequest.Path, 
NFq::TIssuesIds::UNSUPPORTED, - TStringBuilder{} << "got unsupported format: " << ConvertFileFormat(Format_) << '(' << static_cast(Format_) << ')' + TStringBuilder{} << "got unsupported format: " << ConvertFileFormat(format) << '(' << static_cast(format) << ')' ); SendError(ctx, error); } // Cutting file + TMaybe DecompressFile(const TString& data, const TRequest& request, const NActors::TActorContext& ctx) { +#if defined(_linux_) || defined(_darwin_) + try { + NDB::ReadBufferFromString dataBuffer(data); + auto decompressorBuffer = NYql::MakeDecompressor(dataBuffer, *DecompressionFormat_); + if (!decompressorBuffer) { + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "unknown compression: " << *DecompressionFormat_ << ". Use one of: gzip, zstd, lz4, brotli, bzip2, xz" + ); + SendError(ctx, error); + return {}; + } + + TStringBuilder decompressedData; + while (!decompressorBuffer->eof() && decompressedData.size() < 10_MB) { + decompressorBuffer->nextIfAtEnd(); + size_t maxDecompressedChunkSize = std::min( + decompressorBuffer->available(), + 10_MB - decompressedData.size() + ); + TString decompressedChunk{maxDecompressedChunkSize, ' '}; + decompressorBuffer->read(&decompressedChunk.front(), maxDecompressedChunkSize); + decompressedData << decompressedChunk; + } + return std::move(decompressedData); + } catch (const yexception& error) { + auto errorEv = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "couldn't decompress file, check compression params: " << error.what() + ); + SendError(ctx, errorEv); + return {}; + } +#else + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "inference with decompression is not supported on windows" + ); + SendError(ctx, error); + return {}; +#endif + } + std::shared_ptr CleanupCsvFile(const TString& data, const TRequest& request, const arrow::csv::ParseOptions& options, const NActors::TActorContext& 
ctx) { auto chunker = arrow::csv::MakeChunker(options); std::shared_ptr whole, partial; - auto arrowData = std::make_shared(nullptr, 0); - { - arrow::BufferBuilder builder; - auto buildRes = builder.Append(data.data(), data.size()); - if (buildRes.ok()) { - buildRes = builder.Finish(&arrowData); - } - if (!buildRes.ok()) { + auto arrowData = BuildBufferFromData(data, request, ctx); + auto status = chunker->Process(arrowData, &whole, &partial); + + if (!status.ok()) { + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "couldn't run arrow CSV chunker for " << request.Path << ": " << status.ToString() + ); + SendError(ctx, error); + return nullptr; + } + + return std::make_shared(std::move(whole)); + } + + void HandleMetadataSizeRequest(const TString& data, TRequest request, const NActors::TActorContext& ctx) { + uint32_t metadataSize = arrow::BitUtil::FromLittleEndian(ReadUnaligned(data.data())); + + if (metadataSize > 10_MB) { + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "couldn't load parquet metadata, size is bigger than 10MB : " << metadataSize + ); + SendError(ctx, error); + return; + } + + InflightRequests_.erase(request.Path); + + TRequest localRequest{ + .Path = request.Path, + .RequestId = TGUID::Create(), + .Requester = request.Requester, + .MetadataRequest = false, + }; + RequestPartialFile(std::move(localRequest), ctx, request.From - metadataSize, request.To + 4); + } + + std::shared_ptr BuildParquetFileFromMetadata(const TString& data, const TRequest& request, const NActors::TActorContext& ctx) { + auto arrowData = BuildBufferFromData(data, request, ctx); + return std::make_shared(std::move(arrowData)); + } + + std::shared_ptr CleanupJsonFile(const TString& data, const TRequest& request, const arrow::json::ParseOptions& options, const NActors::TActorContext& ctx) { + auto chunker = arrow::json::MakeChunker(options); + std::shared_ptr whole, partial; + 
auto arrowData = BuildBufferFromData(data, request, ctx); + + if (Config_->Format == EFileFormat::JsonList) { + auto empty = std::make_shared(nullptr, 0); + int64_t count = 1; + auto status = chunker->ProcessSkip(empty, arrowData, false, &count, &whole); + + if (!status.ok()) { auto error = MakeError( request.Path, NFq::TIssuesIds::INTERNAL_ERROR, - TStringBuilder{} << "couldn't consume buffer from S3Fetcher: " << buildRes.ToString() + TStringBuilder{} << "couldn't run arrow json chunker for " << request.Path << ": " << status.ToString() ); SendError(ctx, error); return nullptr; } + + arrowData = std::move(whole); } + auto status = chunker->Process(arrowData, &whole, &partial); if (!status.ok()) { auto error = MakeError( request.Path, NFq::TIssuesIds::INTERNAL_ERROR, - TStringBuilder{} << "couldn't run arrow CSV chunker for " << request.Path << ": " << status.ToString() + TStringBuilder{} << "couldn't run arrow json chunker for " << request.Path << ": " << status.ToString() ); SendError(ctx, error); return nullptr; @@ -170,6 +314,34 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped return std::make_shared(std::move(whole)); } + std::shared_ptr BuildBufferFromData(const TString& data, const TRequest& request, const NActors::TActorContext& ctx) { + auto dataBuffer = std::make_shared(nullptr, 0); + arrow::BufferBuilder builder; + auto buildRes = builder.Append(data.data(), data.size()); + if (!buildRes.ok()) { + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "couldn't read data from S3Fetcher: " << buildRes.ToString() + ); + SendError(ctx, error); + return nullptr; + } + + buildRes = builder.Finish(&dataBuffer); + if (!buildRes.ok()) { + auto error = MakeError( + request.Path, + NFq::TIssuesIds::INTERNAL_ERROR, + TStringBuilder{} << "couldn't copy data from S3Fetcher: " << buildRes.ToString() + ); + SendError(ctx, error); + return nullptr; + } + + return dataBuffer; + } + // Utility void SendError(const 
NActors::TActorContext& ctx, TEvFileError* error) { auto requestIt = InflightRequests_.find(error->Path); @@ -182,11 +354,12 @@ class TArrowFileFetcher : public NActors::TActorBootstrapped // Fields NActors::TActorId S3FetcherId_; - EFileFormat Format_; + std::shared_ptr Config_; + TMaybe DecompressionFormat_; std::unordered_map InflightRequests_; // Path -> Request }; -NActors::IActor* CreateArrowFetchingActor(NActors::TActorId s3FetcherId, EFileFormat format) { - return new TArrowFileFetcher{s3FetcherId, format}; +NActors::IActor* CreateArrowFetchingActor(NActors::TActorId s3FetcherId, const THashMap& params) { + return new TArrowFileFetcher{s3FetcherId, params}; } } // namespace NKikimr::NExternalSource::NObjectStorage::NInference diff --git a/ydb/core/external_sources/object_storage/inference/arrow_fetcher.h b/ydb/core/external_sources/object_storage/inference/arrow_fetcher.h index dc45affccc4f..34f4874f835d 100644 --- a/ydb/core/external_sources/object_storage/inference/arrow_fetcher.h +++ b/ydb/core/external_sources/object_storage/inference/arrow_fetcher.h @@ -5,5 +5,5 @@ namespace NKikimr::NExternalSource::NObjectStorage::NInference { -NActors::IActor* CreateArrowFetchingActor(NActors::TActorId s3FetcherId, EFileFormat format); +NActors::IActor* CreateArrowFetchingActor(NActors::TActorId s3FetcherId, const THashMap& params); } // namespace NKikimr::NExternalSource::NObjectStorage::NInference diff --git a/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.cpp b/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.cpp index 600b6d69c2cc..ab162b61552e 100644 --- a/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.cpp +++ b/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.cpp @@ -1,23 +1,54 @@ #include "arrow_inferencinator.h" +#include "infer_config.h" #include #include #include +#include +#include +#include #include #include #include +#include #include +#define LOG_E(name, 
stream) \ + LOG_ERROR_S(*NActors::TlsActivationContext, NKikimrServices::OBJECT_STORAGE_INFERENCINATOR, name << ": " << this->SelfId() << ". " << stream) +#define LOG_I(name, stream) \ + LOG_INFO_S(*NActors::TlsActivationContext, NKikimrServices::OBJECT_STORAGE_INFERENCINATOR, name << ": " << this->SelfId() << ". " << stream) +#define LOG_D(name, stream) \ + LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::OBJECT_STORAGE_INFERENCINATOR, name << ": " << this->SelfId() << ". " << stream) +#define LOG_T(name, stream) \ + LOG_TRACE_S(*NActors::TlsActivationContext, NKikimrServices::OBJECT_STORAGE_INFERENCINATOR, name << ": " << this->SelfId() << ". " << stream) namespace NKikimr::NExternalSource::NObjectStorage::NInference { namespace { -bool ArrowToYdbType(Ydb::Type& resType, const arrow::DataType& type) { +bool ShouldBeOptional(const arrow::DataType& type, std::shared_ptr config) { + if (!config->ShouldMakeOptional) { + return false; + } + + switch (type.id()) { + case arrow::Type::NA: + case arrow::Type::STRING: + case arrow::Type::BINARY: + case arrow::Type::LARGE_BINARY: + case arrow::Type::FIXED_SIZE_BINARY: + return false; + default: + return true; + } +} + +bool ArrowToYdbType(Ydb::Type& maybeOptionalType, const arrow::DataType& type, std::shared_ptr config) { + auto& resType = ShouldBeOptional(type, config) ? 
*maybeOptionalType.mutable_optional_type()->mutable_item() : maybeOptionalType; switch (type.id()) { case arrow::Type::NA: - resType.set_null_type(google::protobuf::NullValue::NULL_VALUE); + resType.set_type_id(Ydb::Type::UTF8); return true; case arrow::Type::BOOL: resType.set_type_id(Ydb::Type::BOOL); @@ -70,7 +101,11 @@ bool ArrowToYdbType(Ydb::Type& resType, const arrow::DataType& type) { resType.set_type_id(Ydb::Type::DATETIME64); return true; case arrow::Type::TIMESTAMP: - resType.set_type_id(Ydb::Type::TIMESTAMP); + if (config->Format == EFileFormat::JsonEachRow || config->Format == EFileFormat::JsonList) { + maybeOptionalType.set_type_id(Ydb::Type::UTF8); + } else { + resType.set_type_id(Ydb::Type::TIMESTAMP); + } return true; case arrow::Type::TIME32: // TODO: is there anything? return false; @@ -91,32 +126,10 @@ bool ArrowToYdbType(Ydb::Type& resType, const arrow::DataType& type) { case arrow::Type::LIST: { // TODO: is ok? return false; } - case arrow::Type::STRUCT: { // TODO: is ok? - auto& structType = *resType.mutable_struct_type(); - for (const auto& field : type.fields()) { - auto& member = *structType.add_members(); - auto& memberType = *member.mutable_type(); - if (!ArrowToYdbType(memberType, *field->type())) { - return false; - } - member.mutable_name()->assign(field->name().data(), field->name().size()); - } - return true; - } + case arrow::Type::STRUCT: case arrow::Type::SPARSE_UNION: - case arrow::Type::DENSE_UNION: { // TODO: is ok? - auto& variant = *resType.mutable_variant_type()->mutable_struct_items(); - for (const auto& field : type.fields()) { - auto& member = *variant.add_members(); - if (!ArrowToYdbType(*member.mutable_type(), *field->type())) { - return false; - } - if (field->name().empty()) { - return false; - } - member.mutable_name()->assign(field->name().data(), field->name().size()); - } - return true; + case arrow::Type::DENSE_UNION: { + return false; } case arrow::Type::DICTIONARY: // TODO: is representable? 
return false; @@ -139,78 +152,116 @@ bool ArrowToYdbType(Ydb::Type& resType, const arrow::DataType& type) { } return false; } -} -struct FormatConfig { - virtual ~FormatConfig() noexcept = default; -}; - -struct CsvConfig : public FormatConfig { - arrow::csv::ParseOptions ParseOpts = arrow::csv::ParseOptions::Defaults(); - arrow::csv::ConvertOptions ConvOpts = arrow::csv::ConvertOptions::Defaults(); -}; +TEvInferredFileSchema* MakeErrorSchema(TString path, NFq::TIssuesIds::EIssueCode code, TString message) { + NYql::TIssues issues; + issues.AddIssue(std::move(message)); + issues.back().SetCode(code, NYql::TSeverityIds::S_ERROR); + return new TEvInferredFileSchema{std::move(path), std::move(issues)}; +} -using TsvConfig = CsvConfig; +} namespace { using ArrowField = std::shared_ptr; using ArrowFields = std::vector; -std::variant InferCsvTypes(std::shared_ptr file, const CsvConfig& config) { +std::variant InferCsvTypes(std::shared_ptr file, std::shared_ptr config) { + int64_t fileSize; + if (auto sizeStatus = file->GetSize().Value(&fileSize); !sizeStatus.ok()) { + return TStringBuilder{} << "coudn't get file size: " << sizeStatus.ToString(); + } + std::shared_ptr reader; - auto fileSize = static_cast(file->GetSize().ValueOr(1 << 20)); - fileSize = std::min(fileSize, 1 << 20); auto readerStatus = arrow::csv::TableReader::Make( - arrow::io::default_io_context(), std::move(file), arrow::csv::ReadOptions{.use_threads = false, .block_size = fileSize}, config.ParseOpts, config.ConvOpts + arrow::io::default_io_context(), + std::move(file), + arrow::csv::ReadOptions{.use_threads = false, .block_size = static_cast(fileSize)}, + config->ParseOpts, + config->ConvOpts ) .Value(&reader); if (!readerStatus.ok()) { - return TString{TStringBuilder{} << "couldn't make table from data: " << readerStatus.ToString()}; + return TString{TStringBuilder{} << "couldn't open csv/tsv file, check format and compression parameters: " << readerStatus.ToString()}; } std::shared_ptr table; auto 
tableRes = reader->Read().Value(&table); if (!tableRes.ok()) { - return TStringBuilder{} << "couldn't read table from data: " << readerStatus.ToString(); + return TStringBuilder{} << "couldn't parse csv/tsv file, check format and compression parameters: " << tableRes.ToString(); } return table->fields(); } -std::variant InferType(EFileFormat format, std::shared_ptr file, const FormatConfig& config) { - switch (format) { - case EFileFormat::CsvWithNames: - return InferCsvTypes(std::move(file), static_cast(config)); - case EFileFormat::TsvWithNames: - return InferCsvTypes(std::move(file), static_cast(config)); - case EFileFormat::Undefined: - default: - return std::variant{std::in_place_type_t{}, TStringBuilder{} << "unexpected format: " << ConvertFileFormat(format)}; +std::variant InferParquetTypes(std::shared_ptr file) { + parquet::arrow::FileReaderBuilder builder; + builder.properties(parquet::ArrowReaderProperties(false)); + auto openStatus = builder.Open(std::move(file)); + if (!openStatus.ok()) { + return TStringBuilder{} << "couldn't open parquet file, check format parameters: " << openStatus.ToString(); + } + + std::unique_ptr reader; + auto readerStatus = builder.Build(&reader); + if (!readerStatus.ok()) { + return TStringBuilder{} << "couldn't read parquet file, check format parameters: " << readerStatus.ToString(); } -} -std::unique_ptr MakeCsvConfig(const THashMap&) { // TODO: extract params - return std::make_unique(); + std::shared_ptr schema; + auto schemaRes = reader->GetSchema(&schema); + if (!schemaRes.ok()) { + return TStringBuilder{} << "couldn't parse parquet file, check format parameters: " << schemaRes.ToString(); + } + + return schema->fields(); } -std::unique_ptr MakeTsvConfig(const THashMap& params) { - auto config = MakeCsvConfig(params); - config->ParseOpts.delimiter = '\t'; - return config; +std::variant InferJsonTypes(std::shared_ptr file, std::shared_ptr config) { + int64_t fileSize; + if (auto sizeStatus = 
file->GetSize().Value(&fileSize); !sizeStatus.ok()) { + return TStringBuilder{} << "coudn't get file size: " << sizeStatus.ToString(); + } + + std::shared_ptr reader; + auto readerStatus = arrow::json::TableReader::Make( + arrow::default_memory_pool(), + std::move(file), + arrow::json::ReadOptions{.use_threads = false, .block_size = static_cast(fileSize)}, + config->ParseOpts + ).Value(&reader); + + if (!readerStatus.ok()) { + return TString{TStringBuilder{} << "couldn't open json file, check format and compression parameters: " << readerStatus.ToString()}; + } + + std::shared_ptr table; + auto tableRes = reader->Read().Value(&table); + + if (!tableRes.ok()) { + return TString{TStringBuilder{} << "couldn't parse json file, check format and compression parameters: " << tableRes.ToString()}; + } + + return table->fields(); } -std::unique_ptr MakeFormatConfig(EFileFormat format, const THashMap& params) { - switch (format) { +std::variant InferType(std::shared_ptr file, std::shared_ptr config) { + switch (config->Format) { case EFileFormat::CsvWithNames: - return MakeCsvConfig(params); + return InferCsvTypes(std::move(file), std::dynamic_pointer_cast(config)); case EFileFormat::TsvWithNames: - return MakeTsvConfig(params); + return InferCsvTypes(std::move(file), std::dynamic_pointer_cast(config)); + case EFileFormat::Parquet: + return InferParquetTypes(std::move(file)); + case EFileFormat::JsonEachRow: + case EFileFormat::JsonList: + return InferJsonTypes(std::move(file), std::dynamic_pointer_cast(config)); case EFileFormat::Undefined: default: - return nullptr; + return TStringBuilder{} << "unexpected format: " << ConvertFileFormat(config->Format); } } @@ -218,13 +269,9 @@ std::unique_ptr MakeFormatConfig(EFileFormat format, const THashMa class TArrowInferencinator : public NActors::TActorBootstrapped { public: - TArrowInferencinator(NActors::TActorId arrowFetcher, EFileFormat format, const THashMap& params) - : Format_{format} - , Config_{MakeFormatConfig(Format_, 
params)} - , ArrowFetcherId_{arrowFetcher} - { - Y_ABORT_UNLESS(IsArrowInferredFormat(Format_)); - } + TArrowInferencinator(NActors::TActorId arrowFetcher) + : ArrowFetcherId_{arrowFetcher} + {} void Bootstrap() { Become(&TArrowInferencinator::WorkingState); @@ -242,40 +289,46 @@ class TArrowInferencinator : public NActors::TActorBootstrappedSender; + } + auto& file = *ev->Get(); - auto mbArrowFields = InferType(Format_, file.File, *Config_); + auto mbArrowFields = InferType(file.File, file.Config); if (std::holds_alternative(mbArrowFields)) { - ctx.Send(RequesterId_, MakeError(file.Path, NFq::TIssuesIds::INTERNAL_ERROR, std::get(mbArrowFields))); + ctx.Send(RequesterId_, MakeErrorSchema(file.Path, NFq::TIssuesIds::INTERNAL_ERROR, std::get(mbArrowFields))); + RequesterId_ = {}; return; } - auto& arrowFields = std::get(mbArrowFields); std::vector ydbFields; for (const auto& field : arrowFields) { - ydbFields.emplace_back(); - auto& ydbField = ydbFields.back(); - if (!ArrowToYdbType(*ydbField.mutable_type(), *field->type())) { - ctx.Send(RequesterId_, MakeError(file.Path, NFq::TIssuesIds::UNSUPPORTED, TStringBuilder{} << "couldn't convert arrow type to ydb: " << field->ToString())); - return; + Ydb::Column column; + if (!ArrowToYdbType(*column.mutable_type(), *field->type(), file.Config)) { + continue; } - ydbField.mutable_name()->assign(field->name()); + if (field->name().empty()) { + continue; + } + column.mutable_name()->assign(field->name()); + ydbFields.push_back(column); } + ctx.Send(RequesterId_, new TEvInferredFileSchema(file.Path, std::move(ydbFields))); + RequesterId_ = {}; } void HandleFileError(TEvFileError::TPtr& ev, const NActors::TActorContext& ctx) { - Cout << "TArrowInferencinator::HandleFileError" << Endl; - ctx.Send(RequesterId_, ev->Release()); + LOG_D("TArrowInferencinator", "HandleFileError: " << ev->Get()->Issues.ToOneLineString()); + ctx.Send(RequesterId_, new TEvInferredFileSchema(ev->Get()->Path, std::move(ev->Get()->Issues))); } private: - 
EFileFormat Format_; - std::unique_ptr Config_; NActors::TActorId ArrowFetcherId_; NActors::TActorId RequesterId_; }; -NActors::IActor* CreateArrowInferencinator(NActors::TActorId arrowFetcher, EFileFormat format, const THashMap& params) { - return new TArrowInferencinator{arrowFetcher, format, params}; +NActors::IActor* CreateArrowInferencinator(NActors::TActorId arrowFetcher) { + return new TArrowInferencinator{arrowFetcher}; } } // namespace NKikimr::NExternalSource::NObjectStorage::NInference diff --git a/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.h b/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.h index a6c15dda509a..184bb9c9eb58 100644 --- a/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.h +++ b/ydb/core/external_sources/object_storage/inference/arrow_inferencinator.h @@ -9,6 +9,7 @@ enum class EFileFormat { CsvWithNames, TsvWithNames, JsonEachRow, + JsonList, Parquet, }; @@ -22,6 +23,9 @@ constexpr EFileFormat ConvertFileFormat(TStringBuf format) { if (format == "json_each_row") { return EFileFormat::JsonEachRow; } + if (format == "json_list") { + return EFileFormat::JsonList; + } if (format == "parquet") { return EFileFormat::Parquet; } @@ -37,6 +41,8 @@ constexpr TStringBuf ConvertFileFormat(EFileFormat format) { return "tsv_with_names"; case EFileFormat::JsonEachRow: return "json_each_row"; + case EFileFormat::JsonList: + return "json_list"; case EFileFormat::Parquet: return "parquet"; case EFileFormat::Undefined: @@ -53,5 +59,5 @@ constexpr bool IsArrowInferredFormat(TStringBuf format) { return IsArrowInferredFormat(ConvertFileFormat(format)); } -NActors::IActor* CreateArrowInferencinator(NActors::TActorId arrowFetcher, EFileFormat format, const THashMap& params); +NActors::IActor* CreateArrowInferencinator(NActors::TActorId arrowFetcher); } // namespace NKikimr::NExternalSource::NObjectStorage::NInference diff --git 
a/ydb/core/external_sources/object_storage/inference/infer_config.cpp b/ydb/core/external_sources/object_storage/inference/infer_config.cpp new file mode 100644 index 000000000000..aee7ed048baf --- /dev/null +++ b/ydb/core/external_sources/object_storage/inference/infer_config.cpp @@ -0,0 +1,100 @@ +#include "infer_config.h" + +#include + +namespace NKikimr::NExternalSource::NObjectStorage::NInference { + +namespace { + +std::shared_ptr MakeCsvConfig(const THashMap& params) { + auto config = std::make_shared(); + if (auto delimiter = params.FindPtr("csvdelimiter"); delimiter) { + if (delimiter->Size() != 1) { + throw yexception() << "invalid parameter: csv_delimiter must be single character"; + } + config->ParseOpts.delimiter = (*delimiter)[0]; + } + config->ConvOpts.timestamp_parsers.push_back(arrow::TimestampParser::MakeStrptime("\%Y-\%m-\%d \%H:\%M:\%S")); + return config; +} + +std::shared_ptr MakeTsvConfig(const THashMap&) { + auto config = std::make_shared(); + config->ParseOpts.delimiter = '\t'; + config->ConvOpts.timestamp_parsers.push_back(arrow::TimestampParser::MakeStrptime("\%Y-\%m-\%d \%H:\%M:\%S")); + return config; +} + +std::shared_ptr MakeParquetConfig(const THashMap&) { + return std::make_shared(); +} + +std::shared_ptr MakeJsonEachRowConfig(const THashMap&) { + auto config = std::make_shared(); + config->ParseOpts.newlines_in_values = true; + return config; +} + +std::shared_ptr MakeJsonListConfig(const THashMap&) { + return std::make_shared(); +} + +} + +std::shared_ptr MakeFormatConfig(const THashMap& params) { + static THashSet supportedParams { + "format", + "compression", + "filepattern", + "partitionedby", + "projection", + "csvdelimiter", + }; + + for (const auto& [param, value] : params) { + if (!supportedParams.contains(param)) { + throw yexception() << "parameter is not supported with type inference: " << param; + } + } + + EFileFormat format; + if (auto formatPtr = params.FindPtr("format"); formatPtr) { + format = 
ConvertFileFormat(*formatPtr); + } else { + throw yexception() << "format unspecified, use format parameter with type inferring"; + } + + if (auto delimiter = params.FindPtr("csvdelimiter"); delimiter) { + if (format != EFileFormat::CsvWithNames) { + throw yexception() << "invalid parameter: csv_delimiter should only be specified for 'csv_with_names' format"; + } + } + + std::shared_ptr config; + switch (format) { + case EFileFormat::CsvWithNames: + config = MakeCsvConfig(params); + break; + case EFileFormat::TsvWithNames: + config = MakeTsvConfig(params); + break; + case EFileFormat::Parquet: + config = MakeParquetConfig(params); + break; + case EFileFormat::JsonEachRow: + config = MakeJsonEachRowConfig(params); + break; + case EFileFormat::JsonList: + config = MakeJsonListConfig(params); + break; + case EFileFormat::Undefined: + default: + throw yexception() << "invalid parameter: unknown format specified"; + } + + config->Format = format; + config->ShouldMakeOptional = true; + return config; +} + +} // namespace NKikimr::NExternalSource::NObjectStorage::NInference \ No newline at end of file diff --git a/ydb/core/external_sources/object_storage/inference/infer_config.h b/ydb/core/external_sources/object_storage/inference/infer_config.h new file mode 100644 index 000000000000..c99d2c59dde6 --- /dev/null +++ b/ydb/core/external_sources/object_storage/inference/infer_config.h @@ -0,0 +1,31 @@ +#pragma once + +#include "arrow_inferencinator.h" + +#include +#include + +namespace NKikimr::NExternalSource::NObjectStorage::NInference { + +struct FormatConfig { + virtual ~FormatConfig() noexcept = default; + + EFileFormat Format; + bool ShouldMakeOptional; +}; + +struct CsvConfig : public FormatConfig { + arrow::csv::ParseOptions ParseOpts = arrow::csv::ParseOptions::Defaults(); + arrow::csv::ConvertOptions ConvOpts = arrow::csv::ConvertOptions::Defaults(); +}; + +using TsvConfig = CsvConfig; +using ParquetConfig = FormatConfig; + +struct JsonConfig : public FormatConfig 
{ + arrow::json::ParseOptions ParseOpts = arrow::json::ParseOptions::Defaults(); +}; + +std::shared_ptr MakeFormatConfig(const THashMap& params = {}); + +} // namespace NKikimr::NExternalSource::NObjectStorage::NInference \ No newline at end of file diff --git a/ydb/core/external_sources/object_storage/inference/ut/arrow_inference_ut.cpp b/ydb/core/external_sources/object_storage/inference/ut/arrow_inference_ut.cpp index 8edd7a424212..a57267cb8b50 100644 --- a/ydb/core/external_sources/object_storage/inference/ut/arrow_inference_ut.cpp +++ b/ydb/core/external_sources/object_storage/inference/ut/arrow_inference_ut.cpp @@ -45,13 +45,14 @@ class ArrowInferenceTest : public testing::Test { BaseUrl, Gateway, NYql::IHTTPGateway::TRetryPolicy::GetNoRetryPolicy(), - NYql::TS3Credentials::TAuthInfo{}), 1); + NYql::TS3Credentials{}), 1); } NActors::TActorId RegisterInferencinator(TStringBuf formatStr) { - auto format = NInference::ConvertFileFormat(formatStr); - auto arrowFetcher = ActorSystem.Register(NInference::CreateArrowFetchingActor(S3ActorId, format), 1); - return ActorSystem.Register(NInference::CreateArrowInferencinator(arrowFetcher, format, {}), 1); + THashMap params; + params["format"] = formatStr; + auto arrowFetcher = ActorSystem.Register(NInference::CreateArrowFetchingActor(S3ActorId, params), 1); + return ActorSystem.Register(NInference::CreateArrowInferencinator(arrowFetcher), 1); } void TearDown() override { @@ -85,7 +86,7 @@ TEST_F(ArrowInferenceTest, csv_simple) { auto inferencinatorId = RegisterInferencinator("csv_with_names"); ActorSystem.WrapInActorContext(EdgeActorId, [this, inferencinatorId] { - NActors::TActivationContext::AsActorContext().Send(inferencinatorId, new TEvInferFileSchema(TString{Path})); + NActors::TActivationContext::AsActorContext().Send(inferencinatorId, new TEvInferFileSchema(TString{Path}, 0)); }); std::unique_ptr event = ActorSystem.WaitForEdgeActorEvent({EdgeActorId}); @@ -93,16 +94,16 @@ TEST_F(ArrowInferenceTest, csv_simple) { 
ASSERT_NE(response, nullptr); auto& fields = response->Fields; - ASSERT_TRUE(fields[0].type().has_type_id()); - ASSERT_EQ(response->Fields[0].type().type_id(), Ydb::Type::INT64); - ASSERT_EQ(response->Fields[0].name(), "A"); + ASSERT_TRUE(fields[0].type().optional_type().item().has_type_id()); + ASSERT_EQ(fields[0].type().optional_type().item().type_id(), Ydb::Type::INT64); + ASSERT_EQ(fields[0].name(), "A"); ASSERT_TRUE(fields[1].type().has_type_id()); ASSERT_EQ(fields[1].type().type_id(), Ydb::Type::UTF8); ASSERT_EQ(fields[1].name(), "B"); - ASSERT_TRUE(fields[2].type().has_type_id()); - ASSERT_EQ(fields[2].type().type_id(), Ydb::Type::DOUBLE); + ASSERT_TRUE(fields[2].type().optional_type().item().has_type_id()); + ASSERT_EQ(fields[2].type().optional_type().item().type_id(), Ydb::Type::DOUBLE); ASSERT_EQ(fields[2].name(), "C"); } @@ -121,7 +122,7 @@ TEST_F(ArrowInferenceTest, tsv_simple) { auto inferencinatorId = RegisterInferencinator("tsv_with_names"); ActorSystem.WrapInActorContext(EdgeActorId, [this, inferencinatorId] { - NActors::TActivationContext::AsActorContext().Send(inferencinatorId, new TEvInferFileSchema(TString{Path})); + NActors::TActivationContext::AsActorContext().Send(inferencinatorId, new TEvInferFileSchema(TString{Path}, 0)); }); std::unique_ptr event = ActorSystem.WaitForEdgeActorEvent({EdgeActorId}); @@ -129,16 +130,16 @@ TEST_F(ArrowInferenceTest, tsv_simple) { ASSERT_NE(response, nullptr); auto& fields = response->Fields; - ASSERT_TRUE(fields[0].type().has_type_id()); - ASSERT_EQ(response->Fields[0].type().type_id(), Ydb::Type::INT64); - ASSERT_EQ(response->Fields[0].name(), "A"); + ASSERT_TRUE(fields[0].type().optional_type().item().has_type_id()); + ASSERT_EQ(fields[0].type().optional_type().item().type_id(), Ydb::Type::INT64); + ASSERT_EQ(fields[0].name(), "A"); ASSERT_TRUE(fields[1].type().has_type_id()); ASSERT_EQ(fields[1].type().type_id(), Ydb::Type::UTF8); ASSERT_EQ(fields[1].name(), "B"); - 
ASSERT_TRUE(fields[2].type().has_type_id()); - ASSERT_EQ(fields[2].type().type_id(), Ydb::Type::DOUBLE); + ASSERT_TRUE(fields[2].type().optional_type().item().has_type_id()); + ASSERT_EQ(fields[2].type().optional_type().item().type_id(), Ydb::Type::DOUBLE); ASSERT_EQ(fields[2].name(), "C"); } diff --git a/ydb/core/external_sources/object_storage/inference/ut/ya.make b/ydb/core/external_sources/object_storage/inference/ut/ya.make index 3bc00815199f..a198a6b6e196 100644 --- a/ydb/core/external_sources/object_storage/inference/ut/ya.make +++ b/ydb/core/external_sources/object_storage/inference/ut/ya.make @@ -1,6 +1,7 @@ GTEST() PEERDIR( + ydb/library/yql/public/udf/service/stub ydb/core/external_sources/object_storage/inference ydb/core/external_sources/object_storage ydb/core/tx/scheme_board diff --git a/ydb/core/external_sources/object_storage/inference/ya.make b/ydb/core/external_sources/object_storage/inference/ya.make index 145f59393228..8ccd10efc751 100644 --- a/ydb/core/external_sources/object_storage/inference/ya.make +++ b/ydb/core/external_sources/object_storage/inference/ya.make @@ -1,14 +1,28 @@ LIBRARY() +ADDINCL( + ydb/library/yql/udfs/common/clickhouse/client/base + ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random + ydb/library/yql/udfs/common/clickhouse/client/src +) + +# Added because of library header contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h +CFLAGS( + -Wno-unused-parameter +) + SRCS( arrow_fetcher.cpp arrow_inferencinator.cpp + infer_config.cpp ) PEERDIR( contrib/libs/apache/arrow ydb/core/external_sources/object_storage + + ydb/library/yql/providers/s3/compressors ) END() diff --git a/ydb/core/external_sources/object_storage/s3_fetcher.cpp b/ydb/core/external_sources/object_storage/s3_fetcher.cpp index 1238147ee089..c9dc7ca45e32 100644 --- a/ydb/core/external_sources/object_storage/s3_fetcher.cpp +++ b/ydb/core/external_sources/object_storage/s3_fetcher.cpp @@ -10,11 +10,11 @@ class S3Fetcher : public 
NActors::TActorBootstrapped { TString url, NYql::IHTTPGateway::TPtr gateway, NYql::IHTTPGateway::TRetryPolicy::TPtr retryPolicy, - NYql::TS3Credentials::TAuthInfo authInfo) + const NYql::TS3Credentials& credentials) : Url_{std::move(url)} , Gateway_{std::move(gateway)} , RetryPolicy_{std::move(retryPolicy)} - , AuthInfo_{std::move(authInfo)} + , Credentials_(credentials) {} void Bootstrap() { @@ -60,12 +60,13 @@ class S3Fetcher : public NActors::TActorBootstrapped { void StartDownload(std::shared_ptr&& request, NActors::TActorSystem* actorSystem) { auto length = request->End - request->Start; + const auto& authInfo = Credentials_.GetAuthInfo(); auto headers = NYql::IHTTPGateway::MakeYcHeaders( request->RequestId.AsGuidString(), - AuthInfo_.GetToken(), + authInfo.GetToken(), {}, - AuthInfo_.GetAwsUserPwd(), - AuthInfo_.GetAwsSigV4() + authInfo.GetAwsUserPwd(), + authInfo.GetAwsSigV4() ); Gateway_->Download( @@ -79,15 +80,15 @@ class S3Fetcher : public NActors::TActorBootstrapped { TString Url_; NYql::IHTTPGateway::TPtr Gateway_; NYql::IHTTPGateway::TRetryPolicy::TPtr RetryPolicy_; - NYql::TS3Credentials::TAuthInfo AuthInfo_; + const NYql::TS3Credentials Credentials_; }; NActors::IActor* CreateS3FetcherActor( TString url, NYql::IHTTPGateway::TPtr gateway, NYql::IHTTPGateway::TRetryPolicy::TPtr retryPolicy, - NYql::TS3Credentials::TAuthInfo authInfo) { + const NYql::TS3Credentials& credentials) { - return new S3Fetcher(std::move(url), std::move(gateway), std::move(retryPolicy), std::move(authInfo)); + return new S3Fetcher(std::move(url), std::move(gateway), std::move(retryPolicy), credentials); } } // namespace NKikimr::NExternalSource::NObjectStorage diff --git a/ydb/core/external_sources/object_storage/s3_fetcher.h b/ydb/core/external_sources/object_storage/s3_fetcher.h index 51310ec3ca64..51b77210f5b2 100644 --- a/ydb/core/external_sources/object_storage/s3_fetcher.h +++ b/ydb/core/external_sources/object_storage/s3_fetcher.h @@ -13,5 +13,5 @@ NActors::IActor* 
CreateS3FetcherActor( TString url, NYql::IHTTPGateway::TPtr gateway, NYql::IHTTPGateway::TRetryPolicy::TPtr retryPolicy, - NYql::TS3Credentials::TAuthInfo authInfo); + const NYql::TS3Credentials& credentials); } // namespace NKikimr::NExternalSource::NObjectStorage diff --git a/ydb/core/external_sources/object_storage_ut.cpp b/ydb/core/external_sources/object_storage_ut.cpp index 37e67cf63497..129ad8febd7d 100644 --- a/ydb/core/external_sources/object_storage_ut.cpp +++ b/ydb/core/external_sources/object_storage_ut.cpp @@ -8,14 +8,14 @@ namespace NKikimr { Y_UNIT_TEST_SUITE(ObjectStorageTest) { Y_UNIT_TEST(SuccessValidation) { - auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false); + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); NKikimrExternalSources::TSchema schema; NKikimrExternalSources::TGeneral general; UNIT_ASSERT_NO_EXCEPTION(source->Pack(schema, general)); } Y_UNIT_TEST(FailedCreate) { - auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false); + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); NKikimrExternalSources::TSchema schema; NKikimrExternalSources::TGeneral general; general.mutable_attributes()->insert({"a", "b"}); @@ -23,12 +23,72 @@ Y_UNIT_TEST_SUITE(ObjectStorageTest) { } Y_UNIT_TEST(FailedValidation) { - auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false); + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); NKikimrExternalSources::TSchema schema; NKikimrExternalSources::TGeneral general; general.mutable_attributes()->insert({"projection.h", "b"}); UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Partition by must always be specified"); } + + 
Y_UNIT_TEST(FailedJsonListValidation) { + static auto invalidTypes = { + Ydb::Type::DATE, + Ydb::Type::DATETIME, + Ydb::Type::TIMESTAMP, + Ydb::Type::INTERVAL, + Ydb::Type::DATE32, + Ydb::Type::DATETIME64, + Ydb::Type::TIMESTAMP64, + Ydb::Type::INTERVAL64, + Ydb::Type::TZ_DATE, + Ydb::Type::TZ_DATETIME, + Ydb::Type::TZ_TIMESTAMP, + }; + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); + NKikimrExternalSources::TSchema schema; + for (const auto typeId : invalidTypes) { + auto newColumn = schema.add_column(); + newColumn->mutable_type()->set_type_id(typeId); + } + NKikimrExternalSources::TGeneral general; + general.mutable_attributes()->insert({"format", "json_list"}); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Date, Timestamp and Interval types are not allowed in json_list format"); + } + + Y_UNIT_TEST(FailedOptionalTypeValidation) { + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); + NKikimrExternalSources::TSchema schema; + NKikimrExternalSources::TGeneral general; + auto newColumn = schema.add_column(); + newColumn->mutable_type()->mutable_optional_type()->mutable_item()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::INT32); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Double optional types are not supported"); + } + + Y_UNIT_TEST(WildcardsValidation) { + auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false); + NKikimrExternalSources::TSchema schema; + + { // location + NKikimrExternalSources::TGeneral general; + general.set_location("{"); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Location '{' contains invalid wildcard:"); + } + + { // file pattern + NKikimrExternalSources::TGeneral 
general; + general.mutable_attributes()->insert({"file_pattern", "{"}); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "File pattern '{' contains invalid wildcard:"); + general.set_location("/test_file"); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Path pattern cannot be used with file_pattern"); + } + + { // partitioned by + NKikimrExternalSources::TGeneral general; + general.set_location("*"); + general.mutable_attributes()->insert({"partitioned_by", "[year]"}); + UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Location '*' contains wildcards"); + } + } } } // NKikimr diff --git a/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp b/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp index 4eb9da765afa..89ca4366fa76 100644 --- a/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp +++ b/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp @@ -1,12 +1,13 @@ +#include #include #include -#include +#include #include #include #include #include -#include #include +#include #include @@ -38,15 +39,40 @@ TString Exec(const TString& cmd) { TString GetExternalPort(const TString& service, const TString& port) { auto dockerComposeBin = BinaryPath("library/recipes/docker_compose/bin/docker-compose"); - auto composeFileYml = ArcadiaSourceRoot() + "/ydb/core/external_sources/s3/ut/docker-compose.yml"; + auto composeFileYml = ArcadiaFromCurrentLocation(__SOURCE_FILE__, "docker-compose.yml"); auto result = StringSplitter(Exec(dockerComposeBin + " -f " + composeFileYml + " port " + service + " " + port)).Split(':').ToList(); return result ? 
Strip(result.back()) : TString{}; } +void WaitBucket(std::shared_ptr kikimr, const TString& externalDataSourceName) { + auto db = kikimr->GetQueryClient(); + for (size_t i = 0; i < 100; i++) { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`/a/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + if (readyOp.Metadata().ExecStatus == EExecStatus::Completed) { + return; + } + Sleep(TDuration::Seconds(1)); + } + UNIT_FAIL("Bucket isn't ready"); +} + Y_UNIT_TEST_SUITE(S3AwsCredentials) { Y_UNIT_TEST(ExecuteScriptWithEqSymbol) { const TString externalDataSourceName = "/Root/external_data_source"; - auto kikimr = MakeKikimrRunner(true); + auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory(); + auto kikimr = MakeKikimrRunner(true, nullptr, nullptr, std::nullopt, s3ActorsFactory); auto tc = kikimr->GetTableClient(); auto session = tc.CreateSession().GetValueSync().GetSession(); const TString query = fmt::format(R"( @@ -59,12 +85,15 @@ Y_UNIT_TEST_SUITE(S3AwsCredentials) { AWS_ACCESS_KEY_ID_SECRET_NAME="id", AWS_SECRET_ACCESS_KEY_SECRET_NAME="key", AWS_REGION="ru-central-1" - );)", + ); + GRANT ALL ON `{external_source}` TO `root1@builtin`; + )", "external_source"_a = externalDataSourceName, "location"_a = "localhost:" + GetExternalPort("minio", "9000") + "/datalake/" ); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + WaitBucket(kikimr, 
externalDataSourceName); auto db = kikimr->GetQueryClient(); { auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( @@ -123,6 +152,186 @@ Y_UNIT_TEST_SUITE(S3AwsCredentials) { UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); } + + { + auto db = kikimr->GetQueryClient(NYdb::NQuery::TClientSettings().AuthToken("root1@builtin")); + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`/a/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Failed, readyOp.Status().GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS_C(readyOp.Status().GetIssues().ToString(), "secret with name 'id' not found", readyOp.Status().GetIssues().ToString()); + } + { + const TString query = R"( + CREATE OBJECT `id:root1@builtin` (TYPE SECRET_ACCESS); + CREATE OBJECT `key:root1@builtin` (TYPE SECRET_ACCESS); + )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`/a/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName)).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + TResultSetParser resultSet(results.ExtractResultSet()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo"); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); + } + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + TFetchScriptResultsResult results = 
db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + TResultSetParser resultSet(results.ExtractResultSet()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 4); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo"); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); + } + + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + INSERT INTO `{external_source}`.`exp_folder/` WITH (FORMAT = "csv_with_names") + SELECT "Hello, world!" AS Data + )", "external_source"_a = externalDataSourceName)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } + } + + } + + Y_UNIT_TEST(TestInsertEscaping) { + const TString externalDataSourceName = "/Root/external_data_source"; + auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory(); + auto kikimr = MakeKikimrRunner(true, nullptr, nullptr, std::nullopt, s3ActorsFactory); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + const TString query = fmt::format(R"( + CREATE OBJECT id (TYPE SECRET) WITH (value=`minio`); + CREATE OBJECT key (TYPE SECRET) WITH (value=`minio123`); + CREATE EXTERNAL DATA SOURCE 
`{external_source}` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="id", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="key", + AWS_REGION="ru-central-1" + ); + )", + "external_source"_a = externalDataSourceName, + "location"_a = "localhost:" + GetExternalPort("minio", "9000") + "/datalake/" + ); + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + WaitBucket(kikimr, externalDataSourceName); + + auto db = kikimr->GetQueryClient(); + + TString path = TStringBuilder() << "exp_folder/some_" << EscapeC(GetSymbolsString(' ', '~', "*?{}`")) << "\\`"; + + { + // NB: AtomicUploadCommit = "false" because in minio ListMultipartUploads by prefix is not supported + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + PRAGMA s3.AtomicUploadCommit = "false"; + INSERT INTO `{external_source}`.`{path}/` WITH (FORMAT = "csv_with_names") + SELECT * FROM `{external_source}`.`/a/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } + + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`{path}/` WITH ( + format="csv_with_names", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName, 
"path"_a = path)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + TResultSetParser resultSet(results.ExtractResultSet()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo"); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); + } } } diff --git a/ydb/core/external_sources/s3/ut/ya.make b/ydb/core/external_sources/s3/ut/ya.make index da4d33248657..b9e006d13ec9 100644 --- a/ydb/core/external_sources/s3/ut/ya.make +++ b/ydb/core/external_sources/s3/ut/ya.make @@ -28,13 +28,19 @@ IF (AUTOCHECK) ) ENDIF() +ENV(COMPOSE_HTTP_TIMEOUT=1200) # during parallel tests execution there could be huge disk io, which triggers timeouts in docker-compose INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) IF (OPENSOURCE) - # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, - # which makes it impossible to run them during precommit checks on Github CI. - # Next several lines forces these tests to be MEDIUM. 
To see discussion, visit YDBOPS-8928. - SIZE(MEDIUM) + IF (SANITIZER_TYPE) + # Too huge for precommit check with sanitizers + SIZE(LARGE) + ELSE() + # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, + # which makes it impossible to run them during precommit checks on Github CI. + # Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. + SIZE(MEDIUM) + ENDIF() SET(TEST_TAGS_VALUE) SET(TEST_REQUIREMENTS_VALUE) diff --git a/ydb/core/external_sources/ya.make b/ydb/core/external_sources/ya.make index ef67de3ab864..2ca381af561a 100644 --- a/ydb/core/external_sources/ya.make +++ b/ydb/core/external_sources/ya.make @@ -1,7 +1,3 @@ -RECURSE( - object_storage -) - LIBRARY() SRCS( @@ -12,18 +8,19 @@ SRCS( ) PEERDIR( - ydb/core/external_sources/object_storage/inference - ydb/library/actors/http - ydb/library/yql/providers/common/gateway + contrib/libs/apache/arrow library/cpp/regex/pcre library/cpp/scheme ydb/core/base + ydb/core/external_sources/object_storage + ydb/core/external_sources/object_storage/inference ydb/core/protos + ydb/library/actors/http ydb/library/yql/providers/common/db_id_async_resolver + ydb/library/yql/providers/common/gateway ydb/library/yql/providers/s3/common ydb/library/yql/providers/s3/object_listers ydb/library/yql/providers/s3/path_generator - ydb/library/yql/providers/common/gateway ydb/library/yql/public/issue ydb/public/sdk/cpp/client/ydb_params ydb/public/sdk/cpp/client/ydb_value @@ -37,4 +34,6 @@ RECURSE_FOR_TESTS( RECURSE( hive_metastore + object_storage + s3 ) diff --git a/ydb/core/formats/arrow/accessor/abstract/constructor.cpp b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp new file mode 100644 index 000000000000..51c2e86cadf7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp @@ -0,0 +1,11 @@ +#include "constructor.h" +#include + +namespace NKikimr::NArrow::NAccessor { + +TConstructorContainer 
TConstructorContainer::GetDefaultConstructor() { + static std::shared_ptr result = std::make_shared(); + return result; +} + +} diff --git a/ydb/core/formats/arrow/accessor/abstract/constructor.h b/ydb/core/formats/arrow/accessor/abstract/constructor.h new file mode 100644 index 000000000000..aa99260e097a --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class IConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TConstructor; + +private: + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const = 0; + virtual TConclusion> DoConstructDefault( + const TChunkConstructionData& externalInfo) const = 0; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) = 0; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const = 0; + virtual TString DoDebugString() const { + return ""; + } + +public: + virtual ~IConstructor() = default; + + TString DebugString() const { + return TStringBuilder() << GetClassName() << ":" << DoDebugString(); + } + + TConclusion> Construct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const { + return DoConstruct(originalData, externalInfo); + } + + TConclusion> ConstructDefault(const TChunkConstructionData& externalInfo) const { + return DoConstructDefault(externalInfo); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + NKikimrArrowAccessorProto::TConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + void SerializeToProto(NKikimrArrowAccessorProto::TConstructor& proto) 
const { + proto = DoSerializeToProto(); + } + + std::shared_ptr GetExpectedSchema(const std::shared_ptr& resultColumn) const { + AFL_VERIFY(resultColumn); + return DoGetExpectedSchema(resultColumn); + } + + virtual TString GetClassName() const = 0; +}; + +class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + + static TConstructorContainer GetDefaultConstructor(); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/request.cpp b/ydb/core/formats/arrow/accessor/abstract/request.cpp new file mode 100644 index 000000000000..08bc3ee5c87d --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.cpp @@ -0,0 +1,16 @@ +#include "request.h" + +namespace NKikimr::NArrow::NAccessor { + +TConclusionStatus TRequestedConstructorContainer::DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + const std::optional className = features.Extract("DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME"); + if (!className) { + return TConclusionStatus::Success(); + } + if (!TBase::Initialize(*className)) { + return TConclusionStatus::Fail("don't know anything about class_name=" + *className); + } + return TBase::GetObjectPtr()->DeserializeFromRequest(features); +} + +} diff --git a/ydb/core/formats/arrow/accessor/abstract/request.h b/ydb/core/formats/arrow/accessor/abstract/request.h new file mode 100644 index 000000000000..42fbf3551623 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.h @@ -0,0 +1,57 @@ +#pragma once +#include "constructor.h" + +#include +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class IRequestedConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TRequestedConstructor; +private: + virtual TConclusion DoBuildConstructor() const = 0; + virtual 
NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) = 0; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) = 0; + +public: + virtual ~IRequestedConstructor() = default; + + NKikimrArrowAccessorProto::TRequestedConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + void SerializeToProto(NKikimrArrowAccessorProto::TRequestedConstructor& proto) const { + proto = DoSerializeToProto(); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + return DoDeserializeFromRequest(features); + } + + TConclusion BuildConstructor() const { + return DoBuildConstructor(); + } + + virtual TString GetClassName() const = 0; +}; + +class TRequestedConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/ya.make b/ydb/core/formats/arrow/accessor/abstract/ya.make new file mode 100644 index 000000000000..c40f1f297c18 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/conclusion + ydb/services/metadata/abstract + ydb/library/formats/arrow/accessor/abstract + ydb/library/formats/arrow/accessor/common + ydb/library/formats/arrow/protos +) + +SRCS( + constructor.cpp + request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp new file mode 
100644 index 000000000000..d804e92efdd7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp @@ -0,0 +1,46 @@ +#include "accessor.h" + +namespace NKikimr::NArrow::NAccessor { + +namespace { +class TSerializedChunkAccessor { +private: + const std::vector& Chunks; + const std::shared_ptr& Loader; + std::optional& Result; + +public: + TSerializedChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader, + std::optional& result) + : Chunks(chunks) + , Loader(loader) + , Result(result) { + } + ui64 GetChunksCount() const { + return Chunks.size(); + } + ui64 GetChunkLength(const ui32 idx) const { + return Chunks[idx].GetRecordsCount(); + } + void OnArray(const ui32 chunkIdx, const ui32 startPosition) const { + Result = IChunkedArray::TLocalChunkedArrayAddress(Chunks[chunkIdx].GetArrayVerified(Loader), startPosition, chunkIdx); + } +}; +} // namespace + +IChunkedArray::TLocalDataAddress TDeserializeChunkedArray::DoGetLocalData( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const { + AFL_VERIFY(false); + return IChunkedArray::TLocalDataAddress(nullptr, 0, 0); +} + +IChunkedArray::TLocalChunkedArrayAddress TDeserializeChunkedArray::DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TSerializedChunkAccessor accessor(Chunks, Loader, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.h b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h new file mode 100644 index 000000000000..ac7e0193d7bd --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { +private: + using TBase = 
NArrow::NAccessor::IChunkedArray; + +public: + class TChunk { + private: + YDB_READONLY(ui32, RecordsCount, 0); + std::shared_ptr PredefinedArray; + const TString Data; + + public: + TChunk(const std::shared_ptr& predefinedArray) + : PredefinedArray(predefinedArray) { + AFL_VERIFY(PredefinedArray); + RecordsCount = PredefinedArray->GetRecordsCount(); + } + + TChunk(const ui32 recordsCount, const TString& data) + : RecordsCount(recordsCount) + , Data(data) { + } + + std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { + if (PredefinedArray) { + return PredefinedArray; + } + return loader->ApplyVerified(Data, RecordsCount); + } + }; + +private: + std::shared_ptr Loader; + std::vector Chunks; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const override; + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + + virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetScalar(const ui32 /*index*/) const override { + AFL_VERIFY(false)("problem", "cannot use method"); + return nullptr; + } + virtual std::optional DoGetRawSize() const override { + return {}; + } + virtual std::shared_ptr DoGetMaxScalar() const override { + AFL_VERIFY(false); + return nullptr; + } + virtual std::shared_ptr DoGetChunkedArray() const override { + AFL_VERIFY(false); + return nullptr; + } + +public: + TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) + : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) + , Loader(loader) + , Chunks(std::move(chunks)) { + AFL_VERIFY(Loader); + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git 
a/ydb/core/formats/arrow/accessor/composite_serial/ya.make b/ydb/core/formats/arrow/accessor/composite_serial/ya.make new file mode 100644 index 000000000000..e8095e990285 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/formats/arrow/accessor/abstract + ydb/core/formats/arrow/common + ydb/core/formats/arrow/save_load +) + +SRCS( + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.cpp b/ydb/core/formats/arrow/accessor/plain/accessor.cpp new file mode 100644 index 000000000000..c606f2e1952b --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.cpp @@ -0,0 +1,89 @@ +#include "accessor.h" + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +std::optional TTrivialArray::DoGetRawSize() const { + return NArrow::GetArrayDataSize(Array); +} + +std::vector TTrivialArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + auto schema = std::make_shared(arrow::FieldVector({ std::make_shared("f", GetDataType()) })); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes( + arrow::RecordBatch::Make(schema, GetRecordsCount(), { Array }), fullSerializedData, splitSizes); + std::vector result; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 1); + result.emplace_back(std::make_shared(i.GetSlicedBatch()->column(0)), i.GetSerializedChunk()); + } + return result; +} + +std::shared_ptr TTrivialArray::DoGetMaxScalar() const { + auto minMaxPos = NArrow::FindMinMaxPosition(Array); + return NArrow::TStatusValidator::GetValid(Array->GetScalar(minMaxPos.second)); +} + +namespace { +class TChunkAccessor { +private: + std::shared_ptr ChunkedArray; + std::optional* Result; + +public: + TChunkAccessor(const std::shared_ptr& chunkedArray, std::optional& result) + : ChunkedArray(chunkedArray) + , 
Result(&result) { + } + ui64 GetChunksCount() const { + return (ui64)ChunkedArray->num_chunks(); + } + ui64 GetChunkLength(const ui32 idx) const { + return (ui64)ChunkedArray->chunk(idx)->length(); + } + void OnArray(const ui32 idx, const ui32 startPosition) const { + const auto& arr = ChunkedArray->chunk(idx); + *Result = IChunkedArray::TLocalDataAddress(arr, startPosition, idx); + } +}; + +} // namespace + +IChunkedArray::TLocalDataAddress TTrivialChunkedArray::DoGetLocalData( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TChunkAccessor accessor(Array, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +std::optional TTrivialChunkedArray::DoGetRawSize() const { + ui64 result = 0; + for (auto&& i : Array->chunks()) { + result += NArrow::GetArrayDataSize(i); + } + return result; +} + +std::shared_ptr TTrivialChunkedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Array->chunks()) { + if (!i->length()) { + continue; + } + auto minMaxPos = NArrow::FindMinMaxPosition(i); + auto scalarCurrent = NArrow::TStatusValidator::GetValid(i->GetScalar(minMaxPos.second)); + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + + return result; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.h b/ydb/core/formats/arrow/accessor/plain/accessor.h new file mode 100644 index 000000000000..a00826161c40 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.h @@ -0,0 +1,77 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TTrivialArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual std::optional DoGetRawSize() const override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const 
override { + return TLocalDataAddress(Array, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + return std::make_shared(Array); + } + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + return NArrow::TStatusValidator::GetValid(Array->GetScalar(index)); + } + virtual std::shared_ptr DoGetMaxScalar() const override; + virtual std::vector DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, TCommonChunkAddress(0, GetRecordsCount(), 0)); + } + +public: + TTrivialArray(const std::shared_ptr& data) + : TBase(data->length(), EType::Array, data->type()) + , Array(data) { + } +}; + +class TTrivialChunkedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override { + return Array; + } + virtual std::optional DoGetRawSize() const override; + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + auto chunk = GetChunkSlow(index); + return NArrow::TStatusValidator::GetValid(chunk.GetArray()->GetScalar(chunk.GetAddress().GetLocalIndex(index))); + } + virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, 
TCommonChunkAddress(0, 0, 0)); + } + +public: + TTrivialChunkedArray(const std::shared_ptr& data) + : TBase(data->length(), EType::ChunkedArray, data->type()) + , Array(data) { + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.cpp b/ydb/core/formats/arrow/accessor/plain/constructor.cpp new file mode 100644 index 000000000000..3ecf41502b33 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.cpp @@ -0,0 +1,33 @@ +#include "accessor.h" +#include "constructor.h" + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& /*externalInfo*/) const { + AFL_VERIFY(originalData->num_columns() == 1)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + return std::make_shared(originalData->column(0)); +} + +TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const { + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(externalInfo.GetColumnType(), externalInfo.GetDefaultValue(), externalInfo.GetRecordsCount())); +} + +NKikimrArrowAccessorProto::TConstructor TConstructor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TConstructor(); +} + +bool TConstructor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + return std::make_shared(arrow::FieldVector({ resultColumn })); +} + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.h b/ydb/core/formats/arrow/accessor/plain/constructor.h new file mode 100644 index 000000000000..57c366689eb0 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include 
+ +namespace NKikimr::NArrow::NAccessor::NPlain { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/request.cpp b/ydb/core/formats/arrow/accessor/plain/request.cpp new file mode 100644 index 000000000000..05a6ab128165 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NPlain { + +NKikimrArrowAccessorProto::TRequestedConstructor TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/plain/request.h 
b/ydb/core/formats/arrow/accessor/plain/request.h new file mode 100644 index 000000000000..19a8390f2df2 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/ya.make b/ydb/core/formats/arrow/accessor/plain/ya.make new file mode 100644 index 000000000000..5b3dd0bf6081 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/library/formats/arrow + ydb/library/formats/arrow/protos +) + +SRCS( + accessor.cpp + GLOBAL constructor.cpp + GLOBAL request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp new file mode 100644 index 000000000000..62c796b811d1 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp @@ -0,0 +1,280 @@ +#include "accessor.h" + +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +TSparsedArray::TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue) + : 
TBase(defaultArray.GetRecordsCount(), EType::SparsedArray, defaultArray.GetDataType()) + , DefaultValue(defaultValue) { + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == defaultArray.GetDataType()->id()); + } + std::optional current; + std::shared_ptr records; + ui32 sparsedRecordsCount = 0; + AFL_VERIFY(SwitchType(GetDataType()->id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TScalar = typename arrow::TypeTraits::ScalarType; + using TArray = typename arrow::TypeTraits::ArrayType; + using TBuilder = typename arrow::TypeTraits::BuilderType; + auto builderValue = NArrow::MakeBuilder(GetDataType()); + TBuilder* builderValueImpl = (TBuilder*)builderValue.get(); + auto builderIndex = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderIndexImpl = (arrow::UInt32Builder*)builderIndex.get(); + auto scalar = static_pointer_cast(DefaultValue); + for (ui32 pos = 0; pos < GetRecordsCount();) { + current = defaultArray.GetChunk(current, pos); + auto typedArray = static_pointer_cast(current->GetArray()); + for (ui32 i = 0; i < typedArray->length(); ++i) { + std::optional isDefault; + if (scalar) { + if constexpr (arrow::has_string_view()) { + isDefault = arrow::util::string_view((char*)scalar->value->data(), scalar->value->size()) == typedArray->GetView(i); + } else if constexpr (arrow::has_c_type()) { + isDefault = scalar->value == typedArray->Value(i); + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } else { + isDefault = typedArray->IsNull(i); + } + if (!*isDefault) { + if constexpr (arrow::has_string_view()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->GetView(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + ++sparsedRecordsCount; + } else if constexpr (arrow::has_c_type()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->Value(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + 
++sparsedRecordsCount; + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } + } + pos = current->GetAddress().GetGlobalFinishPosition(); + AFL_VERIFY(pos <= GetRecordsCount()); + } + std::vector> columns = { NArrow::TStatusValidator::GetValid(builderIndex->Finish()), + NArrow::TStatusValidator::GetValid(builderValue->Finish()) }; + records = arrow::RecordBatch::Make(BuildSchema(GetDataType()), sparsedRecordsCount, columns); + AFL_VERIFY_DEBUG(records->ValidateFull().ok()); + return true; + })); + AFL_VERIFY(records); + Records.emplace_back(0, GetRecordsCount(), records, DefaultValue); +} + +std::vector TSparsedArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + AFL_VERIFY(Records.size() == 1)("size", Records.size()); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes(Records.front().GetRecords(), fullSerializedData, splitSizes); + + std::vector result; + ui32 idx = 0; + ui32 startIdx = 0; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 2); + AFL_VERIFY(i.GetSlicedBatch()->column(0)->type()->id() == arrow::uint32()->id()); + auto UI32Column = static_pointer_cast(i.GetSlicedBatch()->column(0)); + ui32 nextStartIdx = NArrow::NAccessor::TSparsedArray::GetLastIndex(i.GetSlicedBatch()) + 1; + if (idx + 1 == chunks.size()) { + nextStartIdx = GetRecordsCount(); + } + std::shared_ptr batch; + { + std::unique_ptr builder = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderImpl = (arrow::UInt32Builder*)builder.get(); + for (ui32 rowIdx = 0; rowIdx < UI32Column->length(); ++rowIdx) { + TStatusValidator::Validate(builderImpl->Append(UI32Column->Value(rowIdx) - startIdx)); + } + auto colIndex = TStatusValidator::GetValid(builder->Finish()); + batch = arrow::RecordBatch::Make( + i.GetSlicedBatch()->schema(), i.GetSlicedBatch()->num_rows(), { colIndex, i.GetSlicedBatch()->column(1) }); + } + + ++idx; + { + TBuilder 
builder(DefaultValue, GetDataType()); + builder.AddChunk(nextStartIdx - startIdx, batch); + result.emplace_back(builder.Finish(), saver.Apply(batch)); + } + startIdx = nextStartIdx; + } + + return result; +} + +std::shared_ptr TSparsedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Records) { + auto scalarCurrent = i.GetMaxScalar(); + if (!scalarCurrent) { + continue; + } + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + return result; +} + +ui32 TSparsedArray::GetLastIndex(const std::shared_ptr& batch) { + AFL_VERIFY(batch); + AFL_VERIFY(batch->num_rows()); + auto c = batch->GetColumnByName("index"); + AFL_VERIFY(c)("schema", batch->schema()->ToString()); + AFL_VERIFY(c->type_id() == arrow::uint32()->id())("type", c->type()->ToString()); + auto ui32Column = static_pointer_cast(c); + return ui32Column->Value(ui32Column->length() - 1); +} + +namespace { +static thread_local THashMap> SimpleBatchesCache; +} + +NKikimr::NArrow::NAccessor::TSparsedArrayChunk TSparsedArray::MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) { + auto it = SimpleBatchesCache.find(type->ToString()); + if (it == SimpleBatchesCache.end()) { + it = SimpleBatchesCache.emplace(type->ToString(), NArrow::MakeEmptyBatch(BuildSchema(type))).first; + AFL_VERIFY(it->second->ValidateFull().ok()); + } + return TSparsedArrayChunk(0, recordsCount, it->second, defaultValue); +} + +IChunkedArray::TLocalDataAddress TSparsedArrayChunk::GetChunk( + const std::optional& /*chunkCurrent*/, const ui64 position, const ui32 chunkIdx) const { + const auto predCompare = [](const ui32 position, const TInternalChunkInfo& item) { + return position < item.GetStartExt(); + }; + auto it = std::upper_bound(RemapExternalToInternal.begin(), RemapExternalToInternal.end(), position, predCompare); + AFL_VERIFY(it != RemapExternalToInternal.begin()); + --it; + if (it->GetIsDefault()) { + return 
IChunkedArray::TLocalDataAddress( + NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } else { + return IChunkedArray::TLocalDataAddress( + ColValue->Slice(it->GetStartInt(), it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } +} + +std::vector> TSparsedArrayChunk::GetChunkedArray() const { + std::vector> chunks; + for (auto&& i : RemapExternalToInternal) { + if (i.GetIsDefault()) { + chunks.emplace_back(NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, i.GetSize())); + } else { + chunks.emplace_back(ColValue->Slice(i.GetStartInt(), i.GetSize())); + } + } + return chunks; +} + +TSparsedArrayChunk::TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue) + : RecordsCount(recordsCount) + , StartPosition(posStart) + , Records(records) + , DefaultValue(defaultValue) { + AFL_VERIFY(records->num_columns() == 2); + ColIndex = Records->GetColumnByName("index"); + AFL_VERIFY(ColIndex); + AFL_VERIFY(ColIndex->type_id() == arrow::uint32()->id()); + UI32ColIndex = static_pointer_cast(ColIndex); + if (UI32ColIndex->length()) { + AFL_VERIFY(UI32ColIndex->Value(UI32ColIndex->length() - 1) < recordsCount)("val", UI32ColIndex->Value(UI32ColIndex->length() - 1))( + "count", recordsCount); + } + NotDefaultRecordsCount = UI32ColIndex->length(); + RawValues = UI32ColIndex->raw_values(); + ColValue = Records->GetColumnByName("value"); + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == ColValue->type_id()); + } + ui32 nextIndex = 0; + ui32 startIndexExt = 0; + ui32 startIndexInt = 0; + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (nextIndex != UI32ColIndex->Value(idx)) { + if (idx - startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, idx - startIndexInt, false); + } + RemapExternalToInternal.emplace_back(nextIndex, 0, 
UI32ColIndex->Value(idx) - nextIndex, true); + startIndexExt = UI32ColIndex->Value(idx); + startIndexInt = idx; + } + nextIndex = UI32ColIndex->Value(idx) + 1; + } + if (UI32ColIndex->length() > startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, UI32ColIndex->length() - startIndexInt, false); + } + if (nextIndex != RecordsCount) { + RemapExternalToInternal.emplace_back(nextIndex, 0, RecordsCount - nextIndex, true); + } + ui32 count = 0; + for (auto&& i : RemapExternalToInternal) { + count += i.GetSize(); + } + for (ui32 i = 0; i + 1 < RemapExternalToInternal.size(); ++i) { + AFL_VERIFY(RemapExternalToInternal[i + 1].GetStartExt() == RemapExternalToInternal[i].GetStartExt() + RemapExternalToInternal[i].GetSize()); + } + AFL_VERIFY(count == RecordsCount)("count", count)("records_count", RecordsCount); + AFL_VERIFY(ColValue); +} + +ui64 TSparsedArrayChunk::GetRawSize() const { + return std::max(NArrow::GetBatchDataSize(Records), 8); +} + +std::shared_ptr TSparsedArrayChunk::GetScalar(const ui32 index) const { + AFL_VERIFY(index < RecordsCount); + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (UI32ColIndex->Value(idx) == index) { + return NArrow::TStatusValidator::GetValid(ColValue->GetScalar(idx)); + } + } + return DefaultValue; +} + +ui32 TSparsedArrayChunk::GetFirstIndexNotDefault() const { + if (UI32ColIndex->length()) { + return StartPosition + GetUI32ColIndex()->Value(0); + } else { + return StartPosition + GetRecordsCount(); + } +} + +std::shared_ptr TSparsedArrayChunk::GetMaxScalar() const { + if (!ColValue->length()) { + return DefaultValue; + } + auto minMax = NArrow::FindMinMaxPosition(ColValue); + auto currentScalar = NArrow::TStatusValidator::GetValid(ColValue->GetScalar(minMax.second)); + if (!DefaultValue || ScalarCompare(DefaultValue, currentScalar) < 0) { + return currentScalar; + } + return DefaultValue; +} + +void TSparsedArray::TBuilder::AddChunk(const ui32 recordsCount, const std::shared_ptr& 
data) { + AFL_VERIFY(data); + AFL_VERIFY(recordsCount); + AFL_VERIFY(data->num_rows() <= recordsCount)("rows", data->num_rows())("count", recordsCount); + AFL_VERIFY(data->num_columns() == 2)("count", data->num_columns()); + AFL_VERIFY(data->column(0)->type_id() == arrow::uint32()->id())("type", data->column(0)->type()->ToString()); + AFL_VERIFY_DEBUG(data->schema()->field(0)->name() == "index")("name", data->schema()->field(0)->name()); + if (data->num_rows()) { + auto* arr = static_cast(data->column(0).get()); + AFL_VERIFY(arr->Value(arr->length() - 1) < recordsCount)("val", arr->Value(arr->length() - 1))("count", recordsCount); + } + Chunks.emplace_back(RecordsCount, recordsCount, data, DefaultValue); + RecordsCount += recordsCount; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.h b/ydb/core/formats/arrow/accessor/sparsed/accessor.h new file mode 100644 index 000000000000..040224962239 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.h @@ -0,0 +1,189 @@ +#pragma once +#include + +#include +#include + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TSparsedArrayChunk: public TMoveOnly { +private: + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui32, StartPosition, 0); + YDB_READONLY_DEF(std::shared_ptr, Records); + std::shared_ptr DefaultValue; + + std::shared_ptr ColIndex; + const ui32* RawValues = nullptr; + ui32 NotDefaultRecordsCount = 0; + YDB_READONLY_DEF(std::shared_ptr, UI32ColIndex); + YDB_READONLY_DEF(std::shared_ptr, ColValue); + + class TInternalChunkInfo { + private: + YDB_READONLY(ui32, StartExt, 0); + YDB_READONLY(ui32, StartInt, 0); + YDB_READONLY(ui32, Size, 0); + YDB_READONLY(bool, IsDefault, false); + + public: + TInternalChunkInfo(const ui32 startExt, const ui32 startInt, const ui32 size, const bool defaultFlag) + : StartExt(startExt) + , StartInt(startInt) + , Size(size) + , IsDefault(defaultFlag) { + AFL_VERIFY(Size); + } 
+ + bool operator<(const TInternalChunkInfo& item) const { + return StartExt < item.StartExt; + } + }; + + std::vector RemapExternalToInternal; + +public: + ui32 GetFinishPosition() const { + return StartPosition + RecordsCount; + } + + ui32 GetNotDefaultRecordsCount() const { + return NotDefaultRecordsCount; + } + + ui32 GetIndexUnsafeFast(const ui32 i) const { + return RawValues[i]; + } + + ui32 GetFirstIndexNotDefault() const; + + std::shared_ptr GetMaxScalar() const; + + std::shared_ptr GetScalar(const ui32 index) const; + + IChunkedArray::TLocalDataAddress GetChunk( + const std::optional& chunkCurrent, const ui64 position, const ui32 chunkIdx) const; + + std::vector> GetChunkedArray() const; + + TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue); + + ui64 GetRawSize() const; +}; + +class TSparsedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + std::shared_ptr DefaultValue; + std::vector Records; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, 0, 0); + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + virtual std::vector DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override { + ui32 currentIdx = 0; + for (ui32 i = 0; i < Records.size(); ++i) { + if (currentIdx <= position && position < currentIdx + Records[i].GetRecordsCount()) { + return Records[i].GetChunk(chunkCurrent, position - currentIdx, i); + } + currentIdx += Records[i].GetRecordsCount(); + } + AFL_VERIFY(false); + return TLocalDataAddress(nullptr, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + 
std::vector> chunks; + for (auto&& i : Records) { + auto chunksLocal = i.GetChunkedArray(); + chunks.insert(chunks.end(), chunksLocal.begin(), chunksLocal.end()); + } + return std::make_shared(chunks, GetDataType()); + } + virtual std::optional DoGetRawSize() const override { + ui64 bytes = 0; + for (auto&& i : Records) { + bytes += i.GetRawSize(); + } + return bytes; + } + + TSparsedArray(std::vector&& data, const std::shared_ptr& defaultValue, + const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) + , Records(std::move(data)) { + } + + static ui32 GetLastIndex(const std::shared_ptr& batch); + + static std::shared_ptr BuildSchema(const std::shared_ptr& type) { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", type) }; + return std::make_shared(fields); + } + + static TSparsedArrayChunk MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount); + +public: + TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue); + + TSparsedArray(const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) { + Records.emplace_back(MakeDefaultChunk(defaultValue, type, recordsCount)); + } + + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + auto& chunk = GetSparsedChunk(index); + return chunk.GetScalar(index - chunk.GetStartPosition()); + } + + const TSparsedArrayChunk& GetSparsedChunk(const ui64 position) const { + const auto pred = [](const ui64 position, const TSparsedArrayChunk& item) { + return position < item.GetStartPosition(); + }; + auto it = std::upper_bound(Records.begin(), Records.end(), position, pred); + AFL_VERIFY(it != Records.begin()); + --it; + AFL_VERIFY(position < it->GetStartPosition() + it->GetRecordsCount()); + 
AFL_VERIFY(it->GetStartPosition() <= position); + return *it; + } + + class TBuilder { + private: + ui32 RecordsCount = 0; + std::vector Chunks; + std::shared_ptr DefaultValue; + std::shared_ptr Type; + + public: + TBuilder(const std::shared_ptr& defaultValue, const std::shared_ptr& type) + : DefaultValue(defaultValue) + , Type(type) { + } + + void AddChunk(const ui32 recordsCount, const std::shared_ptr& data); + + std::shared_ptr Finish() { + return std::shared_ptr(new TSparsedArray(std::move(Chunks), DefaultValue, Type, RecordsCount)); + } + }; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp new file mode 100644 index 000000000000..e3f45cd75327 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp @@ -0,0 +1,34 @@ +#include "accessor.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + arrow::FieldVector fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", resultColumn->type()) }; + return std::make_shared(fields); +} + +TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const { + return std::make_shared(externalInfo.GetDefaultValue(), externalInfo.GetColumnType(), externalInfo.GetRecordsCount()); +} + +TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const { + AFL_VERIFY(originalData->num_columns() == 2)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + NArrow::NAccessor::TSparsedArray::TBuilder builder(externalInfo.GetDefaultValue(), externalInfo.GetColumnType()); + builder.AddChunk(externalInfo.GetRecordsCount(), originalData); + return builder.Finish(); +} + +NKikimrArrowAccessorProto::TConstructor 
TConstructor::DoSerializeToProto() const { + NKikimrArrowAccessorProto::TConstructor result; + *result.MutableSparsed() = {}; + return result; +} + +bool TConstructor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.h b/ydb/core/formats/arrow/accessor/sparsed/constructor.h new file mode 100644 index 000000000000..0ccf5efdd70f --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.cpp b/ydb/core/formats/arrow/accessor/sparsed/request.cpp new file mode 100644 index 000000000000..d484341a95c0 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + 
+NKikimrArrowAccessorProto::TRequestedConstructor TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.h b/ydb/core/formats/arrow/accessor/sparsed/request.h new file mode 100644 index 000000000000..4be2d897b090 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/ya.make b/ydb/core/formats/arrow/accessor/sparsed/ya.make new file mode 100644 index 000000000000..c68f5f84f9a6 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +PEERDIR( + 
ydb/core/formats/arrow/accessor/abstract + ydb/library/formats/arrow + ydb/library/formats/arrow/protos +) + +SRCS( + GLOBAL constructor.cpp + GLOBAL request.cpp + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/ya.make b/ydb/core/formats/arrow/accessor/ya.make new file mode 100644 index 000000000000..197b97d9efe7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/core/formats/arrow/accessor/plain + ydb/core/formats/arrow/accessor/composite_serial + ydb/core/formats/arrow/accessor/sparsed +) + +END() diff --git a/ydb/core/formats/arrow/arrow_filter.cpp b/ydb/core/formats/arrow/arrow_filter.cpp index 58cd7116baed..c404a016f4bd 100644 --- a/ydb/core/formats/arrow/arrow_filter.cpp +++ b/ydb/core/formats/arrow/arrow_filter.cpp @@ -1,5 +1,5 @@ #include "arrow_filter.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include "common/container.h" #include "common/adapter.h" diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 2ee5626e5d40..1246b01e334c 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -1,13 +1,14 @@ #include "arrow_helpers.h" -#include "switch_type.h" -#include "common/validation.h" +#include "switch/switch_type.h" #include "permutations.h" #include "common/adapter.h" #include "serializer/native.h" #include "serializer/abstract.h" #include "serializer/stream.h" -#include "simple_arrays_cache.h" +#include +#include +#include #include #include @@ -105,11 +106,6 @@ arrow::Result> MakeArrowSchema(const std::vector< return fields.status(); } -TString SerializeSchema(const arrow::Schema& schema) { - auto buffer = TStatusValidator::GetValid(arrow::ipc::SerializeSchema(schema)); - return buffer->ToString(); -} - std::shared_ptr DeserializeSchema(const TString& str) { std::shared_ptr buffer(std::make_shared(str)); 
arrow::io::BufferReader reader(buffer); @@ -143,148 +139,6 @@ std::shared_ptr DeserializeBatch(const TString& blob, const } } -std::shared_ptr MakeEmptyBatch(const std::shared_ptr& schema, const ui32 rowsCount) { - std::vector> columns; - columns.reserve(schema->num_fields()); - - for (auto& field : schema->fields()) { - auto result = NArrow::TThreadSimpleArraysCache::GetNull(field->type(), rowsCount); - columns.emplace_back(result); - Y_ABORT_UNLESS(result); - } - return arrow::RecordBatch::Make(schema, rowsCount, columns); -} - -std::shared_ptr CombineBatches(const std::vector>& batches) { - if (batches.empty()) { - return nullptr; - } - auto table = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); - return table ? ToBatch(table, true) : nullptr; -} - -std::shared_ptr ToBatch(const std::shared_ptr& tableExt, const bool combine) { - if (!tableExt) { - return nullptr; - } - std::shared_ptr table; - if (combine) { - auto res = tableExt->CombineChunks(); - Y_ABORT_UNLESS(res.ok()); - table = *res; - } else { - table = tableExt; - } - std::vector> columns; - columns.reserve(table->num_columns()); - for (auto& col : table->columns()) { - AFL_VERIFY(col->num_chunks() == 1)("size", col->num_chunks())("size_bytes", GetTableDataSize(tableExt)) - ("schema", tableExt->schema()->ToString())("size_new", GetTableDataSize(table)); - columns.push_back(col->chunk(0)); - } - return arrow::RecordBatch::Make(table->schema(), table->num_rows(), columns); -} - -// Check if the permutation doesn't reorder anything -bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) { - if ((ui64)permutation.length() != originalLength) { - return false; - } - for (i64 i = 0; i < permutation.length(); ++i) { - if (permutation.Value(i) != (ui64)i) { - return false; - } - } - return true; -} - -std::shared_ptr Reorder(const std::shared_ptr& batch, - const std::shared_ptr& permutation, const bool canRemove) { - Y_ABORT_UNLESS(permutation->length() == 
batch->num_rows() || canRemove); - - auto res = IsTrivial(*permutation, batch->num_rows()) ? batch : arrow::compute::Take(batch, permutation); - Y_ABORT_UNLESS(res.ok()); - return (*res).record_batch(); -} - -THashMap> ShardingSplit(const std::shared_ptr& batch, const THashMap>& shardRows) { - AFL_VERIFY(batch); - std::shared_ptr permutation; - { - arrow::UInt64Builder builder; - Y_VERIFY_OK(builder.Reserve(batch->num_rows())); - - for (auto&& [shardId, rowIdxs]: shardRows) { - for (auto& row : rowIdxs) { - Y_VERIFY_OK(builder.Append(row)); - } - } - Y_VERIFY_OK(builder.Finish(&permutation)); - } - - auto reorderedBatch = Reorder(batch, permutation, false); - - THashMap> out; - - int offset = 0; - for (auto&& [shardId, shardRowIdxs] : shardRows) { - if (shardRowIdxs.empty()) { - continue; - } - out.emplace(shardId, reorderedBatch->Slice(offset, shardRowIdxs.size())); - offset += shardRowIdxs.size(); - } - - Y_ABORT_UNLESS(offset == batch->num_rows()); - return out; -} - -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector>& shardRows, const ui32 numShards) { - AFL_VERIFY(batch); - std::shared_ptr permutation; - { - arrow::UInt64Builder builder; - Y_VERIFY_OK(builder.Reserve(batch->num_rows())); - - for (ui32 shardNo = 0; shardNo < numShards; ++shardNo) { - for (auto& row : shardRows[shardNo]) { - Y_VERIFY_OK(builder.Append(row)); - } - } - Y_VERIFY_OK(builder.Finish(&permutation)); - } - - auto reorderedBatch = Reorder(batch, permutation, false); - - std::vector> out(numShards); - - int offset = 0; - for (ui32 shardNo = 0; shardNo < numShards; ++shardNo) { - int length = shardRows[shardNo].size(); - if (length) { - out[shardNo] = reorderedBatch->Slice(offset, length); - offset += length; - } - } - - Y_ABORT_UNLESS(offset == batch->num_rows()); - return out; -} - -std::vector> ShardingSplit(const std::shared_ptr& batch, - const std::vector& sharding, ui32 numShards) { - AFL_VERIFY(batch); - Y_ABORT_UNLESS((size_t)batch->num_rows() == 
sharding.size()); - - std::vector> shardRows(numShards); - for (size_t row = 0; row < sharding.size(); ++row) { - ui32 shardNo = sharding[row]; - Y_ABORT_UNLESS(shardNo < numShards); - shardRows[shardNo].push_back(row); - } - return ShardingSplit(batch, shardRows, numShards); -} - void DedupSortedBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, std::vector>& out) { @@ -322,37 +176,6 @@ void DedupSortedBatch(const std::shared_ptr& batch, Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(out.back(), sortingKey)); } -template -static bool IsSelfSorted(const std::shared_ptr& batch) { - if (batch->num_rows() < 2) { - return true; - } - auto& columns = batch->columns(); - - for (int i = 1; i < batch->num_rows(); ++i) { - TRawReplaceKey prev(&columns, i - 1); - TRawReplaceKey current(&columns, i); - if constexpr (desc) { - if (prev < current) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "prev < current")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } else { - if (current < prev) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "current < prev")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } - if constexpr (uniq) { - if (prev == current) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "equal")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } - } - return true; -} - bool IsSorted(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, bool desc) { auto keyBatch = TColumnOperator().Adapt(batch, sortingKey).DetachResult(); @@ -373,267 +196,6 @@ bool IsSortedAndUnique(const std::shared_ptr& batch, } } -bool HasAllColumns(const std::shared_ptr& batch, const std::shared_ptr& schema) { - for (auto& field : schema->fields()) { - if (batch->schema()->GetFieldIndex(field->name()) < 0) { - return false; - } - } - return true; -} - -std::vector> MakeBuilders(const std::shared_ptr& schema, - size_t reserve, const std::map& 
sizeByColumn) { - std::vector> builders; - builders.reserve(schema->num_fields()); - - for (auto& field : schema->fields()) { - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), field->type(), &builder)); - if (sizeByColumn.size()) { - auto it = sizeByColumn.find(field->name()); - if (it != sizeByColumn.end()) { - AFL_VERIFY(NArrow::ReserveData(*builder, it->second))("size", it->second)("field", field->name()); - } - } - - if (reserve) { - TStatusValidator::Validate(builder->Reserve(reserve)); - } - - builders.emplace_back(std::move(builder)); - - } - return builders; -} - -std::unique_ptr MakeBuilder(const std::shared_ptr& field) { - AFL_VERIFY(field); - return MakeBuilder(field->type()); -} - -std::unique_ptr MakeBuilder(const std::shared_ptr& type) { - AFL_VERIFY(type); - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), type, &builder)); - return std::move(builder); -} - -std::vector> Finish(std::vector>&& builders) { - std::vector> out; - for (auto& builder : builders) { - std::shared_ptr array; - TStatusValidator::Validate(builder->Finish(&array)); - out.emplace_back(array); - } - return out; -} - -std::vector ColumnNames(const std::shared_ptr& schema) { - std::vector out; - out.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) { - auto& name = schema->field(i)->name(); - out.emplace_back(TString(name.data(), name.size())); - } - return out; -} - -std::shared_ptr MakeUI64Array(ui64 value, i64 size) { - auto res = arrow::MakeArrayFromScalar(arrow::UInt64Scalar(value), size); - Y_ABORT_UNLESS(res.ok()); - return std::static_pointer_cast(*res); -} - -std::pair FindMinMaxPosition(const std::shared_ptr& array) { - if (array->length() == 0) { - return {-1, -1}; - } - - int minPos = 0; - int maxPos = 0; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename 
arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - - for (int i = 1; i < column.length(); ++i) { - const auto& value = column.GetView(i); - if (value < column.GetView(minPos)) { - minPos = i; - } - if (value > column.GetView(maxPos)) { - maxPos = i; - } - } - return true; - }); - return {minPos, maxPos}; -} - -std::shared_ptr MinScalar(const std::shared_ptr& type) { - std::shared_ptr out; - SwitchType(type->id(), [&](const auto& t) { - using TWrap = std::decay_t; - using T = typename TWrap::T; - using TScalar = typename arrow::TypeTraits::ScalarType; - - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - out = std::make_shared(arrow::Buffer::FromString(""), type); - } else if constexpr (std::is_same_v) { - std::string s(static_cast(*type).byte_width(), '\0'); - out = std::make_shared(arrow::Buffer::FromString(s), type); - } else if constexpr (std::is_same_v) { - return false; - } else if constexpr (arrow::is_temporal_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(Min(), type); - } else if constexpr (arrow::has_c_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(Min()); - } else { - return false; - } - return true; - }); - Y_ABORT_UNLESS(out); - return out; -} - -namespace { - -template -class TDefaultScalarValue { -public: - static constexpr T Value = 0; -}; - -template <> -class TDefaultScalarValue { -public: - static constexpr bool Value = false; -}; - -} - -std::shared_ptr DefaultScalar(const std::shared_ptr& type) { - std::shared_ptr out; - SwitchType(type->id(), [&](const auto& t) { - using TWrap = std::decay_t; - using T = typename TWrap::T; - using TScalar = typename arrow::TypeTraits::ScalarType; - - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - out = std::make_shared(arrow::Buffer::FromString(""), type); - } else if constexpr (std::is_same_v) { - std::string 
s(static_cast(*type).byte_width(), '\0'); - out = std::make_shared(arrow::Buffer::FromString(s), type); - } else if constexpr (std::is_same_v) { - return false; - } else if constexpr (arrow::is_temporal_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(TDefaultScalarValue::Value, type); - } else if constexpr (arrow::has_c_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(TDefaultScalarValue::Value); - } else { - return false; - } - return true; - }); - Y_ABORT_UNLESS(out); - return out; -} - -std::shared_ptr GetScalar(const std::shared_ptr& array, int position) { - auto res = array->GetScalar(position); - Y_ABORT_UNLESS(res.ok()); - return *res; -} - -bool IsGoodScalar(const std::shared_ptr& x) { - if (!x) { - return false; - } - - return SwitchType(x->type->id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TScalar = typename arrow::TypeTraits::ScalarType; - using TValue = std::decay_t(*x).value)>; - - if constexpr (arrow::has_string_view()) { - const auto& xval = static_cast(*x).value; - return xval && xval->data(); - } - if constexpr (std::is_arithmetic_v) { - return true; - } - return false; - }); -} - -bool ScalarLess(const std::shared_ptr& x, const std::shared_ptr& y) { - Y_ABORT_UNLESS(x); - Y_ABORT_UNLESS(y); - return ScalarLess(*x, *y); -} - -bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y) { - return ScalarCompare(x, y) < 0; -} - -int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y) { - Y_VERIFY_S(x.type->Equals(y.type), x.type->ToString() + " vs " + y.type->ToString()); - - return SwitchTypeImpl(x.type->id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TScalar = typename arrow::TypeTraits::ScalarType; - using TValue = std::decay_t(x).value)>; - - if constexpr (arrow::has_string_view()) { - const auto& xval = static_cast(x).value; - const auto& yval = static_cast(y).value; - Y_ABORT_UNLESS(xval); - 
Y_ABORT_UNLESS(yval); - TStringBuf xBuf(reinterpret_cast(xval->data()), xval->size()); - TStringBuf yBuf(reinterpret_cast(yval->data()), yval->size()); - if (xBuf < yBuf) { - return -1; - } else if (yBuf < xBuf) { - return 1; - } else { - return 0; - } - } - if constexpr (std::is_arithmetic_v) { - const auto& xval = static_cast(x).value; - const auto& yval = static_cast(y).value; - if (xval < yval) { - return -1; - } else if (yval < xval) { - return 1; - } else { - return 0; - } - } - Y_ABORT_UNLESS(false); // TODO: non primitive types - return 0; - }); -} - -int ScalarCompare(const std::shared_ptr& x, const std::shared_ptr& y) { - Y_ABORT_UNLESS(x); - Y_ABORT_UNLESS(y); - return ScalarCompare(*x, *y); -} - std::shared_ptr SortBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique) { auto sortPermutation = MakeSortPermutation(batch, sortingKey, andUnique); @@ -644,217 +206,6 @@ std::shared_ptr SortBatch(const std::shared_ptr BoolVecToArray(const std::vector& vec) { - std::shared_ptr out; - arrow::BooleanBuilder builder; - for (const auto val : vec) { - Y_ABORT_UNLESS(builder.Append(val).ok()); - } - Y_ABORT_UNLESS(builder.Finish(&out).ok()); - return out; -} - - -bool ArrayScalarsEqual(const std::shared_ptr& lhs, const std::shared_ptr& rhs) { - bool res = lhs->length() == rhs->length(); - for (int64_t i = 0; i < lhs->length() && res; ++i) { - res &= arrow::ScalarEquals(*lhs->GetScalar(i).ValueOrDie(), *rhs->GetScalar(i).ValueOrDie()); - } - return res; -} - -bool ReserveData(arrow::ArrayBuilder& builder, const size_t size) { - arrow::Status result = arrow::Status::OK(); - if (builder.type()->id() == arrow::Type::BINARY || - builder.type()->id() == arrow::Type::STRING) - { - static_assert(std::is_convertible_v&>, - "Expected StringBuilder to be BaseBinaryBuilder"); - auto& bBuilder = static_cast&>(builder); - result = bBuilder.ReserveData(size); - } - - if (!result.ok()) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", 
"ReserveData")("error", result.ToString()); - } - return result.ok(); -} - -template -bool MergeBatchColumnsImpl(const std::vector>& batches, std::shared_ptr& result, - const std::vector& columnsOrder, const bool orderFieldsAreNecessary, const TBuilder& builder) { - if (batches.empty()) { - result = nullptr; - return true; - } - if (batches.size() == 1) { - result = batches.front(); - return true; - } - std::vector> fields; - std::vector> columns; - std::map fieldNames; - for (auto&& i : batches) { - Y_ABORT_UNLESS(i); - for (auto&& f : i->schema()->fields()) { - if (!fieldNames.emplace(f->name(), fields.size()).second) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "duplicated column")("name", f->name()); - return false; - } - fields.emplace_back(f); - } - if (i->num_rows() != batches.front()->num_rows()) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "inconsistency record sizes")("i", i->num_rows())("front", batches.front()->num_rows()); - return false; - } - for (auto&& c : i->columns()) { - columns.emplace_back(c); - } - } - - Y_ABORT_UNLESS(fields.size() == columns.size()); - if (columnsOrder.size()) { - std::vector> fieldsOrdered; - std::vector> columnsOrdered; - for (auto&& i : columnsOrder) { - auto it = fieldNames.find(i); - if (orderFieldsAreNecessary) { - Y_ABORT_UNLESS(it != fieldNames.end()); - } else if (it == fieldNames.end()) { - continue; - } - fieldsOrdered.emplace_back(fields[it->second]); - columnsOrdered.emplace_back(columns[it->second]); - } - std::swap(fieldsOrdered, fields); - std::swap(columnsOrdered, columns); - } - result = builder(std::make_shared(fields), batches.front()->num_rows(), std::move(columns)); - return true; -} - -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder, const bool orderFieldsAreNecessary) { - const auto builder = [](const std::shared_ptr& schema, const ui32 recordsCount, std::vector>&& columns) { - return arrow::Table::Make(schema, columns, 
recordsCount); - }; - - return MergeBatchColumnsImpl(batches, result, columnsOrder, orderFieldsAreNecessary, builder); -} - -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder, const bool orderFieldsAreNecessary) { - const auto builder = [](const std::shared_ptr& schema, const ui32 recordsCount, std::vector>&& columns) { - return arrow::RecordBatch::Make(schema, recordsCount, columns); - }; - - return MergeBatchColumnsImpl(batches, result, columnsOrder, orderFieldsAreNecessary, builder); -} - -std::partial_ordering ColumnsCompare(const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow) { - return TRawReplaceKey(&x, xRow).CompareNotNull(TRawReplaceKey(&y, yRow)); -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position) { - NJson::TJsonValue result = NJson::JSON_ARRAY; - for (auto&& i : array->columns()) { - result.AppendValue(DebugJson(i, position)); - } - return result; -} - -TString DebugString(std::shared_ptr array, const ui32 position) { - if (!array) { - return "_NO_DATA"; - } - Y_ABORT_UNLESS(position < array->length()); - TStringBuilder result; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - if constexpr (arrow::has_string_view()) { - auto value = column.GetString(position); - result << TString(value.data(), value.size()); - } - if constexpr (arrow::has_c_type()) { - result << column.Value(position); - } - return true; - }); - return result; -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position) { - if (!array) { - return NJson::JSON_NULL; - } - Y_ABORT_UNLESS(position < array->length()); - NJson::TJsonValue result = NJson::JSON_MAP; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = 
static_cast(*array); - result.InsertValue("type", typeid(TArray).name()); - if constexpr (arrow::has_string_view()) { - auto value = column.GetString(position); - result.InsertValue("value", TString(value.data(), value.size())); - } - if constexpr (arrow::has_c_type()) { - result.InsertValue("value", column.Value(position)); - } - return true; - }); - return result; -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 head, const ui32 tail) { - if (!array) { - return NJson::JSON_NULL; - } - NJson::TJsonValue resultFull = NJson::JSON_MAP; - resultFull.InsertValue("length", array->length()); - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - resultFull.InsertValue("type", typeid(TArray).name()); - resultFull.InsertValue("head", head); - resultFull.InsertValue("tail", tail); - auto& result = resultFull.InsertValue("data", NJson::JSON_ARRAY); - for (int i = 0; i < column.length(); ++i) { - if (i >= (int)head && i + (int)tail < column.length()) { - continue; - } - if constexpr (arrow::has_string_view()) { - auto value = column.GetString(i); - result.AppendValue(TString(value.data(), value.size())); - } - if constexpr (arrow::has_c_type()) { - result.AppendValue(column.Value(i)); - } - } - return true; - }); - return resultFull; -} - -NJson::TJsonValue DebugJson(std::shared_ptr batch, const ui32 head, const ui32 tail) { - if (!batch) { - return NJson::JSON_NULL; - } - NJson::TJsonValue result = NJson::JSON_ARRAY; - ui32 idx = 0; - for (auto&& i : batch->columns()) { - auto& jsonColumn = result.AppendValue(NJson::JSON_MAP); - jsonColumn.InsertValue("name", batch->column_name(idx)); - jsonColumn.InsertValue("data", DebugJson(i, head, tail)); - ++idx; - } - return result; -} - std::shared_ptr ReallocateBatch(std::shared_ptr original) { if (!original) { return nullptr; @@ -862,92 +213,16 @@ std::shared_ptr 
ReallocateBatch(std::shared_ptrschema()); } -std::shared_ptr MergeColumns(const std::vector>& batches) { - std::vector> columns; - std::vector> fields; - std::optional recordsCount; - std::set columnNames; - for (auto&& batch : batches) { - if (!batch) { - continue; - } - for (auto&& column : batch->columns()) { - columns.emplace_back(column); - if (!recordsCount) { - recordsCount = column->length(); - } else { - Y_ABORT_UNLESS(*recordsCount == column->length()); - } - } - for (auto&& field : batch->schema()->fields()) { - AFL_VERIFY(columnNames.emplace(field->name()).second)("field_name", field->name()); - fields.emplace_back(field); - } - } - if (columns.empty()) { - return nullptr; - } - auto schema = std::make_shared(fields); - return arrow::RecordBatch::Make(schema, *recordsCount, columns); -} - -std::vector> SliceToRecordBatches(const std::shared_ptr& t) { - std::set splitPositions; - const ui32 numRows = t->num_rows(); - for (auto&& i : t->columns()) { - ui32 pos = 0; - for (auto&& arr : i->chunks()) { - splitPositions.emplace(pos); - pos += arr->length(); - } - AFL_VERIFY(pos == t->num_rows()); - } - std::vector>> slicedData; - slicedData.resize(splitPositions.size()); - std::vector positions(splitPositions.begin(), splitPositions.end()); - for (auto&& i : t->columns()) { - for (ui32 idx = 0; idx < positions.size(); ++idx) { - auto slice = i->Slice(positions[idx], ((idx + 1 == positions.size()) ? 
numRows : positions[idx + 1]) - positions[idx]); - AFL_VERIFY(slice->num_chunks() == 1); - slicedData[idx].emplace_back(slice->chunks().front()); - } - } - std::vector> result; - ui32 count = 0; - for (auto&& i : slicedData) { - result.emplace_back(arrow::RecordBatch::Make(t->schema(), i.front()->length(), i)); - count += result.back()->num_rows(); - } - AFL_VERIFY(count == t->num_rows())("count", count)("t", t->num_rows()); - return result; -} - -std::shared_ptr ToTable(const std::shared_ptr& batch) { - if (!batch) { - return nullptr; - } - return TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); -} - -bool HasNulls(const std::shared_ptr& column) { - AFL_VERIFY(column); - return column->null_bitmap_data(); -} - -std::vector ConvertStrings(const std::vector& input) { - std::vector result; - for (auto&& i : input) { - result.emplace_back(i); +std::shared_ptr ReallocateBatch(const std::shared_ptr& original) { + if (!original) { + return original; } - return result; -} - -std::vector ConvertStrings(const std::vector& input) { - std::vector result; - for (auto&& i : input) { - result.emplace_back(i); + auto batches = NArrow::SliceToRecordBatches(original); + for (auto&& i : batches) { + i = NArrow::TStatusValidator::GetValid( + NArrow::NSerialization::TNativeSerializer().Deserialize(NArrow::NSerialization::TNativeSerializer().SerializeFull(i))); } - return result; + return NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); } } diff --git a/ydb/core/formats/arrow/arrow_helpers.h b/ydb/core/formats/arrow/arrow_helpers.h index f6f4fd0c18a0..ea1f7a825bb2 100644 --- a/ydb/core/formats/arrow/arrow_helpers.h +++ b/ydb/core/formats/arrow/arrow_helpers.h @@ -1,5 +1,4 @@ #pragma once -#include "switch_type.h" #include "process_columns.h" #include #include @@ -9,44 +8,16 @@ #include #include #include +#include namespace NKikimr::NArrow { -using TArrayVec = std::vector>; - arrow::Result> 
GetArrowType(NScheme::TTypeInfo typeInfo); arrow::Result> GetCSVArrowType(NScheme::TTypeInfo typeId); -template -inline bool ArrayEqualValue(const std::shared_ptr& x, const std::shared_ptr& y) { - auto& arrX = static_cast(*x); - auto& arrY = static_cast(*y); - for (int i = 0; i < x->length(); ++i) { - if (arrX.Value(i) != arrY.Value(i)) { - return false; - } - } - return true; -} - -template -inline bool ArrayEqualView(const std::shared_ptr& x, const std::shared_ptr& y) { - auto& arrX = static_cast(*x); - auto& arrY = static_cast(*y); - for (int i = 0; i < x->length(); ++i) { - if (arrX.GetView(i) != arrY.GetView(i)) { - return false; - } - } - return true; -} - -struct TSortDescription; - arrow::Result MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns = {}); arrow::Result> MakeArrowSchema(const std::vector>& columns, const std::set& notNullColumns = {}); -TString SerializeSchema(const arrow::Schema& schema); std::shared_ptr DeserializeSchema(const TString& str); TString SerializeBatch(const std::shared_ptr& batch, const arrow::ipc::IpcWriteOptions& options); @@ -54,28 +25,6 @@ TString SerializeBatchNoCompression(const std::shared_ptr& b std::shared_ptr DeserializeBatch(const TString& blob, const std::shared_ptr& schema); -std::shared_ptr MakeEmptyBatch(const std::shared_ptr& schema, const ui32 rowsCount = 0); -std::shared_ptr ToTable(const std::shared_ptr& batch); - -std::shared_ptr ToBatch(const std::shared_ptr& combinedTable, const bool combine); -std::shared_ptr CombineBatches(const std::vector>& batches); -std::shared_ptr MergeColumns(const std::vector>& rb); -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector& sharding, ui32 numShards); -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector>& shardRows, const ui32 numShards); -THashMap> ShardingSplit(const std::shared_ptr& batch, const THashMap>& shardRows); - -std::unique_ptr MakeBuilder(const std::shared_ptr& field); -std::unique_ptr 
MakeBuilder(const std::shared_ptr& type); - -std::vector> MakeBuilders(const std::shared_ptr& schema, - size_t reserve = 0, const std::map& sizeByColumn = {}); -std::vector> Finish(std::vector>&& builders); - -std::shared_ptr MakeUI64Array(ui64 value, i64 size); -std::vector ColumnNames(const std::shared_ptr& schema); -bool ReserveData(arrow::ArrayBuilder& builder, const size_t size); -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder = {}, const bool orderFieldsAreNecessary = true); -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder = {}, const bool orderFieldsAreNecessary = true); std::shared_ptr SortBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); @@ -88,33 +37,8 @@ bool IsSortedAndUnique(const std::shared_ptr& batch, void DedupSortedBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, std::vector>& out); -bool HasAllColumns(const std::shared_ptr& batch, const std::shared_ptr& schema); -std::pair FindMinMaxPosition(const std::shared_ptr& column); - -std::shared_ptr DefaultScalar(const std::shared_ptr& type); -std::shared_ptr MinScalar(const std::shared_ptr& type); -std::shared_ptr GetScalar(const std::shared_ptr& array, int position); -bool IsGoodScalar(const std::shared_ptr& x); -int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y); -int ScalarCompare(const std::shared_ptr& x, const std::shared_ptr& y); -std::partial_ordering ColumnsCompare(const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow); -bool ScalarLess(const std::shared_ptr& x, const std::shared_ptr& y); -bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y); std::shared_ptr ReallocateBatch(std::shared_ptr original); - -bool HasNulls(const std::shared_ptr& column); - -std::vector> SliceToRecordBatches(const std::shared_ptr& t); - -bool ArrayScalarsEqual(const std::shared_ptr& 
lhs, const std::shared_ptr& rhs); -std::shared_ptr BoolVecToArray(const std::vector& vec); - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 head, const ui32 tail); -NJson::TJsonValue DebugJson(std::shared_ptr batch, const ui32 head, const ui32 tail); - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position); -TString DebugString(std::shared_ptr array, const ui32 position); -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position); +std::shared_ptr ReallocateBatch(const std::shared_ptr& original); } diff --git a/ydb/core/formats/arrow/common/accessor.cpp b/ydb/core/formats/arrow/common/accessor.cpp deleted file mode 100644 index 9865b2a692f7..000000000000 --- a/ydb/core/formats/arrow/common/accessor.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "accessor.h" -#include -#include -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -void IChunkedArray::TReader::AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const { - auto address = GetReadChunk(position); - AFL_VERIFY(NArrow::Append(builder, *address.GetArray(), address.GetPosition(), recordSize)); -} - -std::shared_ptr IChunkedArray::TReader::CopyRecord(const ui64 recordIndex) const { - auto address = GetReadChunk(recordIndex); - return NArrow::CopyRecords(address.GetArray(), {address.GetPosition()}); -} - -std::shared_ptr IChunkedArray::Slice(const ui32 offset, const ui32 count) const { - AFL_VERIFY(offset + count <= (ui64)GetRecordsCount())("offset", offset)("count", count)("length", GetRecordsCount()); - ui32 currentOffset = offset; - ui32 countLeast = count; - std::vector> chunks; - auto address = GetChunk({}, offset); - while (countLeast) { - address = GetChunk(address, currentOffset); - const ui64 internalPos = currentOffset - address.GetStartPosition(); - if (internalPos + countLeast <= (ui64)address.GetArray()->length()) { - chunks.emplace_back(address.GetArray()->Slice(internalPos, countLeast)); - 
break; - } else { - const ui32 deltaCount = address.GetArray()->length() - internalPos; - chunks.emplace_back(address.GetArray()->Slice(internalPos, deltaCount)); - AFL_VERIFY(countLeast >= deltaCount); - countLeast -= deltaCount; - currentOffset += deltaCount; - } - } - return std::make_shared(chunks, DataType); -} - -TString IChunkedArray::TReader::DebugString(const ui32 position) const { - auto address = GetReadChunk(position); - return NArrow::DebugString(address.GetArray(), address.GetPosition()); -} - -std::partial_ordering IChunkedArray::TReader::CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition) { - AFL_VERIFY(l.size() == r.size()); - for (ui32 i = 0; i < l.size(); ++i) { - const TAddress lAddress = l[i].GetReadChunk(lPosition); - const TAddress rAddress = r[i].GetReadChunk(rPosition); - auto cmp = lAddress.Compare(rAddress); - if (std::is_neq(cmp)) { - return cmp; - } - } - return std::partial_ordering::equivalent; -} - -IChunkedArray::TAddress IChunkedArray::TReader::GetReadChunk(const ui64 position) const { - AFL_VERIFY(position < ChunkedArray->GetRecordsCount()); - if (CurrentChunkAddress && position < CurrentChunkAddress->GetStartPosition() + CurrentChunkAddress->GetArray()->length() && CurrentChunkAddress->GetStartPosition() <= position) { - } else { - CurrentChunkAddress = ChunkedArray->DoGetChunk(CurrentChunkAddress, position); - } - return IChunkedArray::TAddress(CurrentChunkAddress->GetArray(), position - CurrentChunkAddress->GetStartPosition(), CurrentChunkAddress->GetChunkIndex()); -} - -const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& item) const { - return TComparator::TypedCompare(*Array, Position, *item.Array, item.Position); -} - -namespace { -class TChunkAccessor { -private: - std::shared_ptr ChunkedArray; -public: - TChunkAccessor(const std::shared_ptr& chunkedArray) - : ChunkedArray(chunkedArray) - { - - } - ui64 GetChunksCount() const { - return 
(ui64)ChunkedArray->num_chunks(); - } - ui64 GetChunkLength(const ui32 idx) const { - return (ui64)ChunkedArray->chunk(idx)->length(); - } - std::shared_ptr GetArray(const ui32 idx) const { - return ChunkedArray->chunk(idx); - } -}; - -} - -std::partial_ordering IChunkedArray::TCurrentChunkAddress::Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const { - AFL_VERIFY(StartPosition <= position); - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(item.StartPosition <= itemPosition); - AFL_VERIFY(itemPosition < item.FinishPosition); - return TComparator::TypedCompare(*Array, position - StartPosition, *item.Array, itemPosition - item.StartPosition); -} - -std::shared_ptr IChunkedArray::TCurrentChunkAddress::CopyRecord(const ui64 recordIndex) const { - AFL_VERIFY(StartPosition <= recordIndex); - AFL_VERIFY(recordIndex < FinishPosition); - return NArrow::CopyRecords(Array, { recordIndex - StartPosition }); -} - -TString IChunkedArray::TCurrentChunkAddress::DebugString(const ui64 position) const { - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(StartPosition <= position); - return NArrow::DebugString(Array, position - StartPosition); -} - -IChunkedArray::TCurrentChunkAddress TTrivialChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Array); - return SelectChunk(chunkCurrent, position, accessor); -} - -} diff --git a/ydb/core/formats/arrow/common/accessor.h b/ydb/core/formats/arrow/common/accessor.h deleted file mode 100644 index 3765d726992b..000000000000 --- a/ydb/core/formats/arrow/common/accessor.h +++ /dev/null @@ -1,216 +0,0 @@ -#pragma once -#include -#include - -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -class IChunkedArray { -public: - enum class EType { - Undefined, - Array, - ChunkedArray, - SerializedChunkedArray - }; - - class TCurrentChunkAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - 
YDB_READONLY(ui64, StartPosition, 0); - YDB_READONLY(ui64, FinishPosition, 0); - YDB_READONLY(ui64, ChunkIndex, 0); - public: - TString DebugString(const ui64 position) const; - - ui64 GetLength() const { - return Array->length(); - } - - bool Contains(const ui64 position) const { - return position >= StartPosition && position < FinishPosition; - } - - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - - std::partial_ordering Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const; - - TCurrentChunkAddress(const std::shared_ptr& arr, const ui64 pos, const ui32 chunkIdx) - : Array(arr) - , StartPosition(pos) - , ChunkIndex(chunkIdx) - { - AFL_VERIFY(arr); - AFL_VERIFY(arr->length()); - FinishPosition = StartPosition + arr->length(); - } - - TString DebugString() const { - return TStringBuilder() - << "start=" << StartPosition << ";" - << "chunk_index=" << ChunkIndex << ";" - << "length=" << Array->length() << ";"; - } - }; - - class TAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - YDB_READONLY(ui64, Position, 0); - YDB_READONLY(ui64, ChunkIdx, 0); - public: - bool NextPosition() { - if (Position + 1 < (ui32)Array->length()) { - ++Position; - return true; - } - return false; - } - - TAddress(const std::shared_ptr& arr, const ui64 position, const ui64 chunkIdx) - : Array(arr) - , Position(position) - , ChunkIdx(chunkIdx) - { - - } - - const std::partial_ordering Compare(const TAddress& item) const; - }; -private: - YDB_READONLY_DEF(std::shared_ptr, DataType); - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY(EType, Type, EType::Undefined); -protected: - virtual std::shared_ptr DoGetChunkedArray() const = 0; - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const = 0; - - template - TCurrentChunkAddress SelectChunk(const std::optional& chunkCurrent, const ui64 position, const TChunkAccessor& accessor) const { - if (!chunkCurrent || position >= 
chunkCurrent->GetStartPosition() + chunkCurrent->GetLength()) { - ui32 startIndex = 0; - ui64 idx = 0; - if (chunkCurrent) { - AFL_VERIFY(chunkCurrent->GetChunkIndex() + 1 < accessor.GetChunksCount()); - startIndex = chunkCurrent->GetChunkIndex() + 1; - idx = chunkCurrent->GetStartPosition() + chunkCurrent->GetLength(); - } - for (ui32 i = startIndex; i < accessor.GetChunksCount(); ++i) { - const ui64 nextIdx = idx + accessor.GetChunkLength(i); - if (idx <= position && position < nextIdx) { - return TCurrentChunkAddress(accessor.GetArray(i), idx, i); - } - idx = nextIdx; - } - } else if (position < chunkCurrent->GetStartPosition()) { - AFL_VERIFY(chunkCurrent->GetChunkIndex() > 0); - ui64 idx = chunkCurrent->GetStartPosition(); - for (i32 i = chunkCurrent->GetChunkIndex() - 1; i >= 0; --i) { - AFL_VERIFY(idx >= accessor.GetChunkLength(i))("idx", idx)("length", accessor.GetChunkLength(i)); - const ui64 nextIdx = idx - accessor.GetChunkLength(i); - if (nextIdx <= position && position < idx) { - return TCurrentChunkAddress(accessor.GetArray(i), nextIdx, i); - } - idx = nextIdx; - } - } - TStringBuilder sb; - ui64 recordsCountChunks = 0; - for (ui32 i = 0; i < accessor.GetChunksCount(); ++i) { - sb << accessor.GetChunkLength(i) << ","; - recordsCountChunks += accessor.GetChunkLength(i); - } - TStringBuilder chunkCurrentInfo; - if (chunkCurrent) { - chunkCurrentInfo << chunkCurrent->DebugString(); - } - AFL_VERIFY(recordsCountChunks == GetRecordsCount())("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - AFL_VERIFY(false)("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - return TCurrentChunkAddress(nullptr, 0, 0); - } - -public: - - class TReader { - private: - std::shared_ptr ChunkedArray; - mutable std::optional CurrentChunkAddress; - public: - TReader(const std::shared_ptr& data) - : ChunkedArray(data) - { - AFL_VERIFY(ChunkedArray); - } - - ui64 GetRecordsCount() 
const { - return ChunkedArray->GetRecordsCount(); - } - - TAddress GetReadChunk(const ui64 position) const; - static std::partial_ordering CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition); - void AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const; - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - TString DebugString(const ui32 position) const; - }; - - std::shared_ptr GetChunkedArray() const { - return DoGetChunkedArray(); - } - virtual ~IChunkedArray() = default; - - std::shared_ptr Slice(const ui32 offset, const ui32 count) const; - - TCurrentChunkAddress GetChunk(const std::optional& chunkCurrent, const ui64 position) const { - return DoGetChunk(chunkCurrent, position); - } - - IChunkedArray(const ui64 recordsCount, const EType type, const std::shared_ptr& dataType) - : DataType(dataType) - , RecordsCount(recordsCount) - , Type(type) { - - } -}; - -class TTrivialArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { - return TCurrentChunkAddress(Array, 0, 0); - } - virtual std::shared_ptr DoGetChunkedArray() const override { - return std::make_shared(Array); - } - -public: - TTrivialArray(const std::shared_ptr& data) - : TBase(data->length(), EType::Array, data->type()) - , Array(data) { - - } -}; - -class TTrivialChunkedArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - return Array; - } - -public: - TTrivialChunkedArray(const std::shared_ptr& data) - : TBase(data->length(), EType::ChunkedArray, data->type()) - , Array(data) { - - } -}; - 
-} diff --git a/ydb/core/formats/arrow/common/adapter.h b/ydb/core/formats/arrow/common/adapter.h index 543e78511146..18b2deeacc9b 100644 --- a/ydb/core/formats/arrow/common/adapter.h +++ b/ydb/core/formats/arrow/common/adapter.h @@ -1,18 +1,19 @@ #pragma once #include "container.h" -#include "accessor.h" -#include "validation.h" +#include + +#include #include -#include #include +#include #include +#include +#include #include #include -#include -#include -#include +#include namespace NKikimr::NArrow::NAdapter { @@ -27,7 +28,8 @@ class TDataBuilderPolicy { using TColumn = arrow::Array; using TAccessor = NAccessor::TTrivialArray; - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, extCol)); } @@ -37,7 +39,8 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return arrow::RecordBatch::Make(schema, count, std::move(columns)); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::RECORD_BATCH); @@ -46,7 +49,6 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { return batch->Slice(0, 0); } - }; template <> @@ -60,11 +62,13 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return 
arrow::Table::Make(schema, std::move(columns), count); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn( + const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, std::make_shared(extCol))); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::TABLE); @@ -86,12 +90,14 @@ class TDataBuilderPolicy { } return std::make_shared(std::make_shared(std::move(fields)), std::move(columns)); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { batch->AddField(field, std::make_shared(extCol)).Validate(); return batch; } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { - auto table = batch->BuildTable(); + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { + auto table = batch->BuildTableVerified(); return std::make_shared(TDataBuilderPolicy::ApplyArrowFilter(table, filter)); } [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { @@ -99,4 +105,4 @@ class TDataBuilderPolicy { } }; -} +} // namespace NKikimr::NArrow::NAdapter diff --git a/ydb/core/formats/arrow/common/container.cpp 
b/ydb/core/formats/arrow/common/container.cpp index ccf8dc71fb0c..7b159f2eef06 100644 --- a/ydb/core/formats/arrow/common/container.cpp +++ b/ydb/core/formats/arrow/common/container.cpp @@ -1,50 +1,71 @@ #include "container.h" + +#include +#include + #include -#include +#include +#include namespace NKikimr::NArrow { -NKikimr::TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { - if (RecordsCount != container.RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in additional container: " << - container.GetSchema()->ToString() << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); +TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { + if (!container.RecordsCount) { + return TConclusionStatus::Success(); + } + if (!RecordsCount) { + RecordsCount = container.RecordsCount; + } + if (*RecordsCount != *container.RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() + << "inconsistency records count in additional container: " << container.GetSchema()->ToString() + << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); } for (i32 i = 0; i < container.Schema->num_fields(); ++i) { auto addFieldResult = AddField(container.Schema->field(i), container.Columns[i]); - if (!addFieldResult) { + if (addFieldResult.IsFail()) { return addFieldResult; } } return TConclusionStatus::Success(); } -NKikimr::TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { AFL_VERIFY(f); AFL_VERIFY(data); - if (data->GetRecordsCount() != RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << - f->name() << ". 
expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); + if (RecordsCount && data->GetRecordsCount() != *RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << f->name() + << ". expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); } if (!data->GetDataType()->Equals(f->type())) { - return TConclusionStatus::Fail("schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); - } - if (Schema->GetFieldByName(f->name())) { - return TConclusionStatus::Fail("field name duplication: " + f->name()); + return TConclusionStatus::Fail( + "schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); } - auto resultAdd = Schema->AddField(Schema->num_fields(), f); - if (!resultAdd.ok()) { - return TConclusionStatus::Fail("internal schema error on add field: " + resultAdd.status().ToString()); + { + auto conclusion = Schema->AddField(f); + if (conclusion.IsFail()) { + return conclusion; + } } - Schema = *resultAdd; + RecordsCount = data->GetRecordsCount(); Columns.emplace_back(data); return TConclusionStatus::Success(); } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns) - : Schema(schema) - , Columns(std::move(columns)) -{ - AFL_VERIFY(schema); +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { + return AddField(f, std::make_shared(data)); +} + +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { + return AddField(f, std::make_shared(data)); +} + +void TGeneralContainer::DeleteFieldsByIndex(const std::vector& idxs) { + Schema->DeleteFieldsByIndex(idxs); + NUtil::EraseItems(Columns, idxs); +} + +void TGeneralContainer::Initialize() { std::optional recordsCount; AFL_VERIFY(Schema->num_fields() == (i32)Columns.size())("schema", 
Schema->num_fields())("columns", Columns.size()); for (i32 i = 0; i < Schema->num_fields(); ++i) { @@ -54,17 +75,40 @@ TGeneralContainer::TGeneralContainer(const std::shared_ptr& schem recordsCount = Columns[i]->GetRecordsCount(); } else { AFL_VERIFY(*recordsCount == Columns[i]->GetRecordsCount()) - ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())("field_name", Schema->field(i)->name()); + ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())( + "field_name", Schema->field(i)->name()); } } AFL_VERIFY(recordsCount); + AFL_VERIFY(!RecordsCount || *RecordsCount == *recordsCount); RecordsCount = *recordsCount; } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { +TGeneralContainer::TGeneralContainer( + const std::vector>& fields, std::vector>&& columns) + : Schema(std::make_shared(fields)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) + : Schema(std::make_shared(schema)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) + : Schema(std::make_shared(schema)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) + : RecordsCount(TValidator::CheckNotNull(table)->num_rows()) + , Schema(std::make_shared(TValidator::CheckNotNull(table)->schema())) { AFL_VERIFY(table); - Schema = table->schema(); - RecordsCount = table->num_rows(); for (auto&& i : table->columns()) { if (i->num_chunks() == 1) { Columns.emplace_back(std::make_shared(i->chunk(0))); @@ -72,15 +116,22 @@ TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) Columns.emplace_back(std::make_shared(i)); } } + Initialize(); } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { 
+TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) + : RecordsCount(TValidator::CheckNotNull(table)->num_rows()) + , Schema(std::make_shared(TValidator::CheckNotNull(table)->schema())) { AFL_VERIFY(table); - Schema = table->schema(); - RecordsCount = table->num_rows(); for (auto&& i : table->columns()) { Columns.emplace_back(std::make_shared(i)); } + Initialize(); +} + +TGeneralContainer::TGeneralContainer(const ui32 recordsCount) + : RecordsCount(recordsCount) + , Schema(std::make_shared()) { } std::shared_ptr TGeneralContainer::GetAccessorByNameVerified(const std::string& fieldId) const { @@ -110,14 +161,82 @@ std::shared_ptr TGeneralContainer::BuildTableOptional(const std::o if (fields.empty()) { return nullptr; } - return arrow::Table::Make(std::make_shared(fields), columns, RecordsCount); + AFL_VERIFY(RecordsCount); + return arrow::Table::Make(std::make_shared(fields), columns, *RecordsCount); } -std::shared_ptr TGeneralContainer::BuildTable(const std::optional>& columnNames /*= {}*/) const { +std::shared_ptr TGeneralContainer::BuildTableVerified(const std::optional>& columnNames /*= {}*/) const { auto result = BuildTableOptional(columnNames); AFL_VERIFY(result); AFL_VERIFY(!columnNames || result->schema()->num_fields() == (i32)columnNames->size()); return result; } +std::shared_ptr TGeneralContainer::GetAccessorByNameOptional(const std::string& fieldId) const { + int idx = Schema->GetFieldIndex(fieldId); + if (idx == -1) { + return nullptr; + } + AFL_VERIFY((ui32)idx < Columns.size())("idx", idx)("count", Columns.size()); + return Columns[idx]; +} + +TConclusionStatus TGeneralContainer::SyncSchemaTo( + const std::shared_ptr& schema, const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults) { + std::shared_ptr schemaNew = std::make_shared(); + std::vector> columnsNew; + if (!RecordsCount) { + return TConclusionStatus::Fail("original container has not data"); + } + for (auto&& i : schema->fields()) { + const int idx = 
Schema->GetFieldIndex(i->name()); + if (idx == -1) { + if (!defaultFieldsConstructor) { + return TConclusionStatus::Fail("haven't field for sync: '" + i->name() + "'"); + } else { + schemaNew->AddField(i).Validate(); + auto defConclusion = defaultFieldsConstructor->GetDefaultColumnElementValue(i, forceDefaults); + if (defConclusion.IsFail()) { + return defConclusion; + } + columnsNew.emplace_back( + std::make_shared(NArrow::TThreadSimpleArraysCache::Get(i->type(), *defConclusion, *RecordsCount))); + } + } else { + const auto& fOwned = Schema->GetFieldVerified(idx); + if (!fOwned->type()->Equals(i->type())) { + return TConclusionStatus::Fail( + "different field types for '" + i->name() + "'. Have " + fOwned->type()->ToString() + ", need " + i->type()->ToString()); + } + schemaNew->AddField(fOwned).Validate(); + columnsNew.emplace_back(Columns[idx]); + } + } + std::swap(Schema, schemaNew); + std::swap(columnsNew, Columns); + return TConclusionStatus::Success(); } + +TString TGeneralContainer::DebugString() const { + TStringBuilder result; + if (RecordsCount) { + result << "records_count=" << *RecordsCount << ";"; + } + result << "schema=" << Schema->ToString() << ";"; + return result; +} + +TConclusion> IFieldsConstructor::GetDefaultColumnElementValue( + const std::shared_ptr& field, const bool force) const { + AFL_VERIFY(field); + auto result = DoGetDefaultColumnElementValue(field->name()); + if (result) { + return result; + } + if (force) { + return NArrow::DefaultScalar(field->type()); + } + return TConclusionStatus::Fail("have not default value for column " + field->name()); +} + +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/common/container.h b/ydb/core/formats/arrow/common/container.h index 25262d14ff4a..dacd5d62c0b0 100644 --- a/ydb/core/formats/arrow/common/container.h +++ b/ydb/core/formats/arrow/common/container.h @@ -1,8 +1,10 @@ #pragma once -#include "accessor.h" #include +#include #include +#include +#include #include #include @@ 
-12,50 +14,78 @@ namespace NKikimr::NArrow { +class IFieldsConstructor { +private: + virtual std::shared_ptr DoGetDefaultColumnElementValue(const std::string& fieldName) const = 0; +public: + TConclusion> GetDefaultColumnElementValue(const std::shared_ptr& field, const bool force) const; +}; + class TGeneralContainer { private: - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY_DEF(std::shared_ptr, Schema); + std::optional RecordsCount; + YDB_READONLY_DEF(std::shared_ptr, Schema); std::vector> Columns; + void Initialize(); public: - TString DebugString() const { - return TStringBuilder() - << "records_count=" << RecordsCount << ";" - << "schema=" << Schema->ToString() << ";" - ; + TGeneralContainer(const ui32 recordsCount); + + ui32 GetRecordsCount() const { + AFL_VERIFY(RecordsCount); + return *RecordsCount; + } + + TString DebugString() const; + + [[nodiscard]] TConclusionStatus SyncSchemaTo(const std::shared_ptr& schema, + const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults); + + bool HasColumn(const std::string& name) { + return Schema->HasField(name); + } + + ui64 num_columns() const { + return Columns.size(); } ui64 num_rows() const { - return RecordsCount; + AFL_VERIFY(RecordsCount); + return *RecordsCount; } - std::shared_ptr BuildTable(const std::optional>& columnNames = {}) const; + ui32 GetColumnsCount() const { + return Columns.size(); + } + + const std::shared_ptr& GetColumnVerified(const ui32 idx) const { + AFL_VERIFY(idx < Columns.size()); + return Columns[idx]; + } + + std::shared_ptr BuildTableVerified(const std::optional>& columnNames = {}) const; std::shared_ptr BuildTableOptional(const std::optional>& columnNames = {}) const; std::shared_ptr BuildEmptySame() const; [[nodiscard]] TConclusionStatus MergeColumnsStrictly(const TGeneralContainer& container); [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); + [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, 
const std::shared_ptr& data); - TGeneralContainer(const std::shared_ptr& table); + [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); - TGeneralContainer(const std::shared_ptr& table); + void DeleteFieldsByIndex(const std::vector& idxs); + TGeneralContainer(const std::shared_ptr& table); + TGeneralContainer(const std::shared_ptr& table); TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); + TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); + TGeneralContainer(const std::vector>& fields, std::vector>&& columns); arrow::Status ValidateFull() const { return arrow::Status::OK(); } - std::shared_ptr GetAccessorByNameOptional(const std::string& fieldId) const { - for (i32 i = 0; i < Schema->num_fields(); ++i) { - if (Schema->field(i)->name() == fieldId) { - return Columns[i]; - } - } - return nullptr; - } - + std::shared_ptr GetAccessorByNameOptional(const std::string& fieldId) const; std::shared_ptr GetAccessorByNameVerified(const std::string& fieldId) const; }; diff --git a/ydb/core/formats/arrow/common/ya.make b/ydb/core/formats/arrow/common/ya.make index 61f742b09b76..fc34c380aeb0 100644 --- a/ydb/core/formats/arrow/common/ya.make +++ b/ydb/core/formats/arrow/common/ya.make @@ -5,13 +5,13 @@ PEERDIR( ydb/core/formats/arrow/switch ydb/library/actors/core ydb/library/conclusion + ydb/library/formats/arrow + ydb/core/formats/arrow/splitter ) SRCS( container.cpp - validation.cpp adapter.cpp - accessor.cpp ) END() diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 1bd0c92e2ceb..f0a38e2c8149 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -1,5 +1,5 @@ #include "converter.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include #include diff --git a/ydb/core/formats/arrow/custom_registry.cpp b/ydb/core/formats/arrow/custom_registry.cpp index 13e8dc6150a3..9d61c8bf6476 100644 --- 
a/ydb/core/formats/arrow/custom_registry.cpp +++ b/ydb/core/formats/arrow/custom_registry.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #endif namespace cp = ::arrow::compute; @@ -62,6 +63,10 @@ static void RegisterYdbCast(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared()).ok()); } +static void RegisterCustomAggregates(cp::FunctionRegistry* registry) { + Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetFunctionName(EAggregate::NumRows))).ok()); +} + static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { #ifndef WIN32 try { @@ -71,6 +76,7 @@ static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Max))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Sum))).ok()); //Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Avg))).ok()); + Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::NumRows))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseGroupByName())).ok()); } catch (const std::exception& /*ex*/) { @@ -88,6 +94,7 @@ static std::unique_ptr CreateCustomRegistry() { RegisterRound(registry.get()); RegisterArithmetic(registry.get()); RegisterYdbCast(registry.get()); + RegisterCustomAggregates(registry.get()); RegisterHouseAggregates(registry.get()); return registry; } diff --git a/ydb/core/formats/arrow/dictionary/conversion.cpp b/ydb/core/formats/arrow/dictionary/conversion.cpp index 026ae6ba5d6e..b1decbf14bb6 100644 --- a/ydb/core/formats/arrow/dictionary/conversion.cpp +++ b/ydb/core/formats/arrow/dictionary/conversion.cpp @@ -1,8 +1,8 @@ #include "conversion.h" #include -#include -#include #include +#include +#include namespace NKikimr::NArrow { @@ -131,11 +131,4 @@ bool IsDictionableArray(const std::shared_ptr& data) { return result; } -ui64 
GetDictionarySize(const std::shared_ptr& data) { - if (!data) { - return 0; - } - return GetArrayDataSize(data->dictionary()) + GetArrayDataSize(data->indices()); -} - } diff --git a/ydb/core/formats/arrow/dictionary/conversion.h b/ydb/core/formats/arrow/dictionary/conversion.h index ee044bfd514b..dfedb4aa31a0 100644 --- a/ydb/core/formats/arrow/dictionary/conversion.h +++ b/ydb/core/formats/arrow/dictionary/conversion.h @@ -7,7 +7,6 @@ namespace NKikimr::NArrow { bool IsDictionableArray(const std::shared_ptr& data); -ui64 GetDictionarySize(const std::shared_ptr& data); std::shared_ptr ArrayToDictionary(const std::shared_ptr& data); std::shared_ptr ArrayToDictionary(const std::shared_ptr& data); std::shared_ptr DictionaryToArray(const std::shared_ptr& data); diff --git a/ydb/core/formats/arrow/dictionary/object.cpp b/ydb/core/formats/arrow/dictionary/object.cpp index 4a72802b2aec..36c9fe3fc276 100644 --- a/ydb/core/formats/arrow/dictionary/object.cpp +++ b/ydb/core/formats/arrow/dictionary/object.cpp @@ -1,6 +1,6 @@ #include "object.h" -#include #include +#include #include namespace NKikimr::NArrow::NDictionary { diff --git a/ydb/core/formats/arrow/dictionary/object.h b/ydb/core/formats/arrow/dictionary/object.h index 2fd4d6a12924..09f5efebb56f 100644 --- a/ydb/core/formats/arrow/dictionary/object.h +++ b/ydb/core/formats/arrow/dictionary/object.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace NKikimr::NArrow::NDictionary { diff --git a/ydb/core/formats/arrow/dictionary/ya.make b/ydb/core/formats/arrow/dictionary/ya.make index ea71f4c7dacf..35639ea43bea 100644 --- a/ydb/core/formats/arrow/dictionary/ya.make +++ b/ydb/core/formats/arrow/dictionary/ya.make @@ -3,9 +3,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/protos - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/library/actors/core + ydb/library/formats/arrow/transformer + ydb/library/formats/arrow/common + ydb/library/formats/arrow/simple_builder 
) SRCS( diff --git a/ydb/core/formats/arrow/hash/calcer.cpp b/ydb/core/formats/arrow/hash/calcer.cpp index 71af0492cfea..d5fa4a8dd6a3 100644 --- a/ydb/core/formats/arrow/hash/calcer.cpp +++ b/ydb/core/formats/arrow/hash/calcer.cpp @@ -1,8 +1,8 @@ #include "calcer.h" -#include "xx_hash.h" #include #include #include +#include #include #include #include diff --git a/ydb/core/formats/arrow/hash/calcer.h b/ydb/core/formats/arrow/hash/calcer.h index d82f669fbee1..51dfe7858f8c 100644 --- a/ydb/core/formats/arrow/hash/calcer.h +++ b/ydb/core/formats/arrow/hash/calcer.h @@ -1,11 +1,11 @@ #pragma once -#include "xx_hash.h" #include -#include #include #include #include +#include +#include #include #include diff --git a/ydb/core/formats/arrow/hash/ya.make b/ydb/core/formats/arrow/hash/ya.make index 6d9a98b836a6..d7337f6b5588 100644 --- a/ydb/core/formats/arrow/hash/ya.make +++ b/ydb/core/formats/arrow/hash/ya.make @@ -2,17 +2,18 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/core/formats/arrow/reader ydb/library/actors/core ydb/library/services ydb/library/actors/protos + ydb/library/formats/arrow/hash + ydb/library/formats/arrow/common + ydb/library/formats/arrow/simple_builder ) SRCS( calcer.cpp - xx_hash.cpp ) END() diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index 623bc15c9221..c36a09779c6d 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -1,13 +1,13 @@ #include "permutations.h" #include "arrow_helpers.h" -#include "replace_key.h" #include "size_calcer.h" #include "hash/calcer.h" -#include #include +#include +#include #include #include @@ -15,28 +15,6 @@ namespace NKikimr::NArrow { -std::shared_ptr MakePermutation(const int size, const bool reverse) { - arrow::UInt64Builder builder; - TStatusValidator::Validate(builder.Reserve(size)); - - if (size) { - if (reverse) { - ui64 value = size - 1; - for 
(i64 i = 0; i < size; ++i, --value) { - TStatusValidator::Validate(builder.Append(value)); - } - } else { - for (i64 i = 0; i < size; ++i) { - TStatusValidator::Validate(builder.Append(i)); - } - } - } - - std::shared_ptr out; - TStatusValidator::Validate(builder.Finish(&out)); - return out; -} - std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique) { auto keyBatch = TColumnOperator().VerifyIfAbsent().Adapt(batch, sortingKey).DetachResult(); auto keyColumns = std::make_shared(keyBatch->columns()); @@ -100,84 +78,6 @@ std::shared_ptr MakeSortPermutation(const std::shared_ptr -std::shared_ptr MakeFilterPermutationImpl(const std::vector& indexes) { - if (indexes.empty()) { - return {}; - } - - arrow::UInt64Builder builder; - if (!builder.Reserve(indexes.size()).ok()) { - return {}; - } - - for (auto&& i : indexes) { - TStatusValidator::Validate(builder.Append(i)); - } - std::shared_ptr out; - TStatusValidator::Validate(builder.Finish(&out)); - return out; -} - -std::shared_ptr MakeFilterPermutation(const std::vector& indexes) { - return MakeFilterPermutationImpl(indexes); -} - -std::shared_ptr MakeFilterPermutation(const std::vector& indexes) { - return MakeFilterPermutationImpl(indexes); -} - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes) { - Y_ABORT_UNLESS(!!source); - auto schema = source->schema(); - std::vector> columns; - for (auto&& i : source->columns()) { - columns.emplace_back(CopyRecords(i, indexes)); - } - return arrow::RecordBatch::Make(schema, indexes.size(), columns); -} - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes) { - if (!source) { - return source; - } - std::shared_ptr result; - SwitchType(source->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - using TBuilder = typename arrow::TypeTraits::BuilderType; - auto& column = 
static_cast(*source); - - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), source->type(), &builder)); - auto& builderImpl = static_cast(*builder); - - if constexpr (arrow::has_string_view::value) { - ui64 sumByIndexes = 0; - for (auto&& idx : indexes) { - Y_ABORT_UNLESS(idx < (ui64)column.length()); - sumByIndexes += column.GetView(idx).size(); - } - TStatusValidator::Validate(builderImpl.ReserveData(sumByIndexes)); - } - - TStatusValidator::Validate(builder->Reserve(indexes.size())); - - { - const ui32 arraySize = column.length(); - for (auto&& i : indexes) { - Y_ABORT_UNLESS(i < arraySize); - builderImpl.UnsafeAppend(column.GetView(i)); - } - } - - TStatusValidator::Validate(builder->Finish(&result)); - return true; - }); - Y_ABORT_UNLESS(result); - return result; -} - namespace { template @@ -213,101 +113,4 @@ bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, return BuildHashUI64Impl(batch, fieldNames, hashFieldName); } -ui64 TShardedRecordBatch::GetMemorySize() const { - return NArrow::GetTableMemorySize(RecordBatch); -} - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - RecordBatch = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); -} - - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) - : RecordBatch(batch) -{ - AFL_VERIFY(RecordBatch); -} - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards) - : RecordBatch(batch) - , SplittedByShards(std::move(splittedByShards)) -{ - AFL_VERIFY(RecordBatch); - AFL_VERIFY(SplittedByShards.size()); -} - -std::vector> TShardingSplitIndex::Apply(const std::shared_ptr& input) { - AFL_VERIFY(input); - AFL_VERIFY(input->num_rows() == RecordsCount); - auto permutation = BuildPermutation(); - auto resultBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(input, *permutation)).table(); - 
AFL_VERIFY(resultBatch->num_rows() == RecordsCount); - std::vector> result; - ui64 startIndex = 0; - for (auto&& i : Remapping) { - result.emplace_back(resultBatch->Slice(startIndex, i.size())); - startIndex += i.size(); - } - AFL_VERIFY(startIndex == RecordsCount); - return result; -} - -NKikimr::NArrow::TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { - AFL_VERIFY(input); - if (shardsCount == 1) { - return TShardedRecordBatch(input); - } - auto hashColumn = input->GetColumnByName(hashColumnName); - if (!hashColumn) { - return TShardedRecordBatch(input); - } - std::optional splitter; - if (hashColumn->type()->id() == arrow::Type::UINT64) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::UINT32) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::INT64) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::INT32) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else { - Y_ABORT_UNLESS(false); - } - auto resultBatch = NArrow::TStatusValidator::GetValid(input->RemoveColumn(input->schema()->GetFieldIndex(hashColumnName))); - return TShardedRecordBatch(resultBatch, splitter->DetachRemapping()); -} - -TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { - return Apply(shardsCount, TStatusValidator::GetValid(arrow::Table::FromRecordBatches(input->schema(), {input})) - , hashColumnName); -} - -std::shared_ptr TShardingSplitIndex::BuildPermutation() const { - arrow::UInt64Builder builder; - Y_ABORT_UNLESS(builder.Reserve(RecordsCount).ok()); - - for (auto&& i : Remapping) { - for (auto&& idx : i) { - TStatusValidator::Validate(builder.Append(idx)); - } - } - - 
std::shared_ptr out; - Y_ABORT_UNLESS(builder.Finish(&out).ok()); - return out; -} - -std::shared_ptr ReverseRecords(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - auto permutation = NArrow::MakePermutation(batch->num_rows(), true); - return NArrow::TStatusValidator::GetValid(arrow::compute::Take(batch, permutation)).record_batch(); -} - -std::shared_ptr ReverseRecords(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - auto permutation = NArrow::MakePermutation(batch->num_rows(), true); - return NArrow::TStatusValidator::GetValid(arrow::compute::Take(batch, permutation)).table(); -} - } diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index 73a433ee52a2..f8c62fb87107 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -2,6 +2,7 @@ #include "arrow_helpers.h" #include +#include #include #include #include @@ -15,143 +16,6 @@ class THashConstructor { }; -class TShardedRecordBatch { -private: - YDB_READONLY_DEF(std::shared_ptr, RecordBatch); - YDB_READONLY_DEF(std::vector>, SplittedByShards); -public: - TShardedRecordBatch(const std::shared_ptr& batch); - TShardedRecordBatch(const std::shared_ptr& batch); - - void Cut(const ui32 limit) { - RecordBatch = RecordBatch->Slice(0, limit); - for (auto&& i : SplittedByShards) { - auto it = std::lower_bound(i.begin(), i.end(), limit); - if (it != i.end()) { - i.erase(it, i.end()); - } - } - } - - bool IsSharded() const { - return SplittedByShards.size() > 1; - } - - TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards); - - ui64 GetMemorySize() const; - - ui64 GetRecordsCount() const { - return RecordBatch->num_rows(); - } -}; - -class TShardingSplitIndex { -private: - ui32 ShardsCount = 0; - std::vector> Remapping; - ui32 RecordsCount = 0; - - template - std::vector MergeLists(const std::vector& base, const TIterator itFrom, const TIterator itTo) { - std::vector result; - result.reserve(base.size() + 
(itTo - itFrom)); - auto itBase = base.begin(); - auto itExt = itFrom; - while (itBase != base.end() && itExt != itTo) { - if (*itBase < *itExt) { - result.emplace_back(*itBase); - ++itBase; - } else { - result.emplace_back(*itExt); - ++itExt; - } - } - if (itBase == base.end()) { - result.insert(result.end(), itExt, itTo); - } else if (itExt == itTo) { - result.insert(result.end(), itBase, base.end()); - } - return result; - } - - template - void Initialize(const arrow::ChunkedArray& arrowHashArrayChunked) { - Y_ABORT_UNLESS(ShardsCount); - Remapping.resize(ShardsCount); - const ui32 expectation = arrowHashArrayChunked.length() / ShardsCount + 1; - for (auto&& i : Remapping) { - i.reserve(2 * expectation); - } - for (auto&& arrowHashArrayAbstract : arrowHashArrayChunked.chunks()) { - auto& arrowHashArray = static_cast(*arrowHashArrayAbstract); - ui64 offset = 0; - for (ui64 i = 0; i < (ui64)arrowHashArray.length(); ++i) { - const i64 v = arrowHashArray.GetView(i); - const ui32 idx = ((v < 0) ? 
(-v) : v) % ShardsCount; - Remapping[idx].emplace_back(offset + i); - } - offset += (ui64)arrowHashArray.length(); - } - std::deque*> sizeCorrection; - for (auto&& i : Remapping) { - sizeCorrection.emplace_back(&i); - } - const auto pred = [](const std::vector* l, const std::vector* r) { - return l->size() < r->size(); - }; - std::sort(sizeCorrection.begin(), sizeCorrection.end(), pred); - while (sizeCorrection.size() > 1 && sizeCorrection.back()->size() > expectation && sizeCorrection.front()->size() < expectation) { - const ui32 uselessRecords = sizeCorrection.back()->size() - expectation; - const ui32 needRecords = expectation - sizeCorrection.front()->size(); - const ui32 moveRecords = std::min(needRecords, uselessRecords); - if (moveRecords == 0) { - break; - } - *sizeCorrection.front() = MergeLists(*sizeCorrection.front(), sizeCorrection.back()->end() - moveRecords, sizeCorrection.back()->end()); - sizeCorrection.back()->resize(sizeCorrection.back()->size() - moveRecords); - if (sizeCorrection.back()->size() <= expectation) { - sizeCorrection.pop_back(); - } - if (sizeCorrection.front()->size() >= expectation) { - sizeCorrection.pop_front(); - } - } - } - - TShardingSplitIndex(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) - : ShardsCount(shardsCount) - , RecordsCount(arrowHashArray.length()) { - } - -public: - - std::vector> DetachRemapping() { - return std::move(Remapping); - } - - template - static TShardingSplitIndex Build(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) { - TShardingSplitIndex result(shardsCount, arrowHashArray); - result.Initialize(arrowHashArray); - return result; - } - - std::shared_ptr BuildPermutation() const; - - std::vector> Apply(const std::shared_ptr& input); - static TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName); - static TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& 
hashColumnName); -}; - -std::shared_ptr MakePermutation(const int size, const bool reverse = false); -std::shared_ptr MakeFilterPermutation(const std::vector& indexes); -std::shared_ptr MakeFilterPermutation(const std::vector& indexes); std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); -std::shared_ptr ReverseRecords(const std::shared_ptr& batch); -std::shared_ptr ReverseRecords(const std::shared_ptr& batch); - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); } diff --git a/ydb/core/formats/arrow/process_columns.cpp b/ydb/core/formats/arrow/process_columns.cpp index d8795e188055..c032d1d8006c 100644 --- a/ydb/core/formats/arrow/process_columns.cpp +++ b/ydb/core/formats/arrow/process_columns.cpp @@ -1,14 +1,45 @@ #include "process_columns.h" + #include "common/adapter.h" +#include +#include + #include namespace NKikimr::NArrow { namespace { -template -std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr& srcBatch, - const std::vector& columnNames) { + +template +class TColumnNameAccessor { +public: + static const std::string& GetFieldName(const T& val) { + return val; + } + static TString DebugString(const std::vector& items) { + return JoinSeq(",", items); + } +}; + +template <> +class TColumnNameAccessor> { +public: + static const std::string& GetFieldName(const std::shared_ptr& val) { + return val->name(); + } + static TString DebugString(const std::vector>& items) { + TStringBuilder sb; + for (auto&& i : items) { + sb << i->name() << ","; + } + return sb; + } +}; + +template +std::shared_ptr ExtractColumnsValidateImpl( + const std::shared_ptr& srcBatch, const std::vector& columnNames) { std::vector> fields; fields.reserve(columnNames.size()); std::vector::TColumn>> columns; @@ -16,7 +47,7 @@ std::shared_ptr ExtractColumnsValidateImpl(const 
std::shared_ptr auto srcSchema = srcBatch->schema(); for (auto& name : columnNames) { - const int pos = srcSchema->GetFieldIndex(name); + const int pos = srcSchema->GetFieldIndex(TColumnNameAccessor::GetFieldName(name)); if (Y_LIKELY(pos > -1)) { fields.push_back(srcSchema->field(pos)); columns.push_back(srcBatch->column(pos)); @@ -26,46 +57,57 @@ std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr return NAdapter::TDataBuilderPolicy::Build(std::move(fields), std::move(columns), srcBatch->num_rows()); } -template -TConclusion> AdaptColumnsImpl(const std::shared_ptr& srcBatch, - const std::shared_ptr& dstSchema) { +template +TConclusion> AdaptColumnsImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { AFL_VERIFY(srcBatch); AFL_VERIFY(dstSchema); std::vector::TColumn>> columns; columns.reserve(dstSchema->num_fields()); - + std::vector> fields; + fields.reserve(dstSchema->num_fields()); + std::set fieldIdx; + ui32 idx = 0; for (auto& field : dstSchema->fields()) { const int index = srcBatch->schema()->GetFieldIndex(field->name()); if (index > -1) { + if (subset) { + fieldIdx.emplace(idx); + } columns.push_back(srcBatch->column(index)); + fields.emplace_back(field); auto srcField = srcBatch->schema()->field(index); if (field->Equals(srcField)) { - AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); + AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); } else { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", 
field->ToString(true))("incoming_type", srcField->ToString(true)); + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->ToString(true))("incoming_type", srcField->ToString(true)); return TConclusionStatus::Fail("incompatible column types"); } - } else { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "not_found_column")("column", field->name()) - ("column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); + } else if (!subset) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "not_found_column")("column", field->name())( + "column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); return TConclusionStatus::Fail("not found column '" + field->name() + "'"); } + ++idx; } - - return NAdapter::TDataBuilderPolicy::Build(dstSchema, std::move(columns), srcBatch->num_rows()); + if (subset) { + *subset = TSchemaSubset(fieldIdx, dstSchema->num_fields()); + } + return NAdapter::TDataBuilderPolicy::Build(std::make_shared(fields), std::move(columns), srcBatch->num_rows()); } -template +template std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProblemsPolicy& policy, - const std::shared_ptr& incoming, const std::vector& columnNames) { + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(incoming); AFL_VERIFY(columnNames.size()); auto result = ExtractColumnsValidateImpl(incoming, columnNames); switch (policy) { case TColumnOperator::EExtractProblemsPolicy::Verify: - AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())("required", JoinSeq(",", columnNames)); + AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())( + "required", TColumnNameAccessor::DebugString(columnNames)); break; case TColumnOperator::EExtractProblemsPolicy::Null: if 
((ui32)result->num_columns() != columnNames.size()) { @@ -79,7 +121,8 @@ std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProbl } template -TConclusion> ReorderImpl(const std::shared_ptr& incoming, const std::vector& columnNames) { +TConclusion> ReorderImpl( + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(!!incoming); AFL_VERIFY(columnNames.size()); if ((ui32)incoming->num_columns() < columnNames.size()) { @@ -96,17 +139,30 @@ TConclusion> ReorderImpl(const std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } @@ -114,28 +170,86 @@ std::shared_ptr TColumnOperator::Extract(const std::shared_ptr> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { - return AdaptColumnsImpl(incoming, dstSchema); +NKikimr::TConclusion> TColumnOperator::Adapt( + const 
std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { - return AdaptColumnsImpl(incoming, dstSchema); +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return 
ReorderImpl(incoming, columnNames); } +namespace { +template +TConclusion BuildSequentialSubsetImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema) { + AFL_VERIFY(srcBatch); + AFL_VERIFY(dstSchema); + if (dstSchema->num_fields() < srcBatch->schema()->num_fields()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns set: destination must been wider than source")( + "source", srcBatch->schema()->ToString())("destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns set: destination must been wider than source"); + } + std::set fieldIdx; + auto itSrc = srcBatch->schema()->fields().begin(); + auto itDst = dstSchema->fields().begin(); + while (itSrc != srcBatch->schema()->fields().end() && itDst != dstSchema->fields().end()) { + if ((*itSrc)->name() != (*itDst)->name()) { + ++itDst; + } else { + fieldIdx.emplace(itDst - dstSchema->fields().begin()); + if (!(*itDst)->Equals(*itSrc)) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column_type", (*itDst)->ToString(true))("incoming_type", (*itSrc)->ToString(true)); + return TConclusionStatus::Fail("incompatible column types"); + } + + ++itDst; + ++itSrc; + } + } + if (itDst == dstSchema->fields().end() && itSrc != srcBatch->schema()->fields().end()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns order in source set")("source", srcBatch->schema()->ToString())( + "destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns order in source set"); + } + return TSchemaSubset(fieldIdx, dstSchema->num_fields()); +} +} // namespace + +TConclusion TColumnOperator::BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { + return BuildSequentialSubsetImpl(incoming, dstSchema); +} -} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/process_columns.h 
b/ydb/core/formats/arrow/process_columns.h index d07b106231d2..ad57af9e6647 100644 --- a/ydb/core/formats/arrow/process_columns.h +++ b/ydb/core/formats/arrow/process_columns.h @@ -5,6 +5,9 @@ namespace NKikimr::NArrow { +class TSchemaSubset; +class TSchemaLite; + class TColumnOperator { public: enum class EExtractProblemsPolicy { @@ -12,6 +15,7 @@ class TColumnOperator { Verify, Skip }; + private: EExtractProblemsPolicy AbsentColumnPolicy = EExtractProblemsPolicy::Verify; @@ -31,18 +35,35 @@ class TColumnOperator { return *this; } - std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); + TConclusion BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); + + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt(const std::shared_ptr& incoming, + const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); - TConclusion> Reorder(const 
std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); }; -} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp index 1203f1802098..50071d8490e3 100644 --- a/ydb/core/formats/arrow/program.cpp +++ b/ydb/core/formats/arrow/program.cpp @@ -19,6 +19,18 @@ enum class AggFunctionId { AGG_MIN = 3, AGG_MAX = 4, AGG_SUM = 5, + AGG_AVG = 6, + //AGG_VAR = 7, + //AGG_COVAR = 8, + //AGG_STDDEV = 9, + //AGG_CORR = 10, + //AGG_ARG_MIN = 11, + //AGG_ARG_MAX = 12, + //AGG_COUNT_DISTINCT = 13, + //AGG_QUANTILES = 14, + //AGG_TOP_COUNT = 15, + //AGG_TOP_SUM = 16, + AGG_NUM_ROWS = 17, }; struct GroupByOptions: public arrow::compute::ScalarAggregateOptions { struct Assign { @@ -88,7 +100,7 @@ class TConstFunction : public IStepFunction { using TBase = IStepFunction; public: using TBase::TBase; - arrow::Result Call(const TAssign& assign, const TDatumBatch& batch) const override { + arrow::Result Call(const TAssign& assign, const TDatumBatch& batch) const override { Y_UNUSED(batch); return assign.GetConstant(); } @@ -398,6 +410,8 @@ const char * GetFunctionName(EAggregate op) { return "min_max"; case EAggregate::Sum: return "sum"; + case EAggregate::NumRows: + return "num_rows"; #if 0 // TODO case EAggregate::Avg: return "mean"; @@ -424,6 +438,8 @@ const char * GetHouseFunctionName(EAggregate op) { case EAggregate::Avg: return "ch.avg"; #endif + case 
EAggregate::NumRows: + return "ch.num_rows"; default: break; } @@ -448,6 +464,8 @@ CH::AggFunctionId GetHouseFunction(EAggregate op) { case EAggregate::Avg: return CH::AggFunctionId::AGG_AVG; #endif + case EAggregate::NumRows: + return CH::AggFunctionId::AGG_NUM_ROWS; default: break; } @@ -531,7 +549,7 @@ class TFilterVisitor : public arrow::ArrayVisitor { arrow::Status TDatumBatch::AddColumn(const std::string& name, arrow::Datum&& column) { - if (Schema->GetFieldIndex(name) != -1) { + if (HasColumn(name)) { return arrow::Status::Invalid("Trying to add duplicate column '" + name + "'"); } @@ -543,20 +561,27 @@ arrow::Status TDatumBatch::AddColumn(const std::string& name, arrow::Datum&& col return arrow::Status::Invalid("Wrong column length."); } - Schema = *Schema->AddField(Schema->num_fields(), field); + NewColumnIds.emplace(name, NewColumnsPtr.size()); + NewColumnsPtr.emplace_back(field); + Datums.emplace_back(column); return arrow::Status::OK(); } arrow::Result TDatumBatch::GetColumnByName(const std::string& name) const { - auto i = Schema->GetFieldIndex(name); + auto it = NewColumnIds.find(name); + if (it != NewColumnIds.end()) { + AFL_VERIFY(SchemaBase->num_fields() + it->second < Datums.size()); + return Datums[SchemaBase->num_fields() + it->second]; + } + auto i = SchemaBase->GetFieldIndex(name); if (i < 0) { return arrow::Status::Invalid("Not found column '" + name + "' or duplicate"); } return Datums[i]; } -std::shared_ptr TDatumBatch::ToTable() const { +std::shared_ptr TDatumBatch::ToTable() { std::vector> columns; columns.reserve(Datums.size()); for (auto col : Datums) { @@ -576,10 +601,10 @@ std::shared_ptr TDatumBatch::ToTable() const { AFL_VERIFY(false); } } - return arrow::Table::Make(Schema, columns, Rows); + return arrow::Table::Make(GetSchema(), columns, Rows); } -std::shared_ptr TDatumBatch::ToRecordBatch() const { +std::shared_ptr TDatumBatch::ToRecordBatch() { std::vector> columns; columns.reserve(Datums.size()); for (auto col : Datums) { @@ 
-594,7 +619,7 @@ std::shared_ptr TDatumBatch::ToRecordBatch() const { AFL_VERIFY(false); } } - return arrow::RecordBatch::Make(Schema, Rows, columns); + return arrow::RecordBatch::Make(GetSchema(), Rows, columns); } std::shared_ptr TDatumBatch::FromRecordBatch(const std::shared_ptr& batch) { @@ -603,12 +628,7 @@ std::shared_ptr TDatumBatch::FromRecordBatch(const std::shared_ptr< for (int64_t i = 0; i < batch->num_columns(); ++i) { datums.push_back(arrow::Datum(batch->column(i))); } - return std::make_shared( - TProgramStep::TDatumBatch{ - .Schema = std::make_shared(*batch->schema()), - .Datums = std::move(datums), - .Rows = batch->num_rows() - }); + return std::make_shared(std::make_shared(*batch->schema()), std::move(datums), batch->num_rows()); } std::shared_ptr TDatumBatch::FromTable(const std::shared_ptr& batch) { @@ -617,12 +637,15 @@ std::shared_ptr TDatumBatch::FromTable(const std::shared_ptrnum_columns(); ++i) { datums.push_back(arrow::Datum(batch->column(i))); } - return std::make_shared( - TProgramStep::TDatumBatch{ - .Schema = std::make_shared(*batch->schema()), - .Datums = std::move(datums), - .Rows = batch->num_rows() - }); + return std::make_shared(std::make_shared(*batch->schema()), std::move(datums), batch->num_rows()); +} + +TDatumBatch::TDatumBatch(const std::shared_ptr& schema, std::vector&& datums, const i64 rows) + : SchemaBase(schema) + , Rows(rows) + , Datums(std::move(datums)) { + AFL_VERIFY(SchemaBase); + AFL_VERIFY(Datums.size() == (ui32)SchemaBase->num_fields()); } TAssign TAssign::MakeTimestamp(const TColumnInfo& column, ui64 value) { @@ -673,6 +696,27 @@ IStepFunction::TPtr TAggregateAssign::GetFunction(arrow::compu return std::make_shared(ctx); } +TString TAggregateAssign::DebugString() const { + TStringBuilder sb; + sb << "{"; + if (Operation != EAggregate::Unspecified) { + sb << "op=" << GetFunctionName(Operation) << ";"; + } + if (Arguments.size()) { + sb << "arguments=["; + for (auto&& i : Arguments) { + sb << i.DebugString() << 
";"; + } + sb << "];"; + } + sb << "options=" << ScalarOpts.ToString() << ";"; + if (KernelFunction) { + sb << "kernel=" << KernelFunction->name() << ";"; + } + sb << "column=" << Column.DebugString() << ";"; + sb << "}"; + return sb; +} arrow::Status TProgramStep::ApplyAssignes(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const { if (Assignes.empty()) { @@ -680,7 +724,7 @@ arrow::Status TProgramStep::ApplyAssignes(TDatumBatch& batch, arrow::compute::Ex } batch.Datums.reserve(batch.Datums.size() + Assignes.size()); for (auto& assign : Assignes) { - if (batch.GetColumnByName(assign.GetName()).ok()) { + if (batch.HasColumn(assign.GetName())) { return arrow::Status::Invalid("Assign to existing column '" + assign.GetName() + "'."); } @@ -703,8 +747,9 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: } ui32 numResultColumns = GroupBy.size() + GroupByKeys.size(); - TDatumBatch res; - res.Datums.reserve(numResultColumns); + std::vector datums; + datums.reserve(numResultColumns); + std::optional resultRecordsCount; arrow::FieldVector fields; fields.reserve(numResultColumns); @@ -715,13 +760,13 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: if (!funcResult.ok()) { return funcResult.status(); } - res.Datums.push_back(*funcResult); - fields.emplace_back(std::make_shared(assign.GetName(), res.Datums.back().type())); + datums.push_back(*funcResult); + fields.emplace_back(std::make_shared(assign.GetName(), datums.back().type())); } - res.Rows = 1; + resultRecordsCount = 1; } else { CH::GroupByOptions funcOpts; - funcOpts.schema = batch.Schema; + funcOpts.schema = batch.GetSchema(); funcOpts.assigns.reserve(numResultColumns); funcOpts.has_nullable_key = false; @@ -759,19 +804,18 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: return arrow::Status::Invalid("No expected column in GROUP BY result."); } fields.emplace_back(std::make_shared(assign.result_column, 
column->type())); - res.Datums.push_back(column); + datums.push_back(column); } - res.Rows = gbBatch->num_rows(); + resultRecordsCount = gbBatch->num_rows(); } - - res.Schema = std::make_shared(std::move(fields)); - batch = std::move(res); + AFL_VERIFY(resultRecordsCount); + batch = TDatumBatch(std::make_shared(std::move(fields)), std::move(datums), *resultRecordsCount); return arrow::Status::OK(); } arrow::Status TProgramStep::MakeCombinedFilter(TDatumBatch& batch, NArrow::TColumnFilter& result) const { - TFilterVisitor filterVisitor(batch.Rows); + TFilterVisitor filterVisitor(batch.GetRecordsCount()); for (auto& colName : Filters) { auto column = batch.GetColumnByName(colName.GetColumnName()); if (!column.ok()) { @@ -821,13 +865,13 @@ arrow::Status TProgramStep::ApplyFilters(TDatumBatch& batch) const { } } std::vector filterDatums; - for (int64_t i = 0; i < batch.Schema->num_fields(); ++i) { - if (batch.Datums[i].is_arraylike() && (allColumns || neededColumns.contains(batch.Schema->field(i)->name()))) { + for (int64_t i = 0; i < batch.GetSchema()->num_fields(); ++i) { + if (batch.Datums[i].is_arraylike() && (allColumns || neededColumns.contains(batch.GetSchema()->field(i)->name()))) { filterDatums.emplace_back(&batch.Datums[i]); } } - bits.Apply(batch.Rows, filterDatums); - batch.Rows = bits.GetFilteredCount().value_or(batch.Rows); + bits.Apply(batch.GetRecordsCount(), filterDatums); + batch.SetRecordsCount(bits.GetFilteredCount().value_or(batch.GetRecordsCount())); return arrow::Status::OK(); } @@ -838,15 +882,14 @@ arrow::Status TProgramStep::ApplyProjection(TDatumBatch& batch) const { std::vector> newFields; std::vector newDatums; for (size_t i = 0; i < Projection.size(); ++i) { - int schemaFieldIndex = batch.Schema->GetFieldIndex(Projection[i].GetColumnName()); + int schemaFieldIndex = batch.GetSchema()->GetFieldIndex(Projection[i].GetColumnName()); if (schemaFieldIndex == -1) { return arrow::Status::Invalid("Could not find column " + 
Projection[i].GetColumnName() + " in record batch schema."); } - newFields.push_back(batch.Schema->field(schemaFieldIndex)); + newFields.push_back(batch.GetSchema()->field(schemaFieldIndex)); newDatums.push_back(batch.Datums[schemaFieldIndex]); } - batch.Schema = std::make_shared(std::move(newFields)); - batch.Datums = std::move(newDatums); + batch = TDatumBatch(std::make_shared(std::move(newFields)), std::move(newDatums), batch.GetRecordsCount()); return arrow::Status::OK(); } @@ -919,14 +962,10 @@ std::set TProgramStep::GetColumnsInUsage(const bool originalOnly/* } arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { - return BuildFilter(t->BuildTable(GetColumnsInUsage(true))); -} - -arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { if (Filters.empty()) { return nullptr; } - std::vector> batches = NArrow::SliceToRecordBatches(t); + std::vector> batches = NArrow::SliceToRecordBatches(t->BuildTableVerified(GetColumnsInUsage(true))); NArrow::TColumnFilter fullLocal = NArrow::TColumnFilter::BuildAllowFilter(); for (auto&& rb : batches) { auto datumBatch = TDatumBatch::FromRecordBatch(rb); @@ -938,7 +977,7 @@ arrow::Result> TProgramStep::BuildFilter( } NArrow::TColumnFilter local = NArrow::TColumnFilter::BuildAllowFilter(); NArrow::TStatusValidator::Validate(MakeCombinedFilter(*datumBatch, local)); - AFL_VERIFY(local.Size() == datumBatch->Rows)("local", local.Size())("datum", datumBatch->Rows); + AFL_VERIFY(local.Size() == datumBatch->GetRecordsCount())("local", local.Size())("datum", datumBatch->GetRecordsCount()); fullLocal.Append(local); } AFL_VERIFY(fullLocal.Size() == t->num_rows())("filter", fullLocal.Size())("t", t->num_rows()); @@ -946,18 +985,18 @@ arrow::Result> TProgramStep::BuildFilter( } const std::set& TProgramStep::GetFilterOriginalColumnIds() const { - AFL_VERIFY(IsFilterOnly()); +// AFL_VERIFY(IsFilterOnly()); return FilterOriginalColumnIds; } std::set TProgram::GetEarlyFilterColumns() const { std::set 
result; for (ui32 i = 0; i < Steps.size(); ++i) { + auto stepFields = Steps[i]->GetColumnsInUsage(true); + result.insert(stepFields.begin(), stepFields.end()); if (!Steps[i]->IsFilterOnly()) { break; } - auto stepFields = Steps[i]->GetColumnsInUsage(); - result.insert(stepFields.begin(), stepFields.end()); } return result; } diff --git a/ydb/core/formats/arrow/program.h b/ydb/core/formats/arrow/program.h index dfb22116158b..2b953b55e070 100644 --- a/ydb/core/formats/arrow/program.h +++ b/ydb/core/formats/arrow/program.h @@ -21,6 +21,7 @@ enum class EAggregate { Max = 4, Sum = 5, //Avg = 6, + NumRows = 7, }; } @@ -37,15 +38,47 @@ const char * GetHouseFunctionName(EAggregate op); inline const char * GetHouseGroupByName() { return "ch.group_by"; } EOperation ValidateOperation(EOperation op, ui32 argsSize); -struct TDatumBatch { - std::shared_ptr Schema; - std::vector Datums; +class TDatumBatch { +private: + std::shared_ptr SchemaBase; + THashMap NewColumnIds; + std::vector> NewColumnsPtr; int64_t Rows = 0; +public: + std::vector Datums; + + ui64 GetRecordsCount() const { + return Rows; + } + + void SetRecordsCount(const ui64 value) { + Rows = value; + } + + TDatumBatch(const std::shared_ptr& schema, std::vector&& datums, const i64 rows); + + const std::shared_ptr& GetSchema() { + if (NewColumnIds.size()) { + std::vector> fields = SchemaBase->fields(); + fields.insert(fields.end(), NewColumnsPtr.begin(), NewColumnsPtr.end()); + SchemaBase = std::make_shared(fields); + NewColumnIds.clear(); + NewColumnsPtr.clear(); + } + return SchemaBase; + } + arrow::Status AddColumn(const std::string& name, arrow::Datum&& column); arrow::Result GetColumnByName(const std::string& name) const; - std::shared_ptr ToTable() const; - std::shared_ptr ToRecordBatch() const; + bool HasColumn(const std::string& name) const { + if (NewColumnIds.contains(name)) { + return true; + } + return SchemaBase->GetFieldIndex(name) > -1; + } + std::shared_ptr ToTable(); + std::shared_ptr ToRecordBatch(); 
static std::shared_ptr FromRecordBatch(const std::shared_ptr& batch); static std::shared_ptr FromTable(const std::shared_ptr& batch); }; @@ -291,6 +324,7 @@ class TAggregateAssign { const arrow::compute::ScalarAggregateOptions* GetOptions() const { return &ScalarOpts; } IStepFunction::TPtr GetFunction(arrow::compute::ExecContext* ctx) const; + TString DebugString() const; private: TColumnInfo Column; @@ -340,10 +374,18 @@ class TProgramStep { sb << "];"; } if (GroupBy.size()) { - sb << "group_by_count=" << GroupBy.size() << "; "; + sb << "group_by_assignes=["; + for (auto&& i : GroupBy) { + sb << i.DebugString() << ";"; + } + sb << "];"; } if (GroupByKeys.size()) { - sb << "group_by_keys_count=" << GroupByKeys.size() << ";"; + sb << "group_by_keys=["; + for (auto&& i : GroupByKeys) { + sb << i.DebugString() << ";"; + } + sb << "];"; } sb << "projections=["; @@ -405,7 +447,6 @@ class TProgramStep { return Filters.size() && (!GroupBy.size() && !GroupByKeys.size()); } - [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; }; diff --git a/ydb/core/formats/arrow/protos/ya.make b/ydb/core/formats/arrow/protos/ya.make deleted file mode 100644 index f01aa064fbcc..000000000000 --- a/ydb/core/formats/arrow/protos/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - ssa.proto -) - -PEERDIR( - -) - -END() diff --git a/ydb/core/formats/arrow/reader/batch_iterator.h b/ydb/core/formats/arrow/reader/batch_iterator.h index 48497a53c452..d3bb365d5706 100644 --- a/ydb/core/formats/arrow/reader/batch_iterator.h +++ b/ydb/core/formats/arrow/reader/batch_iterator.h @@ -44,7 +44,8 @@ class TBatchIterator { TBatchIterator(TRWSortableBatchPosition&& keyColumns) : ControlPointFlag(true) - , KeyColumns(std::move(keyColumns)) { + , KeyColumns(std::move(keyColumns)) + { } diff --git a/ydb/core/formats/arrow/reader/merger.cpp b/ydb/core/formats/arrow/reader/merger.cpp index 
ddae86c1ed28..16b9733ad4c0 100644 --- a/ydb/core/formats/arrow/reader/merger.cpp +++ b/ydb/core/formats/arrow/reader/merger.cpp @@ -1,15 +1,16 @@ #include "merger.h" #include "result_builder.h" +#include #include namespace NKikimr::NArrow::NMerger { -void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point) { +void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy) { AFL_VERIFY(point.IsSameSortingSchema(SortSchema))("point", point.DebugJson())("schema", SortSchema->ToString()); Y_ABORT_UNLESS(point.IsReverseSort() == Reverse); Y_ABORT_UNLESS(++ControlPoints == 1); - SortHeap.Push(TBatchIterator(point.BuildRWPosition())); + SortHeap.Push(TBatchIterator(point.BuildRWPosition(false, deepCopy))); } void TMergePartialStream::RemoveControlPoint() { @@ -65,7 +66,7 @@ bool TMergePartialStream::DrainToControlPoint(TRecordBatchBuilder& builder, cons } bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { - PutControlPoint(readTo); + PutControlPoint(readTo, false); return DrainToControlPoint(builder, includeFinish, lastResultPosition); } @@ -185,13 +186,16 @@ void TMergePartialStream::DrainCurrentPosition(TRecordBatchBuilder* builder, std SortHeap.CleanFinished(); } -std::vector> TMergePartialStream::DrainAllParts(const std::map& positions, +std::vector> TMergePartialStream::DrainAllParts(const TIntervalPositions& positions, const std::vector>& resultFields) { std::vector> result; for (auto&& i : positions) { TRecordBatchBuilder indexesBuilder(resultFields); - DrainCurrentTo(indexesBuilder, i.first, i.second); + if (SortHeap.Empty() || i.GetPosition().Compare(SortHeap.Current().GetKeyColumns()) == std::partial_ordering::less) { + continue; + } + DrainCurrentTo(indexesBuilder, i.GetPosition(), i.IsIncludedToLeftInterval()); result.emplace_back(indexesBuilder.Finalize()); if 
(result.back()->num_rows() == 0) { result.pop_back(); diff --git a/ydb/core/formats/arrow/reader/merger.h b/ydb/core/formats/arrow/reader/merger.h index 196edcd09e3c..c30aba0f384f 100644 --- a/ydb/core/formats/arrow/reader/merger.h +++ b/ydb/core/formats/arrow/reader/merger.h @@ -37,6 +37,9 @@ class TMergePartialStream { void DrainCurrentPosition(TRecordBatchBuilder* builder, std::shared_ptr* resultScanData, ui64* resultPosition); void CheckSequenceInDebug(const TRWSortableBatchPosition& nextKeyColumnsPosition); + bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, + std::optional* lastResultPosition = nullptr); + public: TMergePartialStream(std::shared_ptr sortSchema, std::shared_ptr dataSchema, const bool reverse, const std::vector& versionColumnNames) : SortSchema(sortSchema) @@ -49,6 +52,7 @@ class TMergePartialStream { Y_ABORT_UNLESS(!DataSchema || DataSchema->num_fields()); } + void PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy); void SkipToLowerBound(const TSortableBatchPosition& pos, const bool include); void SetPossibleSameVersion(const bool value) { @@ -67,8 +71,6 @@ class TMergePartialStream { return TStringBuilder() << "sort_heap=" << SortHeap.DebugJson(); } - void PutControlPoint(const TSortableBatchPosition& point); - void RemoveControlPoint(); bool ControlPointEnriched() const { @@ -92,9 +94,8 @@ class TMergePartialStream { void DrainAll(TRecordBatchBuilder& builder); std::shared_ptr SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); - bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); bool DrainToControlPoint(TRecordBatchBuilder& builder, const bool includeFinish, std::optional* lastResultPosition = nullptr); - std::vector> DrainAllParts(const std::map& positions, + std::vector> 
DrainAllParts(const TIntervalPositions& positions, const std::vector>& resultFields); }; diff --git a/ydb/core/formats/arrow/reader/position.cpp b/ydb/core/formats/arrow/reader/position.cpp index 6431d180d130..b728405769d7 100644 --- a/ydb/core/formats/arrow/reader/position.cpp +++ b/ydb/core/formats/arrow/reader/position.cpp @@ -1,4 +1,7 @@ #include "position.h" + +#include + #include namespace NKikimr::NArrow::NMerger { @@ -15,11 +18,13 @@ NJson::TJsonValue TSortableBatchPosition::DebugJson() const { return result; } -std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { +std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, + const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { ui64 posStart = posStartExt; ui64 posFinish = posFinishExt; + auto guard = position.CreateAsymmetricAccessGuard(); { - AFL_VERIFY(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::greater) { return TFoundPosition::Greater(posStart); @@ -28,7 +33,7 @@ std::optional TSortableBatchPosition::Fi } } { - AFL_VERIFY(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::less) { return TFoundPosition::Less(posFinish); @@ -37,7 +42,7 @@ std::optional TSortableBatchPosition::Fi } } while (posFinish > posStart + 1) { - Y_ABORT_UNLESS(position.InitPosition(0.5 * (posStart + posFinish))); + AFL_VERIFY(guard.InitSortingPosition(0.5 * (posStart + posFinish))); const auto comparision = position.Compare(forFound); if (comparision == std::partial_ordering::less) { posStart = position.Position; @@ -47,17 +52,18 @@ std::optional TSortableBatchPosition::Fi return 
TFoundPosition::Equal(position.Position); } } - Y_ABORT_UNLESS(posFinish != posStart); + AFL_VERIFY(posFinish != posStart); if (greater) { - Y_ABORT_UNLESS(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); return TFoundPosition::Greater(posFinish); } else { - Y_ABORT_UNLESS(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); return TFoundPosition::Less(posStart); } } -std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { +std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, + const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { if (!batch || !batch->num_rows()) { return {}; } @@ -74,11 +80,14 @@ std::optional TSortableBatchPosition::Fi return FindPosition(position, posStart, posFinish, forFound, greater); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition() const { - return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, Sorting->BuildCopy(Position), Data ? Data->BuildCopy(Position) : nullptr); +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(const bool needData, const bool deepCopy) const { + return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, + deepCopy ? Sorting->BuildCopy(Position) : Sorting, + (needData && Data) ? (deepCopy ? 
Data->BuildCopy(Position) : Data) : nullptr); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(std::shared_ptr batch, const ui32 position) const { +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition( + std::shared_ptr batch, const ui32 position) const { std::vector dataColumns; if (Data) { dataColumns = Data->GetFieldNames(); @@ -98,7 +107,8 @@ TSortableBatchPosition::TFoundPosition TRWSortableBatchPosition::SkipToLower(con return *pos; } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetAccessorByNameOptional(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns))("batch", batch->DebugString()); @@ -110,7 +120,8 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetColumnByName(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns)); @@ -134,10 +145,11 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -void TSortableScanData::AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const { +void TSortableScanData::AppendPositionTo( + const std::vector>& builders, const ui64 position, ui64* recordSize) const { AFL_VERIFY(builders.size() == PositionAddress.size()); for (ui32 i = 0; i < PositionAddress.size(); ++i) { - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), position 
- PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(position), recordSize)); } } @@ -148,9 +160,9 @@ void TSortableScanData::BuildPosition(const ui64 position) { StartPosition = 0; LastInit = position; for (auto&& i : Columns) { - PositionAddress.emplace_back(i->GetChunk({}, position)); - StartPosition = std::max(StartPosition, PositionAddress.back().GetStartPosition()); - FinishPosition = std::min(FinishPosition, PositionAddress.back().GetFinishPosition()); + PositionAddress.emplace_back(i->GetChunkSlow(position)); + StartPosition = std::max(StartPosition, PositionAddress.back().GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, PositionAddress.back().GetAddress().GetGlobalFinishPosition()); if (!recordsCount) { recordsCount = i->GetRecordsCount(); } else { @@ -166,18 +178,19 @@ void TSortableScanData::BuildPosition(const ui64 position) { bool TSortableScanData::InitPosition(const ui64 position) { AFL_VERIFY(position < RecordsCount); if (position < FinishPosition && StartPosition <= position) { - return false; + return true; } LastInit = position; ui32 idx = 0; FinishPosition = Max(); StartPosition = 0; for (auto&& i : PositionAddress) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } - StartPosition = std::max(StartPosition, i.GetStartPosition()); - FinishPosition = std::min(FinishPosition, i.GetFinishPosition()); + StartPosition = std::max(StartPosition, i.GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, i.GetAddress().GetGlobalFinishPosition()); + AFL_VERIFY(i.GetAddress().Contains(position)); ++idx; } AFL_VERIFY(StartPosition < FinishPosition); @@ -212,14 +225,13 @@ void TCursor::AppendPositionTo(const 
std::vectortype()->Equals(PositionAddress[i].GetArray()->type())); - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), Position - PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(Position), recordSize)); } } TCursor::TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns) - : Position(position) -{ + : Position(position) { PositionAddress = TSortableScanData(position, table, columns).GetPositionAddress(); } -} +} // namespace NKikimr::NArrow::NMerger diff --git a/ydb/core/formats/arrow/reader/position.h b/ydb/core/formats/arrow/reader/position.h index 8a6e15fd79ac..78233e50b4a5 100644 --- a/ydb/core/formats/arrow/reader/position.h +++ b/ydb/core/formats/arrow/reader/position.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include -#include #include +#include #include #include @@ -22,12 +21,12 @@ class TSortableScanData; class TCursor { private: YDB_READONLY(ui64, Position, 0); - std::vector PositionAddress; + std::vector PositionAddress; public: TCursor() = default; TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns); - TCursor(const ui64 position, const std::vector& addresses) + TCursor(const ui64 position, const std::vector& addresses) : Position(position) , PositionAddress(addresses) { @@ -64,7 +63,7 @@ class TCursor { class TSortableScanData { private: ui64 RecordsCount = 0; - YDB_READONLY_DEF(std::vector, PositionAddress); + YDB_READONLY_DEF(std::vector, PositionAddress); YDB_READONLY_DEF(std::vector>, Columns); YDB_READONLY_DEF(std::vector>, Fields); ui64 StartPosition = 0; @@ -87,19 +86,18 @@ class TSortableScanData { BuildPosition(position); } - const NAccessor::IChunkedArray::TCurrentChunkAddress& GetPositionAddress(const ui32 colIdx) const { + const NAccessor::IChunkedArray::TFullDataAddress& GetPositionAddress(const ui32 colIdx) const { 
AFL_VERIFY(colIdx < PositionAddress.size()); return PositionAddress[colIdx]; } ui32 GetPositionInChunk(const ui32 colIdx, const ui32 pos) const { AFL_VERIFY(colIdx < PositionAddress.size()); - AFL_VERIFY(pos >= PositionAddress[colIdx].GetStartPosition()); - return pos - PositionAddress[colIdx].GetStartPosition(); + return PositionAddress[colIdx].GetAddress().GetLocalIndex(pos); } - std::shared_ptr BuildCopy(const ui64 position) const { - return std::make_shared(position, RecordsCount, Columns, Fields); + std::shared_ptr BuildCopy(const ui64 /*position*/) const { + return std::make_shared(*this); } TCursor BuildCursor(const ui64 position) const { @@ -109,8 +107,8 @@ class TSortableScanData { auto addresses = PositionAddress; ui32 idx = 0; for (auto&& i : addresses) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } ++idx; } @@ -129,15 +127,15 @@ class TSortableScanData { } else { for (ui32 idx = 0; idx < PositionAddress.size(); ++idx) { std::partial_ordering cmp = std::partial_ordering::equivalent; - const bool containsSelf = PositionAddress[idx].Contains(position); - const bool containsItem = item.PositionAddress[idx].Contains(itemPosition); + const bool containsSelf = PositionAddress[idx].GetAddress().Contains(position); + const bool containsItem = item.PositionAddress[idx].GetAddress().Contains(itemPosition); if (containsSelf && containsItem) { cmp = PositionAddress[idx].Compare(position, item.PositionAddress[idx], itemPosition); } else if (containsSelf) { - auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx], itemPosition); + auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx].GetAddress(), itemPosition); cmp = PositionAddress[idx].Compare(position, temporaryAddress, itemPosition); } else if (containsItem) { - auto temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx], 
position); + auto temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx].GetAddress(), position); cmp = temporaryAddress.Compare(position, item.PositionAddress[idx], itemPosition); } else { AFL_VERIFY(false); @@ -153,7 +151,7 @@ class TSortableScanData { void AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const; - bool InitPosition(const ui64 position); + [[nodiscard]] bool InitPosition(const ui64 position); std::shared_ptr Slice(const ui64 offset, const ui64 count) const { std::vector> slicedArrays; @@ -210,6 +208,17 @@ class TSortableBatchPosition { bool ReverseSort = false; std::shared_ptr Sorting; std::shared_ptr Data; + + TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, + const std::shared_ptr& data) + : Position(position) + , RecordsCount(recordsCount) + , ReverseSort(reverseSort) + , Sorting(sorting) + , Data(data) { + AFL_VERIFY(IsAvailablePosition(Position)); + } + public: TSortableBatchPosition() = default; @@ -221,7 +230,7 @@ class TSortableBatchPosition { return RecordsCount; } - std::shared_ptr GetSorting() const { + const std::shared_ptr& GetSorting() const { return Sorting; } @@ -240,16 +249,6 @@ class TSortableBatchPosition { return Sorting->GetFields(); } - TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, const std::shared_ptr& data) - : Position(position) - , RecordsCount(recordsCount) - , ReverseSort(reverseSort) - , Sorting(sorting) - , Data(data) - { - - } - TSortableBatchPosition(const TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition&& source) = delete; @@ -258,7 +257,7 @@ class TSortableBatchPosition { TSortableBatchPosition operator= (TRWSortableBatchPosition& source) = delete; TSortableBatchPosition operator= (TRWSortableBatchPosition&& 
source) = delete; - TRWSortableBatchPosition BuildRWPosition() const; + TRWSortableBatchPosition BuildRWPosition(const bool needData, const bool deepCopy) const; std::shared_ptr SliceData(const ui64 offset, const ui64 count) const { AFL_VERIFY(Data); @@ -316,7 +315,12 @@ class TSortableBatchPosition { } }; - static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool needGreater, const std::optional includedStartPosition); + [[nodiscard]] bool IsAvailablePosition(const i64 position) const { + return 0 <= position && position < RecordsCount; + } + + static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, + const bool needGreater, const std::optional includedStartPosition); static std::optional FindPosition(TRWSortableBatchPosition& position, const ui64 posStart, const ui64 posFinish, const TSortableBatchPosition& forFound, const bool greater); const TSortableScanData& GetData() const { @@ -404,29 +408,151 @@ class TSortableBatchPosition { }; +class TIntervalPosition { +private: + TSortableBatchPosition Position; + bool LeftIntervalInclude; +public: + const TSortableBatchPosition& GetPosition() const { + return Position; + } + bool IsIncludedToLeftInterval() const { + return LeftIntervalInclude; + } + TIntervalPosition(TSortableBatchPosition&& position, const bool leftIntervalInclude) + : Position(std::move(position)) + , LeftIntervalInclude(leftIntervalInclude) { + + } + + TIntervalPosition(const TSortableBatchPosition& position, const bool leftIntervalInclude) + : Position(position) + , LeftIntervalInclude(leftIntervalInclude) { + + } + + bool operator<(const TIntervalPosition& item) const { + std::partial_ordering cmp = Position.Compare(item.Position); + if (cmp == std::partial_ordering::equivalent) { + return (LeftIntervalInclude ? 1 : 0) < (item.LeftIntervalInclude ? 
1 : 0); + } + return cmp == std::partial_ordering::less; + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("position", Position.DebugJson()); + result.InsertValue("include", LeftIntervalInclude); + return result; + } +}; + +class TIntervalPositions { +private: + std::vector Positions; +public: + bool IsEmpty() const { + return Positions.empty(); + } + + std::vector::const_iterator begin() const { + return Positions.begin(); + } + + std::vector::const_iterator end() const { + return Positions.end(); + } + + void InsertPosition(TIntervalPosition&& intervalPosition) { + Positions.emplace_back(std::move(intervalPosition)); + ui32 index = Positions.size() - 1; + while (index >= 1 && Positions[index] < Positions[index - 1]) { + std::swap(Positions[index], Positions[index - 1]); + index = index - 1; + } + } + + void InsertPosition(TSortableBatchPosition&& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(std::move(position), includePositionToLeftInterval); + InsertPosition(std::move(intervalPosition)); + } + + void InsertPosition(const TSortableBatchPosition& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(position, includePositionToLeftInterval); + InsertPosition(std::move(intervalPosition)); + } + + void AddPosition(TIntervalPosition&& intervalPosition) { + if (Positions.size()) { + AFL_VERIFY(Positions.back() < intervalPosition)("back", Positions.back().DebugJson())("pos", intervalPosition.DebugJson()); + } + Positions.emplace_back(std::move(intervalPosition)); + } + + void AddPosition(TSortableBatchPosition&& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(std::move(position), includePositionToLeftInterval); + AddPosition(std::move(intervalPosition)); + } + + void AddPosition(const TSortableBatchPosition& position, const bool includePositionToLeftInterval) { + TIntervalPosition 
intervalPosition(position, includePositionToLeftInterval); + AddPosition(std::move(intervalPosition)); + } +}; + class TRWSortableBatchPosition: public TSortableBatchPosition, public TMoveOnly { private: using TBase = TSortableBatchPosition; public: using TBase::TBase; - bool NextPosition(const i64 delta) { + [[nodiscard]] bool NextPosition(const i64 delta) { return InitPosition(Position + delta); } - bool InitPosition(const i64 position) { - if (position < RecordsCount && position >= 0) { - Sorting->InitPosition(position); - if (Data) { - Data->InitPosition(position); + [[nodiscard]] bool InitPosition(const i64 position) { + if (!IsAvailablePosition(position)) { + return false; + } + AFL_VERIFY(Sorting->InitPosition(position))("pos", position)("count", RecordsCount); + if (Data) { + AFL_VERIFY(Data->InitPosition(position))("pos", position)("count", RecordsCount); + } + Position = position; + return true; + } + + class TAsymmetricPositionGuard: TNonCopyable { + private: + TRWSortableBatchPosition& Owner; + public: + TAsymmetricPositionGuard(TRWSortableBatchPosition& owner) + : Owner(owner) + { + } + + [[nodiscard]] bool InitSortingPosition(const i64 position) { + if (!Owner.IsAvailablePosition(position)) { + return false; } - Position = position; + AFL_VERIFY(Owner.Sorting->InitPosition(position)); + Owner.Position = position; return true; - } else { - return false; } + ~TAsymmetricPositionGuard() { + if (Owner.IsAvailablePosition(Owner.Position)) { + if (Owner.Data) { + AFL_VERIFY(Owner.Data->InitPosition(Owner.Position)); + } + } + } + }; + + TAsymmetricPositionGuard CreateAsymmetricAccessGuard() { + return TAsymmetricPositionGuard(*this); } + TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& forFound); // (-inf, it1), [it1, it2), [it2, it3), ..., [itLast, +inf) diff --git a/ydb/core/formats/arrow/reader/result_builder.cpp b/ydb/core/formats/arrow/reader/result_builder.cpp index deb4fe3e1427..9b412902b1ed 100644 --- 
a/ydb/core/formats/arrow/reader/result_builder.cpp +++ b/ydb/core/formats/arrow/reader/result_builder.cpp @@ -1,9 +1,8 @@ #include "result_builder.h" -#include - #include #include +#include #include @@ -64,7 +63,7 @@ std::shared_ptr TRecordBatchBuilder::Finalize() { for (auto&& i : Builders) { columns.emplace_back(NArrow::TStatusValidator::GetValid(i->Finish())); } - auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), columns); + auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), std::move(columns)); #ifndef NDEBUG NArrow::TStatusValidator::Validate(result->ValidateFull()); #endif diff --git a/ydb/core/formats/arrow/reader/ya.make b/ydb/core/formats/arrow/reader/ya.make index d57bb4e501ca..8dba6acf2efb 100644 --- a/ydb/core/formats/arrow/reader/ya.make +++ b/ydb/core/formats/arrow/reader/ya.make @@ -2,11 +2,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/core/formats/arrow/common ydb/library/actors/core ydb/library/services + ydb/library/formats/arrow ) SRCS( diff --git a/ydb/core/formats/arrow/save_load/loader.cpp b/ydb/core/formats/arrow/save_load/loader.cpp new file mode 100644 index 000000000000..c9328f751d4a --- /dev/null +++ b/ydb/core/formats/arrow/save_load/loader.cpp @@ -0,0 +1,68 @@ +#include "loader.h" + +#include + +namespace NKikimr::NArrow::NAccessor { + +TString TColumnLoader::DebugString() const { + TStringBuilder result; + result << "accessor_constructor:" << AccessorConstructor->DebugString() << ";"; + result << "result_field:" << ResultField->ToString() << ";"; + if (Transformer) { + result << "transformer:" << Transformer->DebugString() << ";"; + } + result << "serializer:" << Serializer->DebugString() << ";"; + return result; +} + +TColumnLoader::TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const TConstructorContainer& accessorConstructor, const 
std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId) + : Serializer(serializer) + , Transformer(transformer) + , AccessorConstructor(accessorConstructor) + , ResultField(resultField) + , DefaultValue(defaultValue) + , ColumnId(columnId) { + AFL_VERIFY(!!AccessorConstructor); + AFL_VERIFY(ResultField); + AFL_VERIFY(Serializer); +} + +const std::shared_ptr& TColumnLoader::GetField() const { + return ResultField; +} + +arrow::Result> TColumnLoader::Apply(const TString& data) const { + Y_ABORT_UNLESS(Serializer); + arrow::Result> columnArray = + Transformer ? Serializer->Deserialize(data) : Serializer->Deserialize(data, AccessorConstructor->GetExpectedSchema(ResultField)); + if (!columnArray.ok()) { + return columnArray; + } + if (Transformer) { + return Transformer->Transform(*columnArray); + } else { + return columnArray; + } +} + +std::shared_ptr TColumnLoader::ApplyRawVerified(const TString& data) const { + return TStatusValidator::GetValid(Apply(data)); +} + +std::shared_ptr TColumnLoader::ApplyVerified(const TString& dataStr, const ui32 recordsCount) const { + auto data = TStatusValidator::GetValid(Apply(dataStr)); + return BuildAccessor(data, TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())); +} + +std::shared_ptr TColumnLoader::BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) const { + return AccessorConstructor->Construct(batch, chunkData).DetachResult(); +} + +std::shared_ptr TColumnLoader::BuildDefaultAccessor(const ui32 recordsCount) const { + return AccessorConstructor->ConstructDefault(TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())).DetachResult(); +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/loader.h b/ydb/core/formats/arrow/save_load/loader.h new file mode 100644 index 000000000000..2d3119ac3fa8 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/loader.h @@ -0,0 +1,56 @@ +#pragma 
once +#include +#include + +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnLoader { +private: + NSerialization::TSerializerContainer Serializer; + NTransformation::ITransformer::TPtr Transformer; + YDB_READONLY_DEF(NAccessor::TConstructorContainer, AccessorConstructor); + YDB_READONLY_DEF(std::shared_ptr, ResultField); + YDB_READONLY_DEF(std::shared_ptr, DefaultValue); + const ui32 ColumnId; + + arrow::Result> Apply(const TString& data) const; + std::shared_ptr BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) const; + +public: + std::shared_ptr BuildDefaultAccessor(const ui32 recordsCount) const; + + bool IsEqualTo(const TColumnLoader& item) const { + if (!!Transformer != !!item.Transformer) { + return false; + } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { + return false; + } + if (!Serializer.IsEqualTo(item.Serializer)) { + return false; + } + return true; + } + + TString DebugString() const; + + TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const NAccessor::TConstructorContainer& accessorConstructor, const std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId); + + ui32 GetColumnId() const { + return ColumnId; + } + + const std::shared_ptr& GetField() const; + + std::shared_ptr ApplyVerified(const TString& data, const ui32 expectedRecordsCount) const; + std::shared_ptr ApplyRawVerified(const TString& data) const; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/saver.cpp b/ydb/core/formats/arrow/save_load/saver.cpp new file mode 100644 index 000000000000..95adebc76471 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/saver.cpp @@ -0,0 +1,38 @@ +#include "saver.h" + +namespace NKikimr::NArrow::NAccessor { + +TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const 
NArrow::NSerialization::TSerializerContainer serializer) + : Transformer(transformer) + , Serializer(serializer) +{ + Y_ABORT_UNLESS(Serializer); +} + +bool TColumnSaver::IsHardPacker() const { + return Serializer->IsHardPacker(); +} + +TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr field) const { + auto schema = std::make_shared(arrow::FieldVector{field}); + auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); + return Apply(batch); +} + +TString TColumnSaver::Apply(const std::shared_ptr& data) const { + Y_ABORT_UNLESS(Serializer); + NArrow::NSerialization::TSerializerContainer serializer = Serializer; + if (SerializerBySizeUpperBorder.size()) { + auto it = SerializerBySizeUpperBorder.lower_bound(data->num_rows()); + if (it != SerializerBySizeUpperBorder.end()) { + serializer = it->second; + } + } + if (Transformer) { + return serializer->SerializeFull(Transformer->Transform(data)); + } else { + return serializer->SerializePayload(data); + } +} + +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/save_load/saver.h b/ydb/core/formats/arrow/save_load/saver.h new file mode 100644 index 000000000000..dd9feb4114f3 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/saver.h @@ -0,0 +1,38 @@ +#pragma once +#include + +#include +#include + +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnSaver { +private: + NArrow::NTransformation::ITransformer::TPtr Transformer; + YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); + std::map SerializerBySizeUpperBorder; + +public: + TColumnSaver() = default; + TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); + + void AddSerializerWithBorder(const ui32 upperBorder, const NArrow::NSerialization::TSerializerContainer& serializer) { + if (Serializer.IsCompatibleForExchange(serializer)) { + AFL_VERIFY(SerializerBySizeUpperBorder.emplace(upperBorder, 
serializer).second); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_add_serializer")("reason", "incompatible_serializers")( + "border", upperBorder); + } + } + + bool IsHardPacker() const; + + TString Apply(std::shared_ptr data, std::shared_ptr field) const; + + TString Apply(const std::shared_ptr& data) const; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/ya.make b/ydb/core/formats/arrow/save_load/ya.make new file mode 100644 index 000000000000..7947aa1ab826 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + saver.cpp + loader.cpp +) + +PEERDIR( + ydb/library/actors/core + contrib/libs/apache/arrow + ydb/library/accessor + ydb/library/conclusion + ydb/library/formats/arrow/transformer + ydb/library/formats/arrow/common + ydb/core/formats/arrow/transformer + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/serializer/abstract.cpp b/ydb/core/formats/arrow/serializer/abstract.cpp index 6347cd765b9b..8010197ae409 100644 --- a/ydb/core/formats/arrow/serializer/abstract.cpp +++ b/ydb/core/formats/arrow/serializer/abstract.cpp @@ -21,8 +21,12 @@ NKikimr::TConclusionStatus TSerializerContainer::DeserializeFromRequest(NYql::TF return TBase::GetObjectPtr()->DeserializeFromRequest(features); } -std::shared_ptr TSerializerContainer::GetDefaultSerializer() { +std::shared_ptr TSerializerContainer::GetDefaultSerializer() { return std::make_shared(); } +std::shared_ptr TSerializerContainer::GetFastestSerializer() { + return std::make_shared(arrow::Compression::UNCOMPRESSED); +} + } diff --git a/ydb/core/formats/arrow/serializer/abstract.h b/ydb/core/formats/arrow/serializer/abstract.h index 1c8d9963dd7e..9811aaaf0f20 100644 --- a/ydb/core/formats/arrow/serializer/abstract.h +++ b/ydb/core/formats/arrow/serializer/abstract.h @@ -4,9 +4,9 @@ #include #include #include -#include #include +#include #include #include @@ -146,6 
+146,7 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainer GetDefaultSerializer(); + static std::shared_ptr GetFastestSerializer(); TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TCompressionOptions& proto); diff --git a/ydb/core/formats/arrow/serializer/native.cpp b/ydb/core/formats/arrow/serializer/native.cpp index 7b422a8c1cb1..4b90286001d2 100644 --- a/ydb/core/formats/arrow/serializer/native.cpp +++ b/ydb/core/formats/arrow/serializer/native.cpp @@ -2,10 +2,10 @@ #include "stream.h" #include "parsing.h" #include -#include #include #include +#include #include #include diff --git a/ydb/core/formats/arrow/serializer/ya.make b/ydb/core/formats/arrow/serializer/ya.make index bf7e091ab4bf..8c9fb49fe08f 100644 --- a/ydb/core/formats/arrow/serializer/ya.make +++ b/ydb/core/formats/arrow/serializer/ya.make @@ -2,9 +2,9 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/common ydb/services/metadata/abstract ydb/library/actors/core + ydb/library/formats/arrow/common ydb/core/protos ) diff --git a/ydb/core/formats/arrow/simple_arrays_cache.h b/ydb/core/formats/arrow/simple_arrays_cache.h deleted file mode 100644 index e527e44a0b08..000000000000 --- a/ydb/core/formats/arrow/simple_arrays_cache.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once -#include "size_calcer.h" - -#include - -#include -#include -#include -#include - -namespace NKikimr::NArrow { - -class TThreadSimpleArraysCache { -private: - THashMap> Arrays; - const ui64 MaxOneArrayMemorySize = 10 * 1024 * 1024; - - template - std::shared_ptr InitializePosition(const TString& key, const ui32 recordsCountExt, const TInitializeActor actor) { - const ui32 recordsCount = (recordsCountExt < 1024) ? 
1024 : recordsCountExt; - auto it = Arrays.find(key); - if (it == Arrays.end() || it->second->length() < recordsCount) { - auto arrNew = actor(recordsCount); - if (NArrow::GetArrayMemorySize(arrNew->data()) < MaxOneArrayMemorySize) { - if (it == Arrays.end()) { - it = Arrays.emplace(key, arrNew).first; - } else { - it->second = arrNew; - } - } else { - AFL_VERIFY(recordsCountExt == recordsCount)("ext", recordsCountExt)("count", recordsCount); - return arrNew; - } - } - return it->second->Slice(0, recordsCountExt); - } - - std::shared_ptr GetNullImpl(const std::shared_ptr& type, const ui32 recordsCount); - std::shared_ptr GetConstImpl(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); -public: - static std::shared_ptr GetNull(const std::shared_ptr& type, const ui32 recordsCount); - static std::shared_ptr GetConst(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); - static std::shared_ptr Get(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); -}; -} diff --git a/ydb/core/formats/arrow/simple_builder/filler.cpp b/ydb/core/formats/arrow/simple_builder/filler.cpp deleted file mode 100644 index f6168701ddbe..000000000000 --- a/ydb/core/formats/arrow/simple_builder/filler.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "filler.h" -#include - -namespace NKikimr::NArrow::NConstruction { - -TStringPoolFiller::TStringPoolFiller(const ui32 poolSize, const ui32 strLen) { - for (ui32 i = 0; i < poolSize; ++i) { - Data.emplace_back(NUnitTest::RandomString(strLen, i)); - } -} - -arrow::util::string_view TStringPoolFiller::GetValue(const ui32 idx) const { - const TString& str = Data[(2 + 7 * idx) % Data.size()]; - return arrow::util::string_view(str.data(), str.size()); -} - -} diff --git a/ydb/core/formats/arrow/simple_builder/filler.h b/ydb/core/formats/arrow/simple_builder/filler.h deleted file mode 100644 index e86e7a6c2139..000000000000 --- 
a/ydb/core/formats/arrow/simple_builder/filler.h +++ /dev/null @@ -1,129 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NArrow::NConstruction { - -template -class TIntSeqFiller { -public: - using TValue = TArrowInt; -private: - using CType = typename TArrowInt::c_type; - const CType Delta; -public: - CType GetValue(const CType idx) const { - return Delta + idx; - } - TIntSeqFiller(const CType delta = 0) - : Delta(delta) { - - } -}; - -template -class TIntConstFiller { -public: - using TValue = TArrowInt; -private: - using CType = typename TArrowInt::c_type; - const CType Value; -public: - CType GetValue(const CType /*idx*/) const { - return Value; - } - TIntConstFiller(const CType value) - : Value(value) { - - } -}; - -class TStringPoolFiller { -private: - std::vector Data; -public: - using TValue = arrow::StringType; - arrow::util::string_view GetValue(const ui32 idx) const; - - TStringPoolFiller(const ui32 poolSize, const ui32 strLen); -}; - -template -class TLinearArrayAccessor { -private: - using TArray = typename arrow::TypeTraits::ArrayType; - const TArray& Data; -public: - using TValue = TValueExt; - auto GetValue(const ui32 idx) const { - return Data.Value(idx); - } - - TLinearArrayAccessor(const arrow::Array& data) - : Data(static_cast(data)) { - } -}; - -template -class TBinaryArrayAccessor { -private: - using TArray = typename arrow::TypeTraits::ArrayType; - const TArray& Data; -public: - using TValue = TValueExt; - const char* GetValueView(const ui32 idx) const { - return Data.GetView(idx).data(); - } - - TBinaryArrayAccessor(const arrow::Array& data) - : Data(static_cast(data)) { - } -}; - -template -class TDictionaryArrayAccessor { -private: - using TDictionary = typename arrow::TypeTraits::ArrayType; - const TDictionary& Dictionary; - const TIndices& Indices; -public: - using TValue = TDictionaryValue; - auto GetValue(const ui32 idx) const { - return Dictionary.Value(Indices.Value(idx)); - } - - 
TDictionaryArrayAccessor(const TDictionary& dictionary, const TIndices& indices) - : Dictionary(dictionary) - , Indices(indices) { - } -}; - -template -class TBinaryDictionaryArrayAccessor { -private: - using TDictionary = typename arrow::TypeTraits::ArrayType; - const TDictionary& Dictionary; - const TIndices& Indices; - std::vector DictionaryStrings; -public: - using TValue = TDictionaryValue; - const char* GetValueView(const ui32 idx) const { - return DictionaryStrings[Indices.Value(idx)].data(); - } - - TBinaryDictionaryArrayAccessor(const TDictionary& dictionary, const TIndices& indices) - : Dictionary(dictionary) - , Indices(indices) - { - DictionaryStrings.reserve(Dictionary.length()); - for (i64 idx = 0; idx < Dictionary.length(); ++idx) { - auto sView = Dictionary.Value(idx); - DictionaryStrings.emplace_back(TString(sView.data(), sView.size())); - } - } -}; - -} diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp index d44018faaa77..a79f52eb5799 100644 --- a/ydb/core/formats/arrow/size_calcer.cpp +++ b/ydb/core/formats/arrow/size_calcer.cpp @@ -1,5 +1,5 @@ #include "size_calcer.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include "arrow_helpers.h" #include "dictionary/conversion.h" #include @@ -50,203 +50,16 @@ TConclusion> SplitByBlobSize(const std::shared_ptr return result; } -ui32 TRowSizeCalculator::GetRowBitWidth(const ui32 row) const { - Y_ABORT_UNLESS(Prepared); - ui32 result = CommonSize; - for (auto&& c : BinaryColumns) { - result += GetBitWidthAligned(c->GetView(row).size() * 8); - } - for (auto&& c : StringColumns) { - result += GetBitWidthAligned(c->GetView(row).size() * 8); - } - return result; -} - -bool TRowSizeCalculator::InitBatch(const std::shared_ptr& batch) { - Batch = batch; - CommonSize = 0; - BinaryColumns.clear(); - StringColumns.clear(); - Prepared = false; - for (ui32 i = 0; i < (ui32)Batch->num_columns(); ++i) { - auto fSize = 
std::dynamic_pointer_cast(Batch->column(i)->type()); - if (fSize) { - CommonSize += GetBitWidthAligned(fSize->bit_width()); - } else { - auto c = Batch->column(i); - if (c->type()->id() == arrow::Type::BINARY) { - const arrow::BinaryArray& viewArray = static_cast(*c); - BinaryColumns.emplace_back(&viewArray); - } else if (c->type()->id() == arrow::Type::STRING) { - const arrow::StringArray& viewArray = static_cast(*c); - StringColumns.emplace_back(&viewArray); - } else { - return false; - } - } - } - Prepared = true; - return true; -} - -ui32 TRowSizeCalculator::GetRowBytesSize(const ui32 row) const { - const ui32 bitsWidth = GetRowBitWidth(row); - ui32 result = bitsWidth / 8; - if (bitsWidth % 8) { - ++result; - } - return result; -} - -ui64 GetArrayMemorySize(const std::shared_ptr& data) { - if (!data) { - return 0; - } - ui64 result = 0; - for (auto&& i : data->buffers) { - if (i) { - result += i->capacity(); - } - } - for (auto&& i : data->child_data) { - for (auto&& b : i->buffers) { - if (b) { - result += b->capacity(); - } - } - } - if (data->dictionary) { - for (auto&& b : data->dictionary->buffers) { - if (b) { - result += b->capacity(); - } - } - } - return result; -} - - -ui64 GetBatchDataSize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->columns()) { - bytes += GetArrayDataSize(column); - } - return bytes; -} - -ui64 GetBatchMemorySize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->column_data()) { - bytes += GetArrayMemorySize(column); - } - return bytes; -} - -ui64 GetTableMemorySize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->columns()) { - for (auto&& chunk : column->chunks()) { - bytes += GetArrayMemorySize(chunk->data()); - } - } - return bytes; -} - -ui64 GetTableDataSize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - 
for (auto& column : batch->columns()) { - for (auto&& chunk : column->chunks()) { - bytes += GetArrayDataSize(chunk); - } - } - return bytes; -} - -template -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return sizeof(typename TType::c_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return column->length() * 8; // Special value for empty lines -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::StringArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::LargeStringArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::BinaryArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::LargeBinaryArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->byte_width() * typedColumn->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return sizeof(ui64) * 2 * column->length(); -} - -ui64 GetArrayDataSize(const std::shared_ptr& column) { - auto type = column->type(); - if (type->id() == arrow::Type::DICTIONARY) { - auto dictArray = static_pointer_cast(column); - return GetDictionarySize(dictArray); - } - ui64 bytes = 0; - bool success = SwitchTypeWithNull(type->id(), 
[&](TTypeWrapper typeHolder) { - Y_UNUSED(typeHolder); - bytes = GetArrayDataSizeImpl(column); - return true; - }); - - // Add null bit mask overhead if any. - if (HasNulls(column)) { - bytes += column->length() / 8 + 1; - } - - Y_DEBUG_ABORT_UNLESS(success, "Unsupported arrow type %s", type->ToString().data()); - return bytes; -} - NKikimr::NArrow::TSerializedBatch TSerializedBatch::Build(std::shared_ptr batch, const TBatchSplitttingContext& context) { - std::optional specialKeys; + std::optional specialKeysPayload; + std::optional specialKeysFull; if (context.GetFieldsForSpecialKeys().size()) { - specialKeys = TFirstLastSpecialKeys(batch, context.GetFieldsForSpecialKeys()); + TFirstLastSpecialKeys specialKeys(batch, context.GetFieldsForSpecialKeys()); + specialKeysPayload = specialKeys.SerializePayloadToString(); + specialKeysFull = specialKeys.SerializeFullToString(); } - return TSerializedBatch(NArrow::SerializeSchema(*batch->schema()), NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), NArrow::GetBatchDataSize(batch), specialKeys); + return TSerializedBatch(NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), + NArrow::GetBatchDataSize(batch), specialKeysPayload, specialKeysFull); } TConclusionStatus TSerializedBatch::BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR) { @@ -290,7 +103,7 @@ TConclusion> TSerializedBatch::BuildWithLimit(std: } TString TSerializedBatch::DebugString() const { - return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; + return TStringBuilder() << "(data_size=" << Data.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; } } diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h index 410d5517bc86..00e55f9ca20b 100644 --- a/ydb/core/formats/arrow/size_calcer.h +++ 
b/ydb/core/formats/arrow/size_calcer.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -11,43 +12,6 @@ namespace NKikimr::NArrow { -class TRowSizeCalculator { -private: - std::shared_ptr Batch; - ui32 CommonSize = 0; - std::vector BinaryColumns; - std::vector StringColumns; - bool Prepared = false; - const ui32 AlignBitsCount = 1; - - ui32 GetBitWidthAligned(const ui32 bitWidth) const { - if (AlignBitsCount == 1) { - return bitWidth; - } - ui32 result = bitWidth / AlignBitsCount; - if (bitWidth % AlignBitsCount) { - result += 1; - } - result *= AlignBitsCount; - return result; - } - -public: - - ui64 GetApproxSerializeSize(const ui64 dataSize) const { - return Max(dataSize * 1.05, dataSize + Batch->num_columns() * 8); - } - - TRowSizeCalculator(const ui32 alignBitsCount) - : AlignBitsCount(alignBitsCount) - { - - } - bool InitBatch(const std::shared_ptr& batch); - ui32 GetRowBitWidth(const ui32 row) const; - ui32 GetRowBytesSize(const ui32 row) const; -}; - class TBatchSplitttingContext { private: YDB_ACCESSOR(ui64, SizeLimit, 6 * 1024 * 1024); @@ -70,23 +34,29 @@ class TBatchSplitttingContext { class TSerializedBatch { private: - YDB_READONLY_DEF(TString, SchemaData); YDB_READONLY_DEF(TString, Data); YDB_READONLY(ui32, RowsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - std::optional SpecialKeys; + std::optional SpecialKeysFull; + std::optional SpecialKeysPayload; + public: size_t GetSize() const { return Data.size(); } - const TFirstLastSpecialKeys& GetSpecialKeysSafe() const { - AFL_VERIFY(SpecialKeys); - return *SpecialKeys; + const TString& GetSpecialKeysPayloadSafe() const { + AFL_VERIFY(SpecialKeysPayload); + return *SpecialKeysPayload; + } + + const TString& GetSpecialKeysFullSafe() const { + AFL_VERIFY(SpecialKeysFull); + return *SpecialKeysFull; } bool HasSpecialKeys() const { - return !!SpecialKeys; + return !!SpecialKeysFull; } TString DebugString() const; @@ -95,27 +65,17 @@ class TSerializedBatch { static TConclusionStatus 
BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR); static TSerializedBatch Build(std::shared_ptr batch, const TBatchSplitttingContext& context); - TSerializedBatch(TString&& schemaData, TString&& data, const ui32 rowsCount, const ui32 rawBytes, const std::optional& specialKeys) - : SchemaData(schemaData) - , Data(data) + TSerializedBatch(TString&& data, const ui32 rowsCount, const ui32 rawBytes, + const std::optional& specialKeysPayload, const std::optional& specialKeysFull) + : Data(data) , RowsCount(rowsCount) , RawBytes(rawBytes) - , SpecialKeys(specialKeys) - { - + , SpecialKeysFull(specialKeysFull) + , SpecialKeysPayload(specialKeysPayload) { + AFL_VERIFY(!!SpecialKeysPayload == !!SpecialKeysFull); } }; TConclusion> SplitByBlobSize(const std::shared_ptr& batch, const TBatchSplitttingContext& context); -// Return size in bytes including size of bitmap mask -ui64 GetBatchDataSize(const std::shared_ptr& batch); -ui64 GetTableDataSize(const std::shared_ptr& batch); -// Return size in bytes including size of bitmap mask -ui64 GetArrayMemorySize(const std::shared_ptr& data); -ui64 GetBatchMemorySize(const std::shared_ptr&batch); -ui64 GetTableMemorySize(const std::shared_ptr& batch); -// Return size in bytes *not* including size of bitmap mask -ui64 GetArrayDataSize(const std::shared_ptr& column); - } diff --git a/ydb/core/formats/arrow/special_keys.cpp b/ydb/core/formats/arrow/special_keys.cpp index 0b97fb3f25ed..0745fad0e559 100644 --- a/ydb/core/formats/arrow/special_keys.cpp +++ b/ydb/core/formats/arrow/special_keys.cpp @@ -27,12 +27,12 @@ NKikimr::NArrow::TReplaceKey TSpecialKeys::GetKeyByIndex(const ui32 position, co } } -TString TSpecialKeys::SerializeToString() const { - return NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->SerializeFull(Data); +TString TSpecialKeys::SerializePayloadToString() const { + return 
NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializePayload(Data); } -TString TSpecialKeys::SerializeToStringDataOnlyNoCompression() const { - return NArrow::SerializeBatchNoCompression(Data); +TString TSpecialKeys::SerializeFullToString() const { + return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializeFull(Data); } ui64 TSpecialKeys::GetMemoryBytes() const { @@ -50,13 +50,17 @@ TFirstLastSpecialKeys::TFirstLastSpecialKeys(const std::shared_ptr indexes = {0}; - if (batch->num_rows() > 1) { - indexes.emplace_back(batch->num_rows() - 1); - } + if (keyBatch->num_rows() <= 2) { + Data = keyBatch; + } else { + std::vector indexes = { 0 }; + if (batch->num_rows() > 1) { + indexes.emplace_back(batch->num_rows() - 1); + } - Data = NArrow::CopyRecords(keyBatch, indexes); - Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + Data = NArrow::CopyRecords(keyBatch, indexes); + Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + } } TMinMaxSpecialKeys::TMinMaxSpecialKeys(std::shared_ptr batch, const std::shared_ptr& schema) { diff --git a/ydb/core/formats/arrow/special_keys.h b/ydb/core/formats/arrow/special_keys.h index d56e658fbb68..7559b4a5f7fb 100644 --- a/ydb/core/formats/arrow/special_keys.h +++ b/ydb/core/formats/arrow/special_keys.h @@ -1,5 +1,8 @@ #pragma once -#include + +#include "arrow_helpers.h" + +#include #include namespace NKikimr::NArrow { @@ -22,8 +25,6 @@ class TSpecialKeys { public: ui64 GetMemoryBytes() const; - TString SerializeToStringDataOnlyNoCompression() const; - TSpecialKeys(const TString& data, const std::shared_ptr& schema) { Data = NArrow::DeserializeBatch(data, schema); Y_ABORT_UNLESS(Data); @@ -34,7 +35,8 @@ class TSpecialKeys { Y_ABORT_UNLESS(DeserializeFromString(data)); } - TString SerializeToString() const; + TString SerializePayloadToString() const; + TString SerializeFullToString() const; ui64 GetMemorySize() const; }; diff --git 
a/ydb/core/formats/arrow/splitter/scheme_info.cpp b/ydb/core/formats/arrow/splitter/scheme_info.cpp new file mode 100644 index 000000000000..35a8fcc5c035 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/scheme_info.cpp @@ -0,0 +1,13 @@ +#include "scheme_info.h" + +namespace NKikimr::NArrow::NSplitter { + +NAccessor::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { + auto saver = DoGetColumnSaver(columnId); + if (OverrideSerializer) { + saver.AddSerializerWithBorder(Max(), *OverrideSerializer); + } + return saver; +} + +} diff --git a/ydb/core/formats/arrow/splitter/scheme_info.h b/ydb/core/formats/arrow/splitter/scheme_info.h new file mode 100644 index 000000000000..0bb30e97300a --- /dev/null +++ b/ydb/core/formats/arrow/splitter/scheme_info.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include +#include + +namespace NKikimr::NArrow::NSplitter { + +class ISchemaDetailInfo { +private: + YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); + +protected: + virtual NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; + +public: + using TPtr = std::shared_ptr; + virtual ~ISchemaDetailInfo() = default; + virtual ui32 GetColumnId(const std::string& fieldName) const = 0; + NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const; + virtual std::shared_ptr GetField(const ui32 columnId) const = 0; + virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; + virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; + virtual bool IsSortedColumn(const ui32 columnId) const = 0; + virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const = 0; +}; +} // namespace NKikimr::NArrow::NSplitter diff --git a/ydb/core/formats/arrow/splitter/simple.cpp b/ydb/core/formats/arrow/splitter/simple.cpp new file mode 100644 index 000000000000..a113084b531b --- /dev/null +++ b/ydb/core/formats/arrow/splitter/simple.cpp @@ -0,0 +1,200 @@ +#include "simple.h" + +#include 
+ +#include +#include + +namespace NKikimr::NArrow::NSplitter { + +std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { + AFL_VERIFY(data); + AFL_VERIFY(field); + auto schema = std::make_shared(arrow::FieldVector{field}); + auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); + return Split(batch, maxBlobSize); +} + +class TSplitChunk { +private: + std::shared_ptr Data; + YDB_READONLY_DEF(std::optional, Result); + ui32 SplitFactor = 0; + ui32 Iterations = 0; + ui32 MaxBlobSize = 8 * 1024 * 1024; + NAccessor::TColumnSaver ColumnSaver; + +public: + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, + const NAccessor::TColumnSaver& columnSaver) + : Data(data) + , SplitFactor(baseSplitFactor) + , MaxBlobSize(maxBlobSize) + , ColumnSaver(columnSaver) + { + AFL_VERIFY(Data && Data->num_rows()); + AFL_VERIFY(SplitFactor); + } + + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, + const NAccessor::TColumnSaver& columnSaver) + : Data(data) + , Result(TSaverSplittedChunk(data, std::move(serializedData))) + , SplitFactor(baseSplitFactor) + , MaxBlobSize(maxBlobSize) + , ColumnSaver(columnSaver) + { + AFL_VERIFY(Data && Data->num_rows()); + AFL_VERIFY(SplitFactor); + } + + std::vector Split() { + while (true) { + AFL_VERIFY(!Result); + AFL_VERIFY(++Iterations < 100); + AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)( + "size", NArrow::GetBatchDataSize(Data)); + bool found = false; + std::vector result; + if (SplitFactor == 1) { + TString blob = ColumnSaver.Apply(Data); + if (blob.size() < MaxBlobSize) { + Result = TSaverSplittedChunk(Data, std::move(blob)); + found = true; + result.emplace_back(*this); + } else { + TBatchSerializationStat stats(blob.size(), Data->num_rows(), 
NArrow::GetBatchDataSize(Data)); + SplitFactor = stats.PredictOptimalSplitFactor(Data->num_rows(), MaxBlobSize).value_or(1); + if (SplitFactor == 1) { + SplitFactor = 2; + } + AFL_VERIFY(Data->num_rows() > 1); + } + } else { + TLinearSplitInfo linearSplitting = TSimpleSplitter::GetLinearSplittingByMax(Data->num_rows(), Data->num_rows() / SplitFactor); + TStringBuilder sb; + std::optional badStartPosition; + ui32 badBatchRecordsCount = 0; + ui64 badBatchSerializedSize = 0; + ui32 badBatchCount = 0; + for (auto it = linearSplitting.StartIterator(); it.IsValid(); it.Next()) { + auto slice = Data->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TString blob = ColumnSaver.Apply(slice); + if (blob.size() >= MaxBlobSize) { + if (!badStartPosition) { + badStartPosition = it.GetPosition(); + } + badBatchSerializedSize += blob.size(); + badBatchRecordsCount += it.GetCurrentPackSize(); + ++badBatchCount; + Y_ABORT_UNLESS(!linearSplitting.IsMinimalGranularity()); + } else { + if (badStartPosition) { + AFL_VERIFY(badBatchRecordsCount && badBatchCount)("count", badBatchCount)("records", badBatchRecordsCount); + auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); + TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver); + badStartPosition = {}; + badBatchRecordsCount = 0; + badBatchCount = 0; + badBatchSerializedSize = 0; + } + found = true; + result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver); + } + } + if (badStartPosition) { + auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); + TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, 
ColumnSaver); + } + ++SplitFactor; + } + if (found) { + return result; + } + } + AFL_VERIFY(false); + return {}; + } +}; + +std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const ui32 maxBlobSize) const { + AFL_VERIFY(data->num_rows()); + TSplitChunk baseChunk(Stats ? Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver); + std::vector chunks = {baseChunk}; + for (auto it = chunks.begin(); it != chunks.end(); ) { + AFL_VERIFY(chunks.size() < 100); + if (!!it->GetResult()) { + ++it; + continue; + } + std::vector splitted = it->Split(); + if (splitted.size() == 1) { + *it = splitted.front(); + } else { + it = chunks.insert(it, splitted.begin(), splitted.end()); + chunks.erase(it + splitted.size()); + } + } + std::vector result; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetResult()); + result.emplace_back(*i.GetResult()); + } + return result; +} + +std::vector TSimpleSplitter::SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const { + std::vector result; + ui64 position = 0; + for (auto&& i : recordsCount) { + auto subData = data->Slice(position, i); + result.emplace_back(subData, ColumnSaver.Apply(subData)); + position += i; + } + Y_ABORT_UNLESS(position == (ui64)data->num_rows()); + return result; +} + +std::vector TSimpleSplitter::SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const { + auto splitPartSizesLocal = splitPartSizesExt; + Y_ABORT_UNLESS(data); + { + ui32 sumSizes = 0; + for (auto&& i : splitPartSizesExt) { + sumSizes += i; + } + Y_ABORT_UNLESS(sumSizes <= dataSerialization.size()); + + if (sumSizes < dataSerialization.size()) { + splitPartSizesLocal.emplace_back(dataSerialization.size() - sumSizes); + } + } + std::vector recordsCount; + i64 remainedRecordsCount = data->num_rows(); + const double rowsPerByte = 1.0 * data->num_rows() / dataSerialization.size(); + i32 remainedParts = 
splitPartSizesLocal.size(); + for (ui32 idx = 0; idx < splitPartSizesLocal.size(); ++idx) { + AFL_VERIFY(remainedRecordsCount >= remainedParts)("remained_records_count", remainedRecordsCount) + ("remained_parts", remainedParts)("idx", idx)("size", splitPartSizesLocal.size())("sizes", JoinSeq(",", splitPartSizesLocal))("data_size", dataSerialization.size()); + --remainedParts; + i64 expectedRecordsCount = rowsPerByte * splitPartSizesLocal[idx]; + if (expectedRecordsCount < 1) { + expectedRecordsCount = 1; + } else if (remainedRecordsCount < expectedRecordsCount + remainedParts) { + expectedRecordsCount = remainedRecordsCount - remainedParts; + } + if (idx + 1 == splitPartSizesLocal.size()) { + expectedRecordsCount = remainedRecordsCount; + } + Y_ABORT_UNLESS(expectedRecordsCount); + recordsCount.emplace_back(expectedRecordsCount); + remainedRecordsCount -= expectedRecordsCount; + Y_ABORT_UNLESS(remainedRecordsCount >= 0); + } + Y_ABORT_UNLESS(remainedRecordsCount == 0); + return SplitByRecordsCount(data, recordsCount); +} + +} diff --git a/ydb/core/formats/arrow/splitter/simple.h b/ydb/core/formats/arrow/splitter/simple.h new file mode 100644 index 000000000000..1405d3a6dc20 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/simple.h @@ -0,0 +1,120 @@ +#pragma once +#include +#include "scheme_info.h" + +namespace NKikimr::NArrow::NSplitter { + +class TSaverSplittedChunk { +private: + YDB_READONLY_DEF(std::shared_ptr, SlicedBatch); + YDB_READONLY_DEF(TString, SerializedChunk); +public: + ui32 GetRecordsCount() const { + return SlicedBatch->num_rows(); + } + + TSaverSplittedChunk(const std::shared_ptr& batch, TString&& serializedChunk) + : SlicedBatch(batch) + , SerializedChunk(std::move(serializedChunk)) { + Y_ABORT_UNLESS(SlicedBatch); + Y_ABORT_UNLESS(SlicedBatch->num_rows()); + } +}; + +class TLinearSplitInfo { +private: + YDB_READONLY(ui64, PacksCount, 0); + YDB_READONLY(ui64, PackSize, 0); + YDB_READONLY(ui64, ObjectsCount, 0); +public: + bool 
IsMinimalGranularity() const { + return PackSize == 1; + } + + TLinearSplitInfo(const ui64 packsCount, const ui64 packSize, const ui64 objectsCount) + : PacksCount(packsCount) + , PackSize(packSize) + , ObjectsCount(objectsCount) + { + AFL_VERIFY(objectsCount >= packsCount)("objects_count", objectsCount)("packs_count", packsCount); + AFL_VERIFY(PackSize); + AFL_VERIFY(PacksCount); + } + + class TIterator { + private: + const TLinearSplitInfo& Owner; + YDB_READONLY(ui64, Position, 0); + YDB_READONLY(ui64, CurrentPackSize, 0); + ui64 PackIdx = 0; + void InitPack() { + CurrentPackSize = (PackIdx + 1 == Owner.GetPacksCount()) ? Owner.ObjectsCount - Position : Owner.GetPackSize(); + } + public: + explicit TIterator(const TLinearSplitInfo& owner) + : Owner(owner) + { + InitPack(); + } + + bool IsValid() const { + if (Position < Owner.GetObjectsCount() && PackIdx < Owner.GetPacksCount()) { + return true; + } else { + Y_ABORT_UNLESS(Position == Owner.GetObjectsCount() && PackIdx == Owner.GetPacksCount()); + return false; + } + } + + bool Next() { + Y_ABORT_UNLESS(IsValid()); + Position += CurrentPackSize; + ++PackIdx; + InitPack(); + return IsValid(); + } + }; + + TIterator StartIterator() const { + return TIterator(*this); + } +}; + +class TSimpleSplitter { +private: + NAccessor::TColumnSaver ColumnSaver; + YDB_ACCESSOR_DEF(std::optional, Stats); +public: + explicit TSimpleSplitter(const NAccessor::TColumnSaver& columnSaver) + : ColumnSaver(columnSaver) + { + + } + + static TLinearSplitInfo GetOptimalLinearSplitting(const ui64 objectsCount, const i64 optimalPackSizeExt) { + const i64 optimalPackSize = optimalPackSizeExt ? 
optimalPackSizeExt : 1; + const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / optimalPackSize)); + const ui32 countPacksMin = std::max(1, (ui32)ceil(1.0 * objectsCount / optimalPackSize)); + const ui32 stepPackMax = objectsCount / countPacksMin; + const ui32 stepPackMin = objectsCount / countPacksMax; + if (std::abs(optimalPackSize - stepPackMax) > std::abs(optimalPackSize - stepPackMin)) { + return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); + } else { + return TLinearSplitInfo(countPacksMin, stepPackMax, objectsCount); + } + } + + static TLinearSplitInfo GetLinearSplittingByMax(const ui64 objectsCount, const ui64 maxPackSizeExt) { + const ui64 maxPackSize = maxPackSizeExt ? maxPackSizeExt : 1; + const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / maxPackSize)); + const ui32 stepPackMin = objectsCount / countPacksMax; + return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); + } + + std::vector Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const; + std::vector Split(const std::shared_ptr& data, const ui32 maxBlobSize) const; + std::vector SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const; + std::vector SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const; +}; + +} diff --git a/ydb/core/formats/arrow/splitter/ya.make b/ydb/core/formats/arrow/splitter/ya.make new file mode 100644 index 000000000000..70db86e75d4d --- /dev/null +++ b/ydb/core/formats/arrow/splitter/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + simple.cpp + scheme_info.cpp +) + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/actors/core + ydb/library/conclusion + ydb/library/formats/arrow/splitter + ydb/library/formats/arrow/common + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/ssa_program_optimizer.cpp b/ydb/core/formats/arrow/ssa_program_optimizer.cpp index 
f55f63110f7b..ff1e5a5cb38c 100644 --- a/ydb/core/formats/arrow/ssa_program_optimizer.cpp +++ b/ydb/core/formats/arrow/ssa_program_optimizer.cpp @@ -1,5 +1,7 @@ #include "ssa_program_optimizer.h" +#include + namespace NKikimr::NSsa { namespace { @@ -11,7 +13,8 @@ void ReplaceCountAll(TProgram& program) { Y_ABORT_UNLESS(step); for (auto& groupBy : step->MutableGroupBy()) { - if (groupBy.GetOperation() == EAggregate::Count && groupBy.GetArguments().empty()) { + if (groupBy.GetOperation() == EAggregate::NumRows) { + AFL_VERIFY(groupBy.GetArguments().empty()); if (step->GetGroupByKeys().size()) { groupBy.MutableArguments().push_back(step->GetGroupByKeys()[0]); } else { diff --git a/ydb/core/formats/arrow/ssa_runtime_version.h b/ydb/core/formats/arrow/ssa_runtime_version.h index 500074420fda..cdf3f6bdc09d 100644 --- a/ydb/core/formats/arrow/ssa_runtime_version.h +++ b/ydb/core/formats/arrow/ssa_runtime_version.h @@ -19,7 +19,7 @@ namespace NKikimr::NSsa { // Bump this version every time incompatible runtime functions are introduced. 
#ifndef SSA_RUNTIME_VERSION -#define SSA_RUNTIME_VERSION 5U +#define SSA_RUNTIME_VERSION 4U #endif // History: diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h index 78a9dfa04360..383ad1567c4b 100644 --- a/ydb/core/formats/arrow/switch/switch_type.h +++ b/ydb/core/formats/arrow/switch/switch_type.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include @@ -13,106 +13,6 @@ extern "C" { namespace NKikimr::NArrow { -template -struct TTypeWrapper -{ - using T = TType; -}; - -template -TResult SwitchTypeImpl(arrow::Type::type typeId, TFunc&& f) { - switch (typeId) { - case arrow::Type::NA: { - if constexpr (EnableNull) { - return f(TTypeWrapper()); - } - break; - } - case arrow::Type::BOOL: - return f(TTypeWrapper()); - case arrow::Type::UINT8: - return f(TTypeWrapper()); - case arrow::Type::INT8: - return f(TTypeWrapper()); - case arrow::Type::UINT16: - return f(TTypeWrapper()); - case arrow::Type::INT16: - return f(TTypeWrapper()); - case arrow::Type::UINT32: - return f(TTypeWrapper()); - case arrow::Type::INT32: - return f(TTypeWrapper()); - case arrow::Type::UINT64: - return f(TTypeWrapper()); - case arrow::Type::INT64: - return f(TTypeWrapper()); - case arrow::Type::HALF_FLOAT: - return f(TTypeWrapper()); - case arrow::Type::FLOAT: - return f(TTypeWrapper()); - case arrow::Type::DOUBLE: - return f(TTypeWrapper()); - case arrow::Type::STRING: - return f(TTypeWrapper()); - case arrow::Type::BINARY: - return f(TTypeWrapper()); - case arrow::Type::FIXED_SIZE_BINARY: - return f(TTypeWrapper()); - case arrow::Type::DATE32: - return f(TTypeWrapper()); - case arrow::Type::DATE64: - return f(TTypeWrapper()); - case arrow::Type::TIMESTAMP: - return f(TTypeWrapper()); - case arrow::Type::TIME32: - return f(TTypeWrapper()); - case arrow::Type::TIME64: - return f(TTypeWrapper()); - case arrow::Type::INTERVAL_MONTHS: - return f(TTypeWrapper()); - case arrow::Type::DECIMAL: - return 
f(TTypeWrapper()); - case arrow::Type::DURATION: - return f(TTypeWrapper()); - case arrow::Type::LARGE_STRING: - return f(TTypeWrapper()); - case arrow::Type::LARGE_BINARY: - return f(TTypeWrapper()); - case arrow::Type::DECIMAL256: - case arrow::Type::DENSE_UNION: - case arrow::Type::DICTIONARY: - case arrow::Type::EXTENSION: - case arrow::Type::FIXED_SIZE_LIST: - case arrow::Type::INTERVAL_DAY_TIME: - case arrow::Type::LARGE_LIST: - case arrow::Type::LIST: - case arrow::Type::MAP: - case arrow::Type::MAX_ID: - case arrow::Type::SPARSE_UNION: - case arrow::Type::STRUCT: - break; - } - - return defaultValue; -} - -template -bool SwitchType(arrow::Type::type typeId, TFunc&& f) { - return SwitchTypeImpl(typeId, std::move(f)); -} - -template -bool SwitchTypeWithNull(arrow::Type::type typeId, TFunc&& f) { - return SwitchType(typeId, std::move(f)); -} - -template -bool SwitchArrayType(const arrow::Datum& column, TFunc&& f) { - auto type = column.type(); - Y_ABORT_UNLESS(type); - return SwitchType(type->id(), std::forward(f)); -} - /** * @brief Function to switch yql type correctly and uniformly converting it to arrow type using callback * @@ -227,74 +127,4 @@ inline bool IsPrimitiveYqlType(const NScheme::TTypeInfo& typeInfo) { return false; } -template -bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) { - using TBuilder = typename arrow::TypeTraits::BuilderType; - - TStatusValidator::Validate(static_cast(builder).Append(value)); - return true; -} - -template -bool Append(arrow::ArrayBuilder& builder, arrow::util::string_view value) { - using TBuilder = typename arrow::TypeTraits::BuilderType; - - TStatusValidator::Validate(static_cast(builder).Append(value)); - return true; -} - -template -bool Append(arrow::ArrayBuilder& builder, const typename T::c_type* values, size_t size) { - using TBuilder = typename arrow::NumericBuilder; - - TStatusValidator::Validate(static_cast(builder).AppendValues(values, size)); - return true; -} - -template -bool 
Append(arrow::ArrayBuilder& builder, const std::vector& values) { - using TBuilder = typename arrow::NumericBuilder; - - TStatusValidator::Validate(static_cast(builder).AppendValues(values.data(), values.size())); - return true; -} - -template -[[nodiscard]] bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSize = nullptr) { - Y_DEBUG_ABORT_UNLESS(builder.type()->id() == array.type_id()); - return SwitchType(array.type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - using TBuilder = typename arrow::TypeTraits::BuilderType; - - auto& typedArray = static_cast(array); - auto& typedBuilder = static_cast(builder); - - if (typedArray.IsNull(position)) { - TStatusValidator::Validate(typedBuilder.AppendNull()); - if (recordSize) { - *recordSize += 4; - } - return true; - } else { - if constexpr (!arrow::has_string_view::value) { - TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); - if (recordSize) { - *recordSize += sizeof(typedArray.GetView(position)); - } - return true; - } - if constexpr (arrow::has_string_view::value) { - TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); - if (recordSize) { - *recordSize += typedArray.GetView(position).size(); - } - return true; - } - } - Y_ABORT_UNLESS(false, "unpredictable variant"); - return false; - }); -} - } diff --git a/ydb/core/formats/arrow/switch/ya.make b/ydb/core/formats/arrow/switch/ya.make index e11e5e070ca6..622e9bf2a604 100644 --- a/ydb/core/formats/arrow/switch/ya.make +++ b/ydb/core/formats/arrow/switch/ya.make @@ -4,11 +4,11 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/scheme_types ydb/library/actors/core + ydb/library/formats/arrow/switch ) SRCS( switch_type.cpp - compare.cpp ) END() diff --git a/ydb/core/formats/arrow/transformer/dictionary.h b/ydb/core/formats/arrow/transformer/dictionary.h index da0c13a5189a..4229c0ed8071 100644 --- 
a/ydb/core/formats/arrow/transformer/dictionary.h +++ b/ydb/core/formats/arrow/transformer/dictionary.h @@ -1,5 +1,5 @@ #pragma once -#include "abstract.h" +#include namespace NKikimr::NArrow::NTransformation { diff --git a/ydb/core/formats/arrow/transformer/ya.make b/ydb/core/formats/arrow/transformer/ya.make index 3a1c0c4c12ab..8ca15c923dd9 100644 --- a/ydb/core/formats/arrow/transformer/ya.make +++ b/ydb/core/formats/arrow/transformer/ya.make @@ -3,12 +3,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/formats/arrow/dictionary + ydb/library/formats/arrow/transformer ) SRCS( - abstract.cpp dictionary.cpp - composite.cpp ) END() diff --git a/ydb/core/formats/arrow/ut/ut_arrow.cpp b/ydb/core/formats/arrow/ut/ut_arrow.cpp index da620d70fa30..b12fc5561b12 100644 --- a/ydb/core/formats/arrow/ut/ut_arrow.cpp +++ b/ydb/core/formats/arrow/ut/ut_arrow.cpp @@ -185,11 +185,6 @@ struct TDataRow { } }; - -std::shared_ptr GetColumn(const arrow::Table& table, int i, int chunk = 0) { - return table.column(i)->chunk(chunk); -} - std::shared_ptr GetColumn(const arrow::RecordBatch& batch, int i) { return batch.column(i); } @@ -526,22 +521,6 @@ bool CheckSorted(const std::shared_ptr& batch, bool desc = f } Y_UNIT_TEST_SUITE(ArrowTest) { - Y_UNIT_TEST(Basic) { - std::vector rows = TestRows(); - - std::shared_ptr table = TDataRowTableBuilder::Build(rows); - - auto expectedSchema = TDataRow::MakeArrowSchema(); - UNIT_ASSERT_EQUAL(expectedSchema->Equals(*table->schema()), true); - - std::vector readRows = ToVector(table); - - UNIT_ASSERT_EQUAL(rows.size(), readRows.size()); - for (size_t i = 0; i < rows.size(); ++i) { - UNIT_ASSERT_EQUAL(rows[i], readRows[i]); - } - } - Y_UNIT_TEST(BatchBuilder) { std::vector rows = TestRows(); diff --git a/ydb/core/formats/arrow/ut/ut_dictionary.cpp b/ydb/core/formats/arrow/ut/ut_dictionary.cpp index c3df2c6a30f0..02a9dc9b3e69 100644 --- a/ydb/core/formats/arrow/ut/ut_dictionary.cpp +++ b/ydb/core/formats/arrow/ut/ut_dictionary.cpp @@ -1,10 
+1,10 @@ #include #include #include -#include -#include -#include #include +#include +#include +#include Y_UNIT_TEST_SUITE(Dictionary) { diff --git a/ydb/core/formats/arrow/ut/ut_hash.cpp b/ydb/core/formats/arrow/ut/ut_hash.cpp index 3255d430352d..4c64c74895cb 100644 --- a/ydb/core/formats/arrow/ut/ut_hash.cpp +++ b/ydb/core/formats/arrow/ut/ut_hash.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include Y_UNIT_TEST_SUITE(Hash) { diff --git a/ydb/core/formats/arrow/ut/ya.make b/ydb/core/formats/arrow/ut/ya.make index c4c993ec3220..54fa4d357730 100644 --- a/ydb/core/formats/arrow/ut/ya.make +++ b/ydb/core/formats/arrow/ut/ya.make @@ -5,6 +5,7 @@ SIZE(SMALL) PEERDIR( contrib/libs/apache/arrow ydb/library/arrow_kernels + ydb/library/formats/arrow/simple_builder ydb/core/base # for NYql::NUdf alloc stuff used in binary_json @@ -26,7 +27,6 @@ SRCS( ut_arrow.cpp ut_program_step.cpp ut_dictionary.cpp - ut_size_calcer.cpp ut_column_filter.cpp ut_hash.cpp ) diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make index d4bf1f8529c2..fa66d3a97154 100644 --- a/ydb/core/formats/arrow/ya.make +++ b/ydb/core/formats/arrow/ya.make @@ -7,16 +7,19 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/scheme + ydb/core/formats/arrow/accessor ydb/core/formats/arrow/serializer - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/dictionary ydb/core/formats/arrow/transformer ydb/core/formats/arrow/reader + ydb/core/formats/arrow/save_load + ydb/core/formats/arrow/splitter ydb/core/formats/arrow/hash ydb/library/actors/core ydb/library/arrow_kernels ydb/library/binary_json ydb/library/dynumber + ydb/library/formats/arrow ydb/library/services ydb/library/yql/core/arrow_kernels/request ) @@ -44,14 +47,11 @@ SRCS( converter.cpp converter.h custom_registry.cpp - input_stream.h permutations.cpp program.cpp - replace_key.cpp size_calcer.cpp ssa_program_optimizer.cpp special_keys.cpp - simple_arrays_cache.cpp process_columns.cpp ) diff --git 
a/ydb/core/fq/libs/actors/clusters_from_connections.cpp b/ydb/core/fq/libs/actors/clusters_from_connections.cpp index 36a5fe05f2f5..ba88c444feb8 100644 --- a/ydb/core/fq/libs/actors/clusters_from_connections.cpp +++ b/ydb/core/fq/libs/actors/clusters_from_connections.cpp @@ -1,7 +1,7 @@ #include "clusters_from_connections.h" #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ void FillGenericClusterConfigBase( TGenericClusterConfig& clusterCfg, const TConnection& connection, const TString& connectionName, - NConnector::NApi::EDataSourceKind dataSourceKind, + NYql::EGenericDataSourceKind dataSourceKind, const TString& authToken, const THashMap& accountIdSignatures ) { @@ -125,15 +125,21 @@ void FillGenericClusterConfigBase( // In YQv1 we just hardcode desired protocols here. // In YQv2 protocol can be configured via `CREATE EXTERNAL DATA SOURCE` params. switch (dataSourceKind) { - case NYql::NConnector::NApi::CLICKHOUSE: - clusterCfg.SetProtocol(common.GetUseNativeProtocolForClickHouse() ? NYql::NConnector::NApi::EProtocol::NATIVE : NYql::NConnector::NApi::EProtocol::HTTP); + case NYql::EGenericDataSourceKind::CLICKHOUSE: + clusterCfg.SetProtocol(common.GetUseNativeProtocolForClickHouse() ? 
NYql::EGenericProtocol::NATIVE : NYql::EGenericProtocol::HTTP); break; - case NYql::NConnector::NApi::POSTGRESQL: - clusterCfg.SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); + case NYql::EGenericDataSourceKind::GREENPLUM: + clusterCfg.SetProtocol(NYql::EGenericProtocol::NATIVE); + break; + case NYql::EGenericDataSourceKind::MYSQL: + clusterCfg.SetProtocol(NYql::EGenericProtocol::NATIVE); + break; + case NYql::EGenericDataSourceKind::POSTGRESQL: + clusterCfg.SetProtocol(NYql::EGenericProtocol::NATIVE); break; default: ythrow yexception() << "Unexpected data source kind: '" - << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) << "'"; + << NYql::EGenericDataSourceKind_Name(dataSourceKind) << "'"; } ValidateGenericClusterConfig(clusterCfg, "NFq::FillGenericClusterFromConfig"); @@ -145,7 +151,7 @@ void FillGenericClusterConfig( TGenericClusterConfig& clusterCfg, const TConnection& connection, const TString& connectionName, - NConnector::NApi::EDataSourceKind dataSourceKind, + NYql::EGenericDataSourceKind dataSourceKind, const TString& authToken, const THashMap& accountIdSignatures ) { @@ -158,7 +164,7 @@ void FillGenericClusterConfig( TGenericClusterConfig& clusterCfg, const FederatedQuery::PostgreSQLCluster& connection, const TString& connectionName, - NConnector::NApi::EDataSourceKind dataSourceKind, + NYql::EGenericDataSourceKind dataSourceKind, const TString& authToken, const THashMap& accountIdSignatures ){ @@ -217,8 +223,8 @@ void AddClustersFromConnections( case FederatedQuery::ConnectionSetting::kYdbDatabase: { const auto& db = conn.content().setting().ydb_database(); auto* clusterCfg = gatewaysConfig.MutableGeneric()->AddClusterMapping(); - clusterCfg->SetKind(NYql::NConnector::NApi::EDataSourceKind::YDB); - clusterCfg->SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); + clusterCfg->SetKind(NYql::EGenericDataSourceKind::YDB); + clusterCfg->SetProtocol(NYql::EGenericProtocol::NATIVE); clusterCfg->SetName(connectionName); 
clusterCfg->SetDatabaseId(db.database_id()); clusterCfg->SetUseSsl(!common.GetDisableSslForGenericDataSources()); @@ -232,7 +238,7 @@ void AddClustersFromConnections( *gatewaysConfig.MutableGeneric()->AddClusterMapping(), conn.content().setting().clickhouse_cluster(), connectionName, - NYql::NConnector::NApi::EDataSourceKind::CLICKHOUSE, + NYql::EGenericDataSourceKind::CLICKHOUSE, authToken, accountIdSignatures); clusters.emplace(connectionName, GenericProviderName); @@ -265,7 +271,7 @@ void AddClustersFromConnections( *gatewaysConfig.MutableGeneric()->AddClusterMapping(), conn.content().setting().postgresql_cluster(), connectionName, - NYql::NConnector::NApi::EDataSourceKind::POSTGRESQL, + NYql::EGenericDataSourceKind::POSTGRESQL, authToken, accountIdSignatures); clusters.emplace(connectionName, GenericProviderName); @@ -277,12 +283,34 @@ void AddClustersFromConnections( *gatewaysConfig.MutableGeneric()->AddClusterMapping(), conn.content().setting().greenplum_cluster(), connectionName, - NYql::NConnector::NApi::EDataSourceKind::GREENPLUM, + NYql::EGenericDataSourceKind::GREENPLUM, + authToken, + accountIdSignatures); + clusters.emplace(connectionName, GenericProviderName); + break; + } + case FederatedQuery::ConnectionSetting::kMysqlCluster: { + FillGenericClusterConfig( + common, + *gatewaysConfig.MutableGeneric()->AddClusterMapping(), + conn.content().setting().mysql_cluster(), + connectionName, + NYql::EGenericDataSourceKind::MYSQL, authToken, accountIdSignatures); clusters.emplace(connectionName, GenericProviderName); break; } + case FederatedQuery::ConnectionSetting::kLogging: { + const auto& connection = conn.content().setting().logging(); + auto* clusterCfg = gatewaysConfig.MutableGeneric()->AddClusterMapping(); + clusterCfg->SetKind(NYql::EGenericDataSourceKind::LOGGING); + clusterCfg->SetName(connectionName); + clusterCfg->mutable_datasourceoptions()->insert({"folder_id", connection.folder_id()}); + FillClusterAuth(*clusterCfg, connection.auth(), 
authToken, accountIdSignatures); + clusters.emplace(connectionName, GenericProviderName); + break; + } // Do not replace with default. Adding a new connection should cause a compilation error case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: diff --git a/ydb/core/fq/libs/actors/database_resolver.cpp b/ydb/core/fq/libs/actors/database_resolver.cpp index d098a1ed76a4..75563a23254a 100644 --- a/ydb/core/fq/libs/actors/database_resolver.cpp +++ b/ydb/core/fq/libs/actors/database_resolver.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,7 +29,7 @@ using TParser = std::function; using TParsers = THashMap; @@ -213,11 +213,13 @@ class TResponseProcessor : public TActorBootstrapped DatabaseId2Description[std::make_pair(params.Id, params.DatabaseType)] = description; result.ConstructInPlace(description); return ""; - } catch (const TCodeLineException& ex) { + } catch (const NYql::TCodeLineException& ex) { + LOG_E("ResponseProcessor::Handle(HttpIncomingResponse): " << ex.what()); return TStringBuilder() << "response parser error: " << params.ToDebugString() << Endl << ex.GetRawMessage(); } catch (...) 
{ + LOG_E("ResponseProcessor::Handle(HttpIncomingResponse): " << CurrentExceptionMessage()); return TStringBuilder() << "response parser error: " << params.ToDebugString() << Endl << CurrentExceptionMessage(); @@ -290,7 +292,7 @@ class TDatabaseResolver: public TActor .SetErrorTtl(TDuration::Minutes(1)) .SetMaxSize(1000000)) { - auto ydbParser = [](NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr&, bool, NConnector::NApi::EProtocol) { + auto ydbParser = [](NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr&, bool, NYql::EGenericProtocol) { bool secure = false; TString endpoint = databaseInfo.GetMap().at("endpoint").GetStringRobust(); TString prefix("/?database="); @@ -319,7 +321,7 @@ class TDatabaseResolver: public TActor // There are two kinds of managed YDBs: serverless and dedicated. // While working with dedicated databases, we have to use underlay network. // That's why we add `u-` prefix to database fqdn. - if (databaseInfo.GetMap().contains("dedicatedDatabase")) { + if (databaseInfo.GetMap().contains("storageConfig")) { endpoint = "u-" + endpoint; host = "u-" + host; } @@ -331,11 +333,11 @@ class TDatabaseResolver: public TActor NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr& mdbEndpointGenerator, bool useTls, - NConnector::NApi::EProtocol protocol) + NYql::EGenericProtocol protocol) { auto ret = ydbParser(databaseInfo, mdbEndpointGenerator, useTls, protocol); // TODO: Take explicit field from MVP - bool isDedicatedDb = databaseInfo.GetMap().contains("dedicatedDatabase"); + bool isDedicatedDb = databaseInfo.GetMap().contains("storageConfig"); if (!isDedicatedDb && ret.Endpoint.StartsWith("ydb.")) { // Replace "ydb." -> "yds." 
ret.Endpoint[2] = 's'; @@ -347,7 +349,7 @@ class TDatabaseResolver: public TActor NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr& mdbEndpointGenerator, bool useTls, - NConnector::NApi::EProtocol protocol + NYql::EGenericProtocol protocol ) { NYql::IMdbEndpointGenerator::TEndpoint endpoint; TVector aliveHosts; @@ -359,7 +361,7 @@ class TDatabaseResolver: public TActor } if (aliveHosts.empty()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE ClickHouse hosts found"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE ClickHouse hosts found"; } NYql::IMdbEndpointGenerator::TParams params = { @@ -378,7 +380,7 @@ class TDatabaseResolver: public TActor NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr& mdbEndpointGenerator, bool useTls, - NConnector::NApi::EProtocol protocol + NYql::EGenericProtocol protocol ) { NYql::IMdbEndpointGenerator::TEndpoint endpoint; TVector aliveHosts; @@ -407,7 +409,7 @@ class TDatabaseResolver: public TActor } if (aliveHosts.empty()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE PostgreSQL hosts found"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE PostgreSQL hosts found"; } NYql::IMdbEndpointGenerator::TParams params = { @@ -425,7 +427,7 @@ class TDatabaseResolver: public TActor NJson::TJsonValue& databaseInfo, const NYql::IMdbEndpointGenerator::TPtr& mdbEndpointGenerator, bool useTls, - NConnector::NApi::EProtocol protocol + NYql::EGenericProtocol protocol ) { NYql::IMdbEndpointGenerator::TEndpoint endpoint; TString aliveHost; @@ -445,7 +447,7 @@ class TDatabaseResolver: public TActor } if (aliveHost == "") { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE Greenplum hosts found"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE Greenplum hosts found"; } NYql::IMdbEndpointGenerator::TParams params = { @@ -457,6 +459,56 @@ class TDatabaseResolver: 
public TActor endpoint = mdbEndpointGenerator->ToEndpoint(params); + return TDatabaseDescription{"", endpoint.first, endpoint.second, "", useTls}; + }; + Parsers[NYql::EDatabaseType::MySQL] = []( + NJson::TJsonValue& databaseInfo, + const NYql::IMdbEndpointGenerator::TPtr& mdbEndpointGenerator, + bool useTls, + NYql::EGenericProtocol protocol + ) { + NYql::IMdbEndpointGenerator::TEndpoint endpoint; + TVector aliveHosts; + + const auto& hostsArray = databaseInfo.GetMap().at("hosts").GetArraySafe(); + + for (const auto& host : hostsArray) { + const auto& hostMap = host.GetMap(); + + if (!hostMap.contains("services")) { + // indicates that cluster is down + continue; + } + + const auto& servicesArray = hostMap.at("services").GetArraySafe(); + + // check if all services of a particular host are alive + const bool alive = std::all_of( + servicesArray.begin(), + servicesArray.end(), + [](const auto& service) { + return service["health"].GetString() == "ALIVE"; + } + ); + + if (alive) { + aliveHosts.push_back(host["name"].GetString()); + } + } + + if (aliveHosts.empty()) { + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "No ALIVE MySQL hosts found"; + } + + NYql::IMdbEndpointGenerator::TParams params = { + .DatabaseType = NYql::EDatabaseType::MySQL, + .MdbHost = aliveHosts[std::rand() % static_cast(aliveHosts.size())], + .UseTls = useTls, + .Protocol = protocol, + }; + + endpoint = mdbEndpointGenerator->ToEndpoint(params); + return TDatabaseDescription{"", endpoint.first, endpoint.second, "", useTls}; }; } @@ -538,7 +590,7 @@ class TDatabaseResolver: public TActor url = TUrlBuilder(ev->Get()->YdbMvpEndpoint + "/database") .AddUrlParam("databaseId", databaseId) .Build(); - } else if (IsIn({NYql::EDatabaseType::ClickHouse, NYql::EDatabaseType::PostgreSQL}, databaseType)) { + } else if (IsIn({NYql::EDatabaseType::ClickHouse, NYql::EDatabaseType::PostgreSQL, NYql::EDatabaseType::MySQL}, databaseType)) { YQL_ENSURE(ev->Get()->MdbGateway, "empty MDB Gateway"); 
url = TUrlBuilder( ev->Get()->MdbGateway + "/managed-" + NYql::DatabaseTypeLowercase(databaseType) + "/v1/clusters/") diff --git a/ydb/core/fq/libs/actors/run_actor.cpp b/ydb/core/fq/libs/actors/run_actor.cpp index 29626a4ec054..41b9085d25c3 100644 --- a/ydb/core/fq/libs/actors/run_actor.cpp +++ b/ydb/core/fq/libs/actors/run_actor.cpp @@ -779,7 +779,11 @@ class TRunActor : public NActors::TActorBootstrapped { mkqlDefaultLimit = 8_GB; } + // This part is for backward compatibility. TODO: remove this part after migration to TS3GatewayConfig auto s3ReadDefaultInflightLimit = Params.Config.GetReadActorsFactoryConfig().GetS3ReadActorFactoryConfig().GetDataInflight(); + if (s3ReadDefaultInflightLimit == 0) { + s3ReadDefaultInflightLimit = Params.Config.GetGateways().GetS3().GetDataInflight(); + } if (s3ReadDefaultInflightLimit == 0) { s3ReadDefaultInflightLimit = 200_MB; } @@ -1935,8 +1939,7 @@ class TRunActor : public NActors::TActorBootstrapped { } { - dataProvidersInit.push_back(GetS3DataProviderInitializer(Params.S3Gateway, Params.CredentialsFactory, - Params.Config.GetReadActorsFactoryConfig().GetS3ReadActorFactoryConfig().GetAllowLocalFiles())); + dataProvidersInit.push_back(GetS3DataProviderInitializer(Params.S3Gateway, Params.CredentialsFactory, NActors::TActivationContext::ActorSystem())); } { diff --git a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp index 2cfc32baa9c4..59a4cb47ae07 100644 --- a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp +++ b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp @@ -124,7 +124,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { void Test( NYql::EDatabaseType databaseType, - NYql::NConnector::NApi::EProtocol protocol, + NYql::EGenericProtocol protocol, const TString& getUrl, const TString& status, const TString& responseBody, @@ -186,7 +186,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(Ydb_Serverless) { Test( NYql::EDatabaseType::Ydb, - 
NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "200", R"( @@ -218,7 +218,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Test( NYql::EDatabaseType::Ydb, - NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "", "", @@ -237,13 +237,13 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(Ydb_Dedicated) { Test( NYql::EDatabaseType::Ydb, - NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "200", R"( { "endpoint":"grpcs://lb.etnbrtlini51k7cinbdr.ydb.mdb.yandexcloud.net:2135/?database=/ru-central1/b1gtl2kg13him37quoo6/etn021us5r9rhld1vgbh", - "dedicatedDatabase":{"resuorcePresetId": "medium"} + "storageConfig":{"storageSizeLimit":107374182400} })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"u-lb.etnbrtlini51k7cinbdr.ydb.mdb.yandexcloud.net:2135"}, @@ -259,7 +259,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(DataStreams_Serverless) { Test( NYql::EDatabaseType::DataStreams, - NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "200", R"( @@ -280,13 +280,13 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(DataStreams_Dedicated) { Test( NYql::EDatabaseType::DataStreams, - NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "200", R"( { 
"endpoint":"grpcs://lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net:2135/?database=/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh", - "dedicatedDatabase":{"resourcePresetId": "medium"} + "storageConfig":{"storageSizeLimit":107374182400} })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"u-lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net:2135"}, @@ -302,7 +302,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(ClickHouseNative) { Test( NYql::EDatabaseType::ClickHouse, - NYql::NConnector::NApi::EProtocol::NATIVE, + NYql::EGenericProtocol::NATIVE, "https://mdb.api.cloud.yandex.net:443/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts", "200", R"({ @@ -336,7 +336,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(ClickHouseHttp) { Test( NYql::EDatabaseType::ClickHouse, - NYql::NConnector::NApi::EProtocol::HTTP, + NYql::EGenericProtocol::HTTP, "https://mdb.api.cloud.yandex.net:443/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts", "200", R"({ @@ -381,7 +381,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Test( NYql::EDatabaseType::ClickHouse, - NYql::NConnector::NApi::EProtocol::HTTP, + NYql::EGenericProtocol::HTTP, "https://mdb.api.cloud.yandex.net:443/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts", "403", R"( @@ -405,7 +405,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(PostgreSQL) { Test( NYql::EDatabaseType::PostgreSQL, - NYql::NConnector::NApi::EProtocol::NATIVE, + NYql::EGenericProtocol::NATIVE, "https://mdb.api.cloud.yandex.net:443/managed-postgresql/v1/clusters/etn021us5r9rhld1vgbh/hosts", "200", R"({ @@ -454,7 +454,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Test( NYql::EDatabaseType::PostgreSQL, - NYql::NConnector::NApi::EProtocol::NATIVE, + NYql::EGenericProtocol::NATIVE, "https://mdb.api.cloud.yandex.net:443/managed-postgresql/v1/clusters/etn021us5r9rhld1vgbh/hosts", "403", R"( @@ -474,10 +474,11 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { issues ); } + 
Y_UNIT_TEST(Greenplum_MasterNode) { Test( NYql::EDatabaseType::Greenplum, - NYql::NConnector::NApi::EProtocol::NATIVE, + NYql::EGenericProtocol::NATIVE, "https://mdb.api.cloud.yandex.net:443/managed-greenplum/v1/clusters/etn021us5r9rhld1vgbh/master-hosts", "200", R"({ @@ -505,7 +506,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { TString(""), true}, {}); - } + } Y_UNIT_TEST(Greenplum_PermissionDenied) { NYql::TIssues issues{ @@ -519,7 +520,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Test( NYql::EDatabaseType::Greenplum, - NYql::NConnector::NApi::EProtocol::NATIVE, + NYql::EGenericProtocol::NATIVE, "https://mdb.api.cloud.yandex.net:443/managed-greenplum/v1/clusters/etn021us5r9rhld1vgbh/master-hosts", "403", R"( @@ -536,7 +537,79 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { )", NYql::TDatabaseResolverResponse::TDatabaseDescription{}, issues); - } + } + + Y_UNIT_TEST(MySQL) { + Test( + NYql::EDatabaseType::MySQL, + NYql::EGenericProtocol::NATIVE, + "https://mdb.api.cloud.yandex.net:443/managed-mysql/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "200", + R"({ + "hosts": [ + { + "services": [ + { + "type": "POOLER", + "health": "ALIVE" + }, + { + "type": "MYSQL", + "health": "ALIVE" + } + ], + "name": "rc1b-eyt6dtobu96rwydq.mdb.yandexcloud.net", + "clusterId": "c9qb2bjghs8onbncpamk", + "zoneId": "ru-central1-b", + "role": "MASTER", + "health": "ALIVE" + } + ] + })", + NYql::TDatabaseResolverResponse::TDatabaseDescription{ + TString{""}, + TString{"rc1b-eyt6dtobu96rwydq.db.yandex.net"}, + 3306, + TString(""), + true + }, + {}); + } + + Y_UNIT_TEST(MySQL_PermissionDenied) { + NYql::TIssues issues{ + NYql::TIssue( + TStringBuilder{} << MakeErrorPrefix( + "mdb.api.cloud.yandex.net:443", + "/managed-mysql/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::MySQL + ) << NoPermissionStr + ) + }; + + Test( + NYql::EDatabaseType::MySQL, + NYql::EGenericProtocol::NATIVE, + 
"https://mdb.api.cloud.yandex.net:443/managed-mysql/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "403", + R"( + { + "code": 7, + "message": "Permission denied", + "details": [ + { + "@type": "type.googleapis.com/google.rpc.RequestInfo", + "requestId": "a943c092-d596-4e0e-ae7b-1f67f9d8164e" + } + ] + } + )", + NYql::TDatabaseResolverResponse::TDatabaseDescription{}, + issues + ); + } + Y_UNIT_TEST(DataStreams_PermissionDenied) { NYql::TIssues issues{ @@ -551,7 +624,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { }; Test( NYql::EDatabaseType::DataStreams, - NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED, "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", "403", R"( @@ -569,7 +642,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { NYql::TDatabaseAuth databaseAuth; databaseAuth.UseTls = true; - databaseAuth.Protocol = NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED; + databaseAuth.Protocol = NYql::EGenericProtocol::PROTOCOL_UNSPECIFIED; TString databaseId1{"etn021us5r9rhld1vgb1"}; TString databaseId2{"etn021us5r9rhld1vgb2"}; diff --git a/ydb/core/fq/libs/actors/ya.make b/ydb/core/fq/libs/actors/ya.make index 02e87f1d1389..4ceb0d997a05 100644 --- a/ydb/core/fq/libs/actors/ya.make +++ b/ydb/core/fq/libs/actors/ya.make @@ -43,7 +43,6 @@ PEERDIR( ydb/core/fq/libs/db_id_async_resolver_impl ydb/core/fq/libs/db_schema ydb/core/fq/libs/events - ydb/core/fq/libs/exceptions ydb/core/fq/libs/grpc ydb/core/fq/libs/private_client ydb/core/fq/libs/rate_limiter/utils @@ -73,7 +72,6 @@ PEERDIR( ydb/library/yql/providers/dq/provider ydb/library/yql/providers/dq/provider/exec ydb/library/yql/providers/dq/worker_manager/interface - ydb/library/yql/providers/generic/connector/api/common ydb/library/yql/providers/generic/connector/libcpp ydb/library/yql/providers/generic/provider ydb/library/yql/providers/pq/cm_client diff --git 
a/ydb/core/fq/libs/cloud_audit/yq_cloud_audit_service.cpp b/ydb/core/fq/libs/cloud_audit/yq_cloud_audit_service.cpp index 5647add100b2..b3f612936ba6 100644 --- a/ydb/core/fq/libs/cloud_audit/yq_cloud_audit_service.cpp +++ b/ydb/core/fq/libs/cloud_audit/yq_cloud_audit_service.cpp @@ -65,7 +65,13 @@ std::string MapConnectionType(const FederatedQuery::ConnectionSetting::Connectio return "Monitoring"; case FederatedQuery::ConnectionSetting::ConnectionCase::kPostgresqlCluster: return "PostgreSQLCluster"; - default: + case FederatedQuery::ConnectionSetting::ConnectionCase::kGreenplumCluster: + return "GreenplumCluster"; + case FederatedQuery::ConnectionSetting::ConnectionCase::kMysqlCluster: + return "MySQLCluster"; + case FederatedQuery::ConnectionSetting::ConnectionCase::kLogging: + return "Logging"; + case FederatedQuery::ConnectionSetting::ConnectionCase::CONNECTION_NOT_SET: Y_ENSURE(false, "Invalid connection case " << i32(connectionCase)); } } diff --git a/ydb/core/fq/libs/common/util.cpp b/ydb/core/fq/libs/common/util.cpp index 433709568eb1..940fc4350414 100644 --- a/ydb/core/fq/libs/common/util.cpp +++ b/ydb/core/fq/libs/common/util.cpp @@ -62,18 +62,24 @@ class TIssueDatabaseRemover { TString DatabasePath; }; +void EscapeBackslashes(TString& value) { + SubstGlobal(value, "\\", "\\\\"); +} + } TString EscapeString(const TString& value, const TString& enclosingSeq, const TString& replaceWith) { auto escapedValue = value; + EscapeBackslashes(escapedValue); SubstGlobal(escapedValue, enclosingSeq, replaceWith); return escapedValue; } TString EscapeString(const TString& value, char enclosingChar) { auto escapedValue = value; + EscapeBackslashes(escapedValue); SubstGlobal(escapedValue, TString{enclosingChar}, TStringBuilder{} << '\\' << enclosingChar); @@ -129,6 +135,12 @@ TString ExtractServiceAccountId(const FederatedQuery::ConnectionSetting& setting case FederatedQuery::ConnectionSetting::kGreenplumCluster: { return 
GetServiceAccountId(setting.greenplum_cluster().auth()); } + case FederatedQuery::ConnectionSetting::kMysqlCluster: { + return GetServiceAccountId(setting.mysql_cluster().auth()); + } + case FederatedQuery::ConnectionSetting::kLogging: { + return GetServiceAccountId(setting.logging().auth()); + } // Do not replace with default. Adding a new connection should cause a compilation error case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: break; @@ -162,6 +174,10 @@ TMaybe GetLogin(const FederatedQuery::ConnectionSetting& setting) { return setting.postgresql_cluster().login(); case FederatedQuery::ConnectionSetting::kGreenplumCluster: return setting.greenplum_cluster().login(); + case FederatedQuery::ConnectionSetting::kMysqlCluster: + return setting.mysql_cluster().login(); + case FederatedQuery::ConnectionSetting::kLogging: + return {}; } } @@ -183,6 +199,10 @@ TMaybe GetPassword(const FederatedQuery::ConnectionSetting& setting) { return setting.postgresql_cluster().password(); case FederatedQuery::ConnectionSetting::kGreenplumCluster: return setting.greenplum_cluster().password(); + case FederatedQuery::ConnectionSetting::kMysqlCluster: + return setting.mysql_cluster().password(); + case FederatedQuery::ConnectionSetting::kLogging: + return {}; } } @@ -204,6 +224,10 @@ EYdbComputeAuth GetYdbComputeAuthMethod(const FederatedQuery::ConnectionSetting& return GetBasicAuthMethod(setting.postgresql_cluster().auth()); case FederatedQuery::ConnectionSetting::kGreenplumCluster: return GetBasicAuthMethod(setting.greenplum_cluster().auth()); + case FederatedQuery::ConnectionSetting::kMysqlCluster: + return GetBasicAuthMethod(setting.mysql_cluster().auth()); + case FederatedQuery::ConnectionSetting::kLogging: + return GetIamAuthMethod(setting.logging().auth()); } } @@ -223,6 +247,10 @@ FederatedQuery::IamAuth GetAuth(const FederatedQuery::Connection& connection) { return connection.content().setting().postgresql_cluster().auth(); case 
FederatedQuery::ConnectionSetting::kGreenplumCluster: return connection.content().setting().greenplum_cluster().auth(); + case FederatedQuery::ConnectionSetting::kMysqlCluster: + return connection.content().setting().mysql_cluster().auth(); + case FederatedQuery::ConnectionSetting::kLogging: + return connection.content().setting().logging().auth(); case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: return FederatedQuery::IamAuth{}; } diff --git a/ydb/core/fq/libs/common/util_ut.cpp b/ydb/core/fq/libs/common/util_ut.cpp index 3e5a7d3e1202..3e209c1349c3 100644 --- a/ydb/core/fq/libs/common/util_ut.cpp +++ b/ydb/core/fq/libs/common/util_ut.cpp @@ -23,7 +23,9 @@ Y_UNIT_TEST_SUITE(EscapingBasics) { UNIT_ASSERT_VALUES_EQUAL(EscapeString("some_secret1", '"'), "some_secret1"); UNIT_ASSERT_VALUES_EQUAL(EscapeString("some_secret1", "}+{", "[*]"), "some_secret1"); UNIT_ASSERT_VALUES_EQUAL(EscapeString("some\"_\"secret1", '"'), "some\\\"_\\\"secret1"); + UNIT_ASSERT_VALUES_EQUAL(EscapeString("some\"_\\\"secret1", '"'), "some\\\"_\\\\\\\"secret1"); UNIT_ASSERT_VALUES_EQUAL(EscapeString("some}+{_}+{secret1", "}+{", "[*]"), "some[*]_[*]secret1"); + UNIT_ASSERT_VALUES_EQUAL(EscapeString("some}+{\\}+{secret1", "}+{", "[*]"), "some[*]\\\\[*]secret1"); } Y_UNIT_TEST(EncloseAndEscapeStringShouldWork) { @@ -31,7 +33,9 @@ Y_UNIT_TEST_SUITE(EscapingBasics) { UNIT_ASSERT_VALUES_EQUAL(EncloseAndEscapeString("some_secret1\nsome_secret2", "}+{", "[*]"), "}+{some_secret1\nsome_secret2}+{"); UNIT_ASSERT_VALUES_EQUAL(EncloseAndEscapeString("some\"_\"secret1", '"'), "\"some\\\"_\\\"secret1\""); + UNIT_ASSERT_VALUES_EQUAL(EncloseAndEscapeString("some\"_\\\"secret1", '"'), "\"some\\\"_\\\\\\\"secret1\""); UNIT_ASSERT_VALUES_EQUAL(EncloseAndEscapeString("some_secret1}+{\n}+{some_secret2", "}+{", "[*]"), "}+{some_secret1[*]\n[*]some_secret2}+{"); + UNIT_ASSERT_VALUES_EQUAL(EncloseAndEscapeString("some_secret1}+{\\}+{some_secret2", "}+{", "[*]"), "}+{some_secret1[*]\\\\[*]some_secret2}+{"); 
} } diff --git a/ydb/core/fq/libs/compute/common/config.h b/ydb/core/fq/libs/compute/common/config.h index 5817e2d94d6f..298a76ff41d6 100644 --- a/ydb/core/fq/libs/compute/common/config.h +++ b/ydb/core/fq/libs/compute/common/config.h @@ -158,6 +158,8 @@ class TComputeConfig { } } + // This function shows which external data sources are currently supported by the open-source YDB + // and which ones are not yet supported. bool IsConnectionCaseEnabled( const FederatedQuery::ConnectionSetting::ConnectionCase& connectionCase) const { switch (connectionCase) { @@ -165,7 +167,9 @@ class TComputeConfig { case FederatedQuery::ConnectionSetting::kClickhouseCluster: case FederatedQuery::ConnectionSetting::kPostgresqlCluster: case FederatedQuery::ConnectionSetting::kGreenplumCluster: + case FederatedQuery::ConnectionSetting::kMysqlCluster: case FederatedQuery::ConnectionSetting::kYdbDatabase: + case FederatedQuery::ConnectionSetting::kLogging: return true; case FederatedQuery::ConnectionSetting::kDataStreams: case FederatedQuery::ConnectionSetting::kMonitoring: diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp index f752bef3160e..33cec012a0f5 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp @@ -1,6 +1,8 @@ #include #include +#include + #include #include @@ -24,17 +26,9 @@ namespace NFq { class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped { struct TCounters { ::NMonitoring::TDynamicCounterPtr Counters; - struct TCommonMetrics { - ::NMonitoring::TDynamicCounters::TCounterPtr Ok; - ::NMonitoring::TDynamicCounters::TCounterPtr Error; - ::NMonitoring::THistogramPtr LatencyMs; - }; - - TCommonMetrics CpuLoadRequest; - ::NMonitoring::TDynamicCounters::TCounterPtr InstantLoadPercentage; - ::NMonitoring::TDynamicCounters::TCounterPtr AverageLoadPercentage; - 
::NMonitoring::TDynamicCounters::TCounterPtr QuotedLoadPercentage; - ::NMonitoring::TDynamicCounters::TCounterPtr AvailableLoadPercentage; + ::NMonitoring::TDynamicCounterPtr SubComponent; + + ::NMonitoring::THistogramPtr CpuLoadRequestLatencyMs; ::NMonitoring::TDynamicCounters::TCounterPtr TargetLoadPercentage; ::NMonitoring::TDynamicCounters::TCounterPtr PendingQueueSize; ::NMonitoring::TDynamicCounters::TCounterPtr PendingQueueOverload; @@ -48,21 +42,11 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGetSubgroup("component", "ComputeDatabaseMonitoring"); - auto subComponent = component->GetSubgroup("subcomponent", "CpuLoadRequest"); - RegisterCommonMetrics(CpuLoadRequest, subComponent); - InstantLoadPercentage = subComponent->GetCounter("InstantLoadPercentage", false); - AverageLoadPercentage = subComponent->GetCounter("AverageLoadPercentage", false); - QuotedLoadPercentage = subComponent->GetCounter("QuotedLoadPercentage", false); - AvailableLoadPercentage = subComponent->GetCounter("AvailableLoadPercentage", false); - TargetLoadPercentage = subComponent->GetCounter("TargetLoadPercentage", false); - PendingQueueSize = subComponent->GetCounter("PendingQueueSize", false); - PendingQueueOverload = subComponent->GetCounter("PendingQueueOverload", true); - } - - void RegisterCommonMetrics(TCommonMetrics& metrics, ::NMonitoring::TDynamicCounterPtr subComponent) { - metrics.Ok = subComponent->GetCounter("Ok", true); - metrics.Error = subComponent->GetCounter("Error", true); - metrics.LatencyMs = subComponent->GetHistogram("LatencyMs", GetLatencyHistogramBuckets()); + SubComponent = component->GetSubgroup("subcomponent", "CpuLoadRequest"); + CpuLoadRequestLatencyMs = SubComponent->GetHistogram("LatencyMs", GetLatencyHistogramBuckets()); + TargetLoadPercentage = SubComponent->GetCounter("TargetLoadPercentage", false); + PendingQueueSize = SubComponent->GetCounter("PendingQueueSize", false); + PendingQueueOverload = 
SubComponent->GetCounter("PendingQueueOverload", true); } static ::NMonitoring::IHistogramCollectorPtr GetLatencyHistogramBuckets() { @@ -75,15 +59,19 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(GetDuration(config.GetAverageLoadInterval(), TDuration::Seconds(10)), TDuration::Seconds(1))) , MaxClusterLoad(std::min(config.GetMaxClusterLoadPercentage(), 100) / 100.0) - , DefaultQueryLoad(config.GetDefaultQueryLoadPercentage() ? std::min(config.GetDefaultQueryLoadPercentage(), 100) / 100.0 : 0.1) , PendingQueueSize(config.GetPendingQueueSize()) , Strict(config.GetStrict()) - , CpuNumber(config.GetCpuNumber()) + , CpuQuotaManager( + GetDuration(config.GetMonitoringRequestDelay(), TDuration::Seconds(1)), + std::max(GetDuration(config.GetAverageLoadInterval(), TDuration::Seconds(10)), TDuration::Seconds(1)), + TDuration::Zero(), + config.GetDefaultQueryLoadPercentage() ? std::min(config.GetDefaultQueryLoadPercentage(), 100) / 100.0 : 0.1, + config.GetStrict(), + config.GetCpuNumber(), + Counters.SubComponent + ) { - *Counters.AvailableLoadPercentage = 100; *Counters.TargetLoadPercentage = static_cast(MaxClusterLoad * 100); } @@ -105,8 +93,8 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(InstantLoad, AverageLoad); - if (!Ready) { + auto response = std::make_unique(CpuQuotaManager.GetInstantLoad(), CpuQuotaManager.GetAverageLoad()); + if (!CpuQuotaManager.CheckLoadIsOutdated()) { response->Issues.AddIssue("CPU Load is unavailable"); } Send(ev->Sender, response.release(), 0, ev->Cookie); @@ -114,45 +102,20 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGet(); - - auto now = TInstant::Now(); - if (!response.Issues) { - auto delta = now - LastCpuLoad; - LastCpuLoad = now; - - if (response.CpuNumber) { - CpuNumber = response.CpuNumber; - } - - InstantLoad = response.InstantLoad; - // exponential moving average - if (!Ready || delta >= AverageLoadInterval) { - AverageLoad = 
InstantLoad; - QuotedLoad = InstantLoad; - } else { - auto ratio = static_cast(delta.GetValue()) / AverageLoadInterval.GetValue(); - AverageLoad = (1 - ratio) * AverageLoad + ratio * InstantLoad; - QuotedLoad = (1 - ratio) * QuotedLoad + ratio * InstantLoad; - } - Ready = true; - Counters.CpuLoadRequest.Ok->Inc(); - *Counters.InstantLoadPercentage = static_cast(InstantLoad * 100); - *Counters.AverageLoadPercentage = static_cast(AverageLoad * 100); - CheckPendingQueue(); - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - } else { + if (response.Issues) { LOG_E("CPU Load Request FAILED: " << response.Issues.ToOneLineString()); - Counters.CpuLoadRequest.Error->Inc(); - CheckLoadIsOutdated(); } - Counters.CpuLoadRequest.LatencyMs->Collect((now - StartCpuLoad).MilliSeconds()); + Counters.CpuLoadRequestLatencyMs->Collect((TInstant::Now() - StartCpuLoad).MilliSeconds()); + + CpuQuotaManager.UpdateCpuLoad(response.InstantLoad, response.CpuNumber, !response.Issues); + CheckPendingQueue(); // TODO: make load pulling reactive // 1. Long period (i.e. AverageLoadInterval/2) when idle (no requests) // 2. 
Active pulling when busy - if (MonitoringRequestDelay) { - Schedule(MonitoringRequestDelay, new NActors::TEvents::TEvWakeup()); + if (auto delay = CpuQuotaManager.GetMonitoringRequestDelay()) { + Schedule(delay, new NActors::TEvents::TEvWakeup()); } else { SendCpuLoadRequest(); } @@ -164,48 +127,24 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped 1.0) { Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Incorrect quota value (exceeds 1.0) " << request.Quota}}), 0, ev->Cookie); } else { - if (!request.Quota) { - request.Quota = DefaultQueryLoad; - } - CheckLoadIsOutdated(); - if (MaxClusterLoad > 0.0 && ((!Ready && Strict) || QuotedLoad >= MaxClusterLoad)) { - if (PendingQueue.size() >= PendingQueueSize) { - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{ - NYql::TIssue{TStringBuilder{} - << "Cluster is overloaded, current quoted load " << static_cast(QuotedLoad * 100) - << "%, average load " << static_cast(AverageLoad * 100) << "%" - }}), 0, ev->Cookie); + auto response = CpuQuotaManager.RequestCpuQuota(request.Quota, MaxClusterLoad); + CheckPendingQueue(); + if (response.Status == NYdb::EStatus::OVERLOADED && PendingQueue.size() < PendingQueueSize) { + PendingQueue.push(ev); + Counters.PendingQueueSize->Inc(); + } else { + if (response.Status == NYdb::EStatus::OVERLOADED) { Counters.PendingQueueOverload->Inc(); - } else { - PendingQueue.push(ev); - Counters.PendingQueueSize->Inc(); } - } else { - QuotedLoad += request.Quota; - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(QuotedLoad * 100), 0, ev->Cookie); + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(response.CurrentLoad, response.Status, response.Issues), 0, ev->Cookie); } } } void Handle(TEvYdbCompute::TEvCpuQuotaAdjust::TPtr& ev) { - if (CpuNumber) { - auto& 
request = *ev.Get()->Get(); - if (request.Duration && request.Duration < AverageLoadInterval / 2 && request.Quota <= 1.0) { - auto load = (request.CpuSecondsConsumed * 1000 / request.Duration.MilliSeconds()) / CpuNumber; - auto quota = request.Quota ? request.Quota : DefaultQueryLoad; - if (quota > load) { - auto adjustment = (quota - load) / 2; - if (QuotedLoad > adjustment) { - QuotedLoad -= adjustment; - } else { - QuotedLoad = 0.0; - } - CheckPendingQueue(); - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - } - } - } + auto& request = *ev.Get()->Get(); + CpuQuotaManager.AdjustCpuQuota(request.Quota, request.Duration, request.CpuSecondsConsumed); + CheckPendingQueue(); } void SendCpuLoadRequest() { @@ -215,57 +154,51 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped AverageLoadInterval) { - Ready = false; - QuotedLoad = 0.0; - if (Strict) { - while (PendingQueue.size()) { - auto& ev = PendingQueue.front(); - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Cluster load info is not available"}}), 0, ev->Cookie); - PendingQueue.pop(); - Counters.PendingQueueSize->Dec(); - } + if (Strict && !CpuQuotaManager.CheckLoadIsOutdated()) { + while (PendingQueue.size()) { + auto& ev = PendingQueue.front(); + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Cluster load info is not available"}}), 0, ev->Cookie); + PendingQueue.pop(); + Counters.PendingQueueSize->Dec(); } } } void CheckPendingQueue() { + CheckLoadIsOutdated(); + auto now = TInstant::Now(); - while (QuotedLoad < MaxClusterLoad && PendingQueue.size()) { + while (PendingQueue.size()) { auto& ev = PendingQueue.front(); auto& request = *ev.Get()->Get(); if (request.Deadline && now >= request.Deadline) { Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::CANCELLED, 
NYql::TIssues{ NYql::TIssue{TStringBuilder{} << "Deadline reached " << request.Deadline}}), 0, ev->Cookie); } else { - QuotedLoad += request.Quota; - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(QuotedLoad * 100), 0, ev->Cookie); + auto response = CpuQuotaManager.RequestCpuQuota(request.Quota, MaxClusterLoad); + if (response.Status == NYdb::EStatus::OVERLOADED) { + break; + } + + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(response.CurrentLoad, response.Status, response.Issues), 0, ev->Cookie); } + PendingQueue.pop(); Counters.PendingQueueSize->Dec(); } } private: - TInstant StartCpuLoad; - TInstant LastCpuLoad; TActorId MonitoringClientActorId; TCounters Counters; - - double InstantLoad = 0.0; - double AverageLoad = 0.0; - double QuotedLoad = 0.0; - bool Ready = false; - - const TDuration MonitoringRequestDelay; - const TDuration AverageLoadInterval; const double MaxClusterLoad; - const double DefaultQueryLoad; const ui32 PendingQueueSize; const bool Strict; - ui32 CpuNumber = 0; + NKikimr::NKqp::NWorkload::TCpuQuotaManager CpuQuotaManager; TQueue PendingQueue; + + TInstant StartCpuLoad; }; std::unique_ptr CreateDatabaseMonitoringActor(const NActors::TActorId& monitoringClientActorId, NFq::NConfig::TLoadControlConfig config, const ::NMonitoring::TDynamicCounterPtr& counters) { diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make index a77f4292a98a..fcfb13f095ef 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make +++ b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make @@ -18,6 +18,7 @@ PEERDIR( ydb/core/fq/libs/compute/ydb/synchronization_service ydb/core/fq/libs/control_plane_storage/proto ydb/core/fq/libs/quota_manager/proto + ydb/core/kqp/workload_service/common ydb/core/protos ydb/library/db_pool/protos ydb/library/yql/public/issue diff --git a/ydb/core/fq/libs/config/protos/control_plane_storage.proto b/ydb/core/fq/libs/config/protos/control_plane_storage.proto 
index 1c4b668d4ccc..9cc2d31158d4 100644 --- a/ydb/core/fq/libs/config/protos/control_plane_storage.proto +++ b/ydb/core/fq/libs/config/protos/control_plane_storage.proto @@ -24,11 +24,15 @@ message TQueryMapping { // 1. StatusCode(s) are handled with defined policies, non-unique StatusCode(s) across all policies is UB // 2. RetryCount and RetryPeriodMs are used to calculate actual RetryRate, if it exceeds RetryCount, query is aborted +// - The number of retries during RetryPeriod is less than 2 * RetryCount due to RetryRate // 3. BackoffPeriodMs is factor of RetryRate to delay query execution before next retry -// 4. There are no default retry policy, all unhandled statuses are fatal +// 4. RetryLimit is a hard limit on the total number of query retries; once it is reached, the query is aborted +// - If RetryLimit = 0, the query can be aborted only by RetryRate +// 5. There is no default retry policy; all unhandled statuses are fatal message TRetryPolicy { uint64 RetryCount = 1; + uint64 RetryLimit = 4; string RetryPeriod = 2; string BackoffPeriod = 3; } diff --git a/ydb/core/fq/libs/config/protos/ya.make b/ydb/core/fq/libs/config/protos/ya.make index 2bf50a9a41fd..215cf5a898e5 100644 --- a/ydb/core/fq/libs/config/protos/ya.make +++ b/ydb/core/fq/libs/config/protos/ya.make @@ -34,6 +34,10 @@ PEERDIR( ydb/library/yql/providers/s3/proto ) -EXCLUDE_TAGS(GO_PROTO) +ONLY_TAGS( + CPP_PROTO + PY_PROTO + PY3_PROTO +) END() diff --git a/ydb/core/fq/libs/config/yq_issue.cpp b/ydb/core/fq/libs/config/yq_issue.cpp index 42c56c487b75..ebc7141ac349 100644 --- a/ydb/core/fq/libs/config/yq_issue.cpp +++ b/ydb/core/fq/libs/config/yq_issue.cpp @@ -5,28 +5,28 @@ namespace NFq { -NYql::TIssue MakeFatalIssue(TIssuesIds::EIssueCode id, const TString& message) { +NYql::TIssue MakeFatalIssue(ui32 id, const TString& message) { NYql::TIssue issue; issue.SetCode(id, NYql::TSeverityIds::S_FATAL); issue.SetMessage(message); return issue; } -NYql::TIssue MakeErrorIssue(TIssuesIds::EIssueCode id, const TString& message) { 
+NYql::TIssue MakeErrorIssue(ui32 id, const TString& message) { NYql::TIssue issue; issue.SetCode(id, NYql::TSeverityIds::S_ERROR); issue.SetMessage(message); return issue; } -NYql::TIssue MakeWarningIssue(TIssuesIds::EIssueCode id, const TString& message) { +NYql::TIssue MakeWarningIssue(ui32 id, const TString& message) { NYql::TIssue issue; issue.SetCode(id, NYql::TSeverityIds::S_WARNING); issue.SetMessage(message); return issue; } -NYql::TIssue MakeInfoIssue(TIssuesIds::EIssueCode id, const TString& message) { +NYql::TIssue MakeInfoIssue(ui32 id, const TString& message) { NYql::TIssue issue; issue.SetCode(id, NYql::TSeverityIds::S_INFO); issue.SetMessage(message); diff --git a/ydb/core/fq/libs/config/yq_issue.h b/ydb/core/fq/libs/config/yq_issue.h index b9f1abc9a35c..960c790ee046 100644 --- a/ydb/core/fq/libs/config/yq_issue.h +++ b/ydb/core/fq/libs/config/yq_issue.h @@ -6,12 +6,12 @@ namespace NFq { -NYql::TIssue MakeFatalIssue(TIssuesIds::EIssueCode id, const TString& message); +NYql::TIssue MakeFatalIssue(ui32 id, const TString& message); -NYql::TIssue MakeErrorIssue(TIssuesIds::EIssueCode id, const TString& message); +NYql::TIssue MakeErrorIssue(ui32 id, const TString& message); -NYql::TIssue MakeWarningIssue(TIssuesIds::EIssueCode id, const TString& message); +NYql::TIssue MakeWarningIssue(ui32 id, const TString& message); -NYql::TIssue MakeInfoIssue(TIssuesIds::EIssueCode id, const TString& message); +NYql::TIssue MakeInfoIssue(ui32 id, const TString& message); } diff --git a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp index 728bb2081131..4bfbc027791e 100644 --- a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp +++ b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp @@ -246,7 +246,28 @@ TString MakeCreateExternalDataSourceQuery( "database_name"_a = EncloseAndEscapeString(connectionContent.setting().greenplum_cluster().database_name(), '"'), "use_tls"_a = 
common.GetDisableSslForGenericDataSources() ? "false" : "true", "schema"_a = gpschema ? ", SCHEMA=" + EncloseAndEscapeString(gpschema, '"') : TString{}); - + } + break; + case FederatedQuery::ConnectionSetting::kMysqlCluster: { + properties = fmt::format( + R"( + SOURCE_TYPE="MySQL", + MDB_CLUSTER_ID={mdb_cluster_id}, + DATABASE_NAME={database_name}, + USE_TLS="{use_tls}" + )", + "mdb_cluster_id"_a = EncloseAndEscapeString(connectionContent.setting().mysql_cluster().database_id(), '"'), + "database_name"_a = EncloseAndEscapeString(connectionContent.setting().mysql_cluster().database_name(), '"'), + "use_tls"_a = common.GetDisableSslForGenericDataSources() ? "false" : "true"); + } + break; // keep the MySQL properties: do not fall through into kLogging + case FederatedQuery::ConnectionSetting::kLogging: { + properties = fmt::format( + R"( + SOURCE_TYPE="Logging", + FOLDER_ID={folder_id} + )", + "folder_id"_a = EncloseAndEscapeString(connectionContent.setting().logging().folder_id(), '"')); } break; } diff --git a/ydb/core/fq/libs/control_plane_proxy/utils/utils.h b/ydb/core/fq/libs/control_plane_proxy/utils/utils.h index 1e6a531e2bc0..cfe0f7edd7c1 100644 --- a/ydb/core/fq/libs/control_plane_proxy/utils/utils.h +++ b/ydb/core/fq/libs/control_plane_proxy/utils/utils.h @@ -34,6 +34,12 @@ TString ExtractServiceAccountIdWithConnection(const T& setting) { case FederatedQuery::ConnectionSetting::kGreenplumCluster: { return GetServiceAccountId(setting.greenplum_cluster().auth()); } + case FederatedQuery::ConnectionSetting::kMysqlCluster: { + return GetServiceAccountId(setting.mysql_cluster().auth()); + } + case FederatedQuery::ConnectionSetting::kLogging: { + return GetServiceAccountId(setting.logging().auth()); + } // Do not replace with default.
Adding a new connection should cause a compilation error case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: break; diff --git a/ydb/core/fq/libs/control_plane_storage/config.cpp b/ydb/core/fq/libs/control_plane_storage/config.cpp index 2cd4dd6bbe74..41638b506741 100644 --- a/ydb/core/fq/libs/control_plane_storage/config.cpp +++ b/ydb/core/fq/libs/control_plane_storage/config.cpp @@ -50,10 +50,11 @@ TControlPlaneStorageConfig::TControlPlaneStorageConfig(const NConfig::TControlPl for (const auto& mapping : Proto.GetRetryPolicyMapping()) { auto& retryPolicy = mapping.GetPolicy(); auto retryCount = retryPolicy.GetRetryCount(); + auto retryLimit = retryPolicy.GetRetryLimit(); auto retryPeriod = GetDuration(retryPolicy.GetRetryPeriod(), TDuration::Hours(1)); auto backoffPeriod = GetDuration(retryPolicy.GetBackoffPeriod(), TDuration::Zero()); for (const auto statusCode: mapping.GetStatusCode()) { - RetryPolicies.emplace(statusCode, TRetryPolicyItem(retryCount, retryPeriod, backoffPeriod)); + RetryPolicies.emplace(statusCode, TRetryPolicyItem(retryCount, retryLimit, retryPeriod, backoffPeriod)); } } diff --git a/ydb/core/fq/libs/control_plane_storage/extractors.h b/ydb/core/fq/libs/control_plane_storage/extractors.h index d669825db024..bf71f862b370 100644 --- a/ydb/core/fq/libs/control_plane_storage/extractors.h +++ b/ydb/core/fq/libs/control_plane_storage/extractors.h @@ -29,7 +29,7 @@ TValidationQuery CreateEntityExtractor(const TString& scope, auto validator = [response, entityColumnName, parseProtobufError](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); } NYdb::TResultSetParser 
parser(resultSets.back()); @@ -39,7 +39,7 @@ TValidationQuery CreateEntityExtractor(const TString& scope, if (!response->second.Before.ConstructInPlace().ParseFromString(*parser.ColumnParser(entityColumnName).GetOptionalString())) { parseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message. Please contact internal support"; } return false; }; diff --git a/ydb/core/fq/libs/control_plane_storage/internal/task_get.cpp b/ydb/core/fq/libs/control_plane_storage/internal/task_get.cpp index e41846b58d1d..b23f658a8e29 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/task_get.cpp +++ b/ydb/core/fq/libs/control_plane_storage/internal/task_get.cpp @@ -161,13 +161,13 @@ std::tupleParseProtobufError->Inc(); - throw TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + throw NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; } const TInstant deadline = TInstant::Now() + (task.Query.content().automatic() ? std::min(automaticQueriesTtl, resultSetsTtl) : resultSetsTtl); task.Deadline = deadline; if (!task.Internal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - throw TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. Please contact internal support"; + throw NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. 
Please contact internal support"; } *task.Internal.mutable_result_ttl() = NProtoInterop::CastToProto(resultSetsTtl); diff --git a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp index b214e127d44c..d640a563c5ed 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp +++ b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp @@ -99,32 +99,32 @@ TPingTaskParams ConstructHardPingTask( TString owner; if (resultSets.size() != 3) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "RESULT SET SIZE of " << resultSets.size() << " != 3"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "RESULT SET SIZE of " << resultSets.size() << " != 3"; } { TResultSetParser parser(resultSets[0]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " QUERIES_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " QUERIES_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; } if (!query.ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." QUERY_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." 
QUERY_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; } if (!internal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." INTERNAL_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." INTERNAL_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; } } { TResultSetParser parser(resultSets[1]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " JOBS_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " JOBS_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; } if (!job.ParseFromString(*parser.ColumnParser(JOB_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " JOBS_TABLE_NAME "." JOB_COLUMN_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " JOBS_TABLE_NAME "." 
JOB_COLUMN_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\""; } jobId = *parser.ColumnParser(JOB_ID_COLUMN_NAME).GetOptionalString(); } @@ -133,11 +133,11 @@ TPingTaskParams ConstructHardPingTask( { TResultSetParser parser(resultSets[2]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " PENDING_SMALL_TABLE_NAME " where " TENANT_COLUMN_NAME " = \"" << request.tenant() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " PENDING_SMALL_TABLE_NAME " where " TENANT_COLUMN_NAME " = \"" << request.tenant() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; } owner = *parser.ColumnParser(OWNER_COLUMN_NAME).GetOptionalString(); if (owner != request.owner_id()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "OWNER of QUERY ID = \"" << request.query_id().value() << "\" MISMATCHED: \"" << request.owner_id() << "\" (received) != \"" << owner << "\" (selected)"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "OWNER of QUERY ID = \"" << request.query_id().value() << "\" MISMATCHED: \"" << request.owner_id() << "\" (received) != \"" << owner << "\" (selected)"; } retryLimiter.Assign( parser.ColumnParser(RETRY_COUNTER_COLUMN_NAME).GetOptionalUint64().GetOrElse(0), @@ -173,7 +173,7 @@ TPingTaskParams ConstructHardPingTask( internal.clear_operation_id(); } - TRetryPolicyItem policy(0, TDuration::Seconds(1), TDuration::Zero()); + TRetryPolicyItem policy(0, 0, TDuration::Seconds(1), TDuration::Zero()); auto it = retryPolicies.find(request.status_code()); auto policyFound = it != retryPolicies.end(); if (policyFound) { @@ -200,7 +200,7 @@ TPingTaskParams 
ConstructHardPingTask( TStringBuilder builder; builder << "Query failed with code " << NYql::NDqProto::StatusIds_StatusCode_Name(request.status_code()); if (policy.RetryCount) { - builder << " (failure rate " << retryLimiter.RetryRate << " exceeds limit of " << policy.RetryCount << ")"; + builder << " (" << retryLimiter.LastError << ")"; } builder << " at " << Now(); @@ -404,15 +404,15 @@ TPingTaskParams ConstructHardPingTask( } if (job.ByteSizeLong() > maxRequestSize) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Job proto exceeded the size limit: " << job.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(job).ToString(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Job proto exceeded the size limit: " << job.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(job).ToString(); } if (query.ByteSizeLong() > maxRequestSize) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Query proto exceeded the size limit: " << query.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(query).ToString(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Query proto exceeded the size limit: " << query.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(query).ToString(); } if (internal.ByteSizeLong() > maxRequestSize) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "QueryInternal proto exceeded the size limit: " << internal.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(internal).ToString(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "QueryInternal proto exceeded the size limit: " << internal.ByteSizeLong() << " of " << maxRequestSize << " " << TSizeFormatPrinter(internal).ToString(); } finalStatus->Status = query.meta().status(); @@ -564,17 +564,17 @@ TPingTaskParams ConstructSoftPingTask( FederatedQuery::Internal::QueryInternal internal; if (resultSets.size() != 2) { - ythrow 
TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "RESULT SET SIZE of " << resultSets.size() << " != 2"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "RESULT SET SIZE of " << resultSets.size() << " != 2"; } { TResultSetParser parser(resultSets[0]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " QUERIES_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " QUERIES_TABLE_NAME " where " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; } if (!internal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." INTERNAL_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "ERROR PARSING " QUERIES_TABLE_NAME "." 
INTERNAL_COLUMN_NAME " where " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\""; } } @@ -582,11 +582,11 @@ TPingTaskParams ConstructSoftPingTask( TResultSetParser parser(resultSets[1]); if (!parser.TryNextRow()) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " PENDING_SMALL_TABLE_NAME " where " TENANT_COLUMN_NAME " = \"" << request.tenant() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "NOT FOUND " PENDING_SMALL_TABLE_NAME " where " TENANT_COLUMN_NAME " = \"" << request.tenant() << "\" and " SCOPE_COLUMN_NAME " = \"" << request.scope() << "\" and " QUERY_ID_COLUMN_NAME " = \"" << request.query_id().value() << "\"" ; } owner = *parser.ColumnParser(OWNER_COLUMN_NAME).GetOptionalString(); if (owner != request.owner_id()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "OWNER of QUERY ID = \"" << request.query_id().value() << "\" MISMATCHED: \"" << request.owner_id() << "\" (received) != \"" << owner << "\" (selected)"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "OWNER of QUERY ID = \"" << request.query_id().value() << "\" MISMATCHED: \"" << request.owner_id() << "\" (received) != \"" << owner << "\" (selected)"; } } diff --git a/ydb/core/fq/libs/control_plane_storage/internal/task_result_write.cpp b/ydb/core/fq/libs/control_plane_storage/internal/task_result_write.cpp index ce9c14d70d26..c2a39dbfe05b 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/task_result_write.cpp +++ b/ydb/core/fq/libs/control_plane_storage/internal/task_result_write.cpp @@ -39,7 +39,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvWriteResult for (const auto& row : resultSet.rows()) { TString serializedRow; if 
(!row.SerializeToString(&serializedRow)) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error serialize proto message for row. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error serialize proto message for row. Please contact internal support"; } itemsAsList.AddListItem() diff --git a/ydb/core/fq/libs/control_plane_storage/internal/utils.h b/ydb/core/fq/libs/control_plane_storage/internal/utils.h index 331ea14e0c0b..60036ec02276 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/utils.h +++ b/ydb/core/fq/libs/control_plane_storage/internal/utils.h @@ -8,7 +8,7 @@ #include #include -#include +#include namespace NFq { diff --git a/ydb/core/fq/libs/control_plane_storage/internal/ya.make b/ydb/core/fq/libs/control_plane_storage/internal/ya.make index eb8c77f2c3a3..f74f8aded260 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/ya.make +++ b/ydb/core/fq/libs/control_plane_storage/internal/ya.make @@ -21,7 +21,6 @@ PEERDIR( ydb/core/fq/libs/compute/common ydb/core/fq/libs/config ydb/core/fq/libs/control_plane_storage/proto - ydb/core/fq/libs/exceptions ydb/core/fq/libs/metrics ydb/core/fq/libs/quota_manager ydb/core/fq/libs/quota_manager/events @@ -31,6 +30,7 @@ PEERDIR( ydb/library/protobuf_printer ydb/library/security ydb/library/yql/public/issue + ydb/library/yql/utils ydb/public/lib/fq ydb/public/sdk/cpp/client/ydb_scheme ydb/public/sdk/cpp/client/ydb_value diff --git a/ydb/core/fq/libs/control_plane_storage/request_actor.h b/ydb/core/fq/libs/control_plane_storage/request_actor.h index 915d2de0920b..caf23b5c76bb 100644 --- a/ydb/core/fq/libs/control_plane_storage/request_actor.h +++ b/ydb/core/fq/libs/control_plane_storage/request_actor.h @@ -124,7 +124,7 @@ class TControlPlaneRequestActor : public NActors::TActorBootstrapped, issues.AddIssues(status.GetIssues()); internalIssues.AddIssues(status.GetIssues()); } - } catch (const TCodeLineException& exception) { + } catch (const 
NYql::TCodeLineException& exception) { NYql::TIssue issue = MakeErrorIssue(exception.Code, exception.GetRawMessage()); issues.AddIssue(issue); NYql::TIssue internalIssue = MakeErrorIssue(exception.Code, CurrentExceptionMessage()); diff --git a/ydb/core/fq/libs/control_plane_storage/request_validators.cpp b/ydb/core/fq/libs/control_plane_storage/request_validators.cpp index 03cf908e52de..e084bacd7176 100644 --- a/ydb/core/fq/libs/control_plane_storage/request_validators.cpp +++ b/ydb/core/fq/libs/control_plane_storage/request_validators.cpp @@ -19,11 +19,16 @@ void ValidateGenericConnectionSetting( issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "current iam authorization is disabled")); } - if (!connection.database_id() && !(connection.host() && connection.port())) { - auto msg = TStringBuilder() << "content.setting.clickhouse_cluster.{database_id or host,port} field is not specified"; - issues.AddIssue( MakeErrorIssue(TIssuesIds::BAD_REQUEST,msg)); + if (!connection.database_id()) { + auto msg = TStringBuilder() << "content.setting." << dataSourceKind << "_cluster.database_id field is not specified"; + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST,msg)); } + if (!connection.database_name()) { + auto msg = TStringBuilder() << "content.setting." << dataSourceKind << "_cluster.database_name field is not specified"; + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST,msg)); + } + if (!connection.login()) { auto msg = TStringBuilder() << "content.setting." 
<< dataSourceKind << "_cluster.login is not specified"; issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, msg)); @@ -70,18 +75,11 @@ NYql::TIssues ValidateConnectionSetting( break; } case FederatedQuery::ConnectionSetting::kGreenplumCluster: { - const FederatedQuery::GreenplumCluster database = setting.greenplum_cluster(); - if (!database.has_auth() || database.auth().identity_case() == FederatedQuery::IamAuth::IDENTITY_NOT_SET) { - issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "content.setting.greenplum_database.auth field is not specified")); - } - - if (database.auth().identity_case() == FederatedQuery::IamAuth::kCurrentIam && disableCurrentIam) { - issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "current iam authorization is disabled")); - } - - if (!database.database_id() && !database.database_name()) { - issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "content.setting.greenplum_database.{database_id or database_name} field is not specified")); - } + ValidateGenericConnectionSetting(setting.greenplum_cluster(), "greenplum", disableCurrentIam, passwordRequired, issues); + break; + } + case FederatedQuery::ConnectionSetting::kMysqlCluster: { + ValidateGenericConnectionSetting(setting.mysql_cluster(), "mysql", disableCurrentIam, passwordRequired, issues); break; } case FederatedQuery::ConnectionSetting::kObjectStorage: { @@ -133,6 +131,22 @@ NYql::TIssues ValidateConnectionSetting( } break; } + case FederatedQuery::ConnectionSetting::kLogging: { + const FederatedQuery::Logging logging = setting.logging(); + if (!logging.has_auth() || logging.auth().identity_case() == FederatedQuery::IamAuth::IDENTITY_NOT_SET) { + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "content.setting.logging.auth field is not specified")); + } + + if (logging.auth().identity_case() == FederatedQuery::IamAuth::kCurrentIam && disableCurrentIam) { + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "current iam authorization is disabled")); + } 
+ + if (!logging.folder_id()) { + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "content.setting.logging.folder_id field is not specified")); + } + + break; + } case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: { issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, "connection is not set")); break; diff --git a/ydb/core/fq/libs/control_plane_storage/request_validators.h b/ydb/core/fq/libs/control_plane_storage/request_validators.h index c17ac4d41a16..557a2c81a400 100644 --- a/ydb/core/fq/libs/control_plane_storage/request_validators.h +++ b/ydb/core/fq/libs/control_plane_storage/request_validators.h @@ -121,7 +121,7 @@ NYql::TIssues ValidateBinding(const T& ev, size_t maxSize, const TSet= policy.RetryCount) { + shouldRetry = false; + LastError = TStringBuilder() << "failure rate " << RetryRate << " exceeds limit of " << policy.RetryCount; + } else if (policy.RetryLimit && RetryCount >= policy.RetryLimit) { + shouldRetry = false; + LastError = TStringBuilder() << "retry count reached limit of " << policy.RetryLimit; + } + if (shouldRetry) { RetryCount++; RetryCounterUpdatedAt = now; @@ -145,6 +154,7 @@ NConfig::TControlPlaneStorageConfig FillDefaultParameters(NConfig::TControlPlane policyMapping.AddStatusCode(NYql::NDqProto::StatusIds::EXTERNAL_ERROR); auto& policy = *policyMapping.MutablePolicy(); policy.SetRetryCount(10); + policy.SetRetryLimit(40); policy.SetRetryPeriod("1m"); policy.SetBackoffPeriod("1s"); } diff --git a/ydb/core/fq/libs/control_plane_storage/util.h b/ydb/core/fq/libs/control_plane_storage/util.h index 2c95b6fe9894..8d2b49a6d951 100644 --- a/ydb/core/fq/libs/control_plane_storage/util.h +++ b/ydb/core/fq/libs/control_plane_storage/util.h @@ -15,10 +15,11 @@ namespace NFq { class TRetryPolicyItem { public: TRetryPolicyItem() = default; - TRetryPolicyItem(ui64 retryCount, const TDuration& retryPeriod, const TDuration& backoffPeriod) - : RetryCount(retryCount), RetryPeriod(retryPeriod), BackoffPeriod(backoffPeriod) + 
TRetryPolicyItem(ui64 retryCount, ui64 retryLimit, const TDuration& retryPeriod, const TDuration& backoffPeriod) + : RetryCount(retryCount), RetryLimit(retryLimit), RetryPeriod(retryPeriod), BackoffPeriod(backoffPeriod) { } ui64 RetryCount = 0; + ui64 RetryLimit = 0; TDuration RetryPeriod = TDuration::Zero(); TDuration BackoffPeriod = TDuration::Zero(); }; @@ -32,6 +33,7 @@ class TRetryLimiter { ui64 RetryCount = 0; TInstant RetryCounterUpdatedAt = TInstant::Zero(); double RetryRate = 0.0; + TString LastError; }; bool IsTerminalStatus(FederatedQuery::QueryMeta::ComputeStatus status); diff --git a/ydb/core/fq/libs/control_plane_storage/validators.cpp b/ydb/core/fq/libs/control_plane_storage/validators.cpp index fb85ec52c06f..84f80761149d 100644 --- a/ydb/core/fq/libs/control_plane_storage/validators.cpp +++ b/ydb/core/fq/libs/control_plane_storage/validators.cpp @@ -33,17 +33,17 @@ TValidationQuery CreateUniqueNameValidator(const TString& tableName, auto validator = [error](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. 
Please contact internal support"; } ui64 countNames = parser.ColumnParser("count").GetUint64(); if (countNames != 0) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; @@ -81,13 +81,13 @@ TValidationQuery CreateModifyUniqueNameValidator(const TString& tableName, auto validator = [error, visibility, name](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 2) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". Please contact internal support"; } { TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; } FederatedQuery::Acl::Visibility oldVisibility = @@ -105,12 +105,12 @@ TValidationQuery CreateModifyUniqueNameValidator(const TString& tableName, TResultSetParser parser(resultSets.back()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. 
Please contact internal support"; } ui64 countNames = parser.ColumnParser("count").GetUint64(); if (countNames != 0) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; @@ -135,17 +135,17 @@ TValidationQuery CreateCountEntitiesValidator(const TString& scope, auto validator = [error, limit](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; } ui64 countEntities = parser.ColumnParser("count").GetUint64(); if (countEntities >= limit) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; @@ -172,7 +172,7 @@ TValidationQuery CreateRevisionValidator(const TString& tableName, auto validator = [error, previousRevision](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". 
Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); @@ -182,7 +182,7 @@ TValidationQuery CreateRevisionValidator(const TString& tableName, i64 revision = parser.ColumnParser(REVISION_COLUMN_NAME).GetOptionalInt64().GetOrElse(0); if (revision != previousRevision) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; @@ -212,19 +212,19 @@ static TValidationQuery CreateAccessValidatorImpl(const TString& tableName, auto validator = [error, user, permissions, privatePermission, publicPermission](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". 
Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; } TString queryUser = parser.ColumnParser(USER_COLUMN_NAME).GetOptionalString().GetOrElse(""); FederatedQuery::Acl::Visibility visibility = static_cast(parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64().GetOrElse(FederatedQuery::Acl::VISIBILITY_UNSPECIFIED)); bool hasAccess = HasAccessImpl(permissions, visibility, queryUser, user, privatePermission, publicPermission); if (!hasAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; } return false; @@ -276,17 +276,17 @@ TValidationQuery CreateRelatedBindingsValidator(const TString& scope, auto validator = [error](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Not valid number of lines, one is expected. 
Please contact internal support"; } ui64 countEntities = parser.ColumnParser("count").GetUint64(); if (countEntities != 0) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; @@ -313,23 +313,23 @@ TValidationQuery CreateConnectionExistsValidator(const TString& scope, auto validator = [error, user, permissions, bindingVisibility](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; } FederatedQuery::Acl::Visibility connectionVisibility = static_cast(parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64().GetOrElse(FederatedQuery::Acl::VISIBILITY_UNSPECIFIED)); TString connectionUser = parser.ColumnParser(USER_COLUMN_NAME).GetOptionalString().GetOrElse(""); if (bindingVisibility == FederatedQuery::Acl::SCOPE && connectionVisibility == FederatedQuery::Acl::PRIVATE) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding with SCOPE visibility cannot refer to connection with PRIVATE visibility"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding with SCOPE visibility cannot refer to connection with PRIVATE visibility"; } if (!HasManageAccess(permissions, connectionVisibility, connectionUser, user)) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << error; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << 
error; } return false; @@ -357,7 +357,7 @@ TValidationQuery CreateConnectionOverrideBindingValidator(const TString& scope, auto validator = [connectionName, user, permissions](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); @@ -370,7 +370,7 @@ TValidationQuery CreateConnectionOverrideBindingValidator(const TString& scope, FederatedQuery::Acl::Visibility bindingVisibility = static_cast(parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64().GetOrElse(FederatedQuery::Acl::VISIBILITY_UNSPECIFIED)); if (HasViewAccess(permissions, bindingVisibility, bindingUser, user)) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection named " << connectionName << " overrides connection from binding " << bindingName << ". Please rename this connection"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection named " << connectionName << " overrides connection from binding " << bindingName << ". Please rename this connection"; } return false; @@ -398,7 +398,7 @@ TValidationQuery CreateBindingConnectionValidator(const TString& scope, auto validator = [connectionId](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". 
Please contact internal support"; } TResultSetParser parser(resultSets.front()); @@ -409,7 +409,7 @@ TValidationQuery CreateBindingConnectionValidator(const TString& scope, TString privateConnectionName = parser.ColumnParser(NAME_COLUMN_NAME).GetOptionalString().GetOrElse(""); TString privateConnectionId = parser.ColumnParser(CONNECTION_ID_COLUMN_NAME).GetOptionalString().GetOrElse(""); - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "The connection with id " << connectionId << " is overridden by the private conection with id " << privateConnectionId << " (" << privateConnectionName << "). Please rename the private connection or use another connection"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "The connection with id " << connectionId << " is overridden by the private conection with id " << privateConnectionId << " (" << privateConnectionName << "). Please rename the private connection or use another connection"; }; const auto query = queryBuilder.Build(); return {query.Sql, query.Params, validator}; @@ -433,12 +433,12 @@ TValidationQuery CreateTtlValidator(const TString& tableName, auto validator = [error](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". 
Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << error; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << error; } return false; @@ -465,23 +465,23 @@ TValidationQuery CreateQueryComputeStatusValidator(const std::vectorInc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; } const FederatedQuery::QueryMeta::ComputeStatus status = query.meta().status(); if (!IsIn(computeStatuses, status)) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << error; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << error; } return false; diff --git a/ydb/core/fq/libs/control_plane_storage/validators.h b/ydb/core/fq/libs/control_plane_storage/validators.h index bc563baa17a8..27e7dccd945e 100644 --- a/ydb/core/fq/libs/control_plane_storage/validators.h +++ b/ydb/core/fq/libs/control_plane_storage/validators.h @@ -15,7 +15,8 @@ #include #include -#include +#include +#include namespace NFq { @@ -130,7 +131,7 @@ TValidationQuery CreateIdempotencyKeyValidator(const TString& scope, auto validator = [response, parseProtobufError](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); } NYdb::TResultSetParser parser(resultSets.back()); @@ -140,7 +141,7 @@ TValidationQuery CreateIdempotencyKeyValidator(const TString& scope, if 
(!response->first.ParseFromString(*parser.ColumnParser(RESPONSE_COLUMN_NAME).GetOptionalString())) { parseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for response. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for response. Please contact internal support"; } return true; @@ -166,7 +167,7 @@ TValidationQuery CreateIdempotencyKeyValidator(const TString& scope, auto validator = [response, parseProtobufError](NYdb::NTable::TDataQueryResult result) { const auto& resultSets = result.GetResultSets(); if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "internal error, result set size is not equal to 1 but equal " << resultSets.size(); } NYdb::TResultSetParser parser(resultSets.back()); @@ -176,7 +177,7 @@ TValidationQuery CreateIdempotencyKeyValidator(const TString& scope, if (!response->first.ParseFromString(*parser.ColumnParser(RESPONSE_COLUMN_NAME).GetOptionalString())) { parseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for response. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for response. 
Please contact internal support"; } response->second.IdempotencyResult = true; diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage.cpp index 7347058203bb..362cb5d7ec6d 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage.cpp @@ -478,7 +478,7 @@ TAsyncStatus TDbRequester::Write( return future; } return writeHandler(session); - } catch (const TCodeLineException& exception) { + } catch (const NYql::TCodeLineException& exception) { if (exception.Code == TIssuesIds::INTERNAL_ERROR) { CPS_LOG_AS_E(*actorSystem, "Validation: " << CurrentExceptionMessage()); } else { @@ -589,7 +589,7 @@ TAsyncStatus TDbRequester::ReadModifyWrite( } return status; }); - } catch (const TCodeLineException& exception) { + } catch (const NYql::TCodeLineException& exception) { if (exception.Code == TIssuesIds::INTERNAL_ERROR) { CPS_LOG_AS_E(*actorSystem, "Validation: " << CurrentExceptionMessage()); } else { @@ -623,7 +623,7 @@ TAsyncStatus TDbRequester::ReadModifyWrite( return future; } return readModifyWriteHandler(session); - } catch (const TCodeLineException& exception) { + } catch (const NYql::TCodeLineException& exception) { if (exception.Code == TIssuesIds::INTERNAL_ERROR) { CPS_LOG_AS_E(*actorSystem, "Validation: " << CurrentExceptionMessage()); } else { diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp index dc4286fd4de7..75f5557ea197 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp @@ -250,7 +250,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding auto [result, resultSets] = Read(query.Sql, query.Params, 
requestCounters, debugInfo); auto prepare = [resultSets=resultSets, limit, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } FederatedQuery::ListBindingsResult result; @@ -259,7 +259,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding FederatedQuery::Binding binding; if (!binding.ParseFromString(*parser.ColumnParser(BINDING_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; } FederatedQuery::BriefBinding& briefBinding = *result.add_binding(); briefBinding.set_name(binding.content().name()); @@ -355,23 +355,23 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeBin auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [=, resultSets=resultSets, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". 
Please contact internal support"; } TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; } FederatedQuery::DescribeBindingResult result; if (!result.mutable_binding()->ParseFromString(*parser.ColumnParser(BINDING_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; } bool hasViewAccess = HasViewAccess(permissions, result.binding().content().acl().visibility(), result.binding().meta().created_by(), user); if (!hasViewAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; } return result; }; @@ -448,19 +448,19 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyBindi std::shared_ptr>> response = std::make_shared>>(); auto prepareParams = [=, config=Config, commonCounters=requestCounters.Common](const TVector& resultSets) { if (resultSets.size() != 2) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". 
Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". Please contact internal support"; } FederatedQuery::Binding binding; { TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the binding id or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the binding id or your access rights"; } if (!binding.ParseFromString(*parser.ColumnParser(BINDING_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for binding. Please contact internal support"; } } @@ -468,7 +468,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyBindi { TResultSetParser parser(resultSets.back()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the connectin id or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. 
Please check the connectin id or your access rights"; } connectionVisibility = static_cast(parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64().GetOrElse(FederatedQuery::Acl::VISIBILITY_UNSPECIFIED)); @@ -476,12 +476,12 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyBindi const FederatedQuery::Acl::Visibility requestBindingVisibility = request.content().acl().visibility(); if (requestBindingVisibility == FederatedQuery::Acl::SCOPE && connectionVisibility == FederatedQuery::Acl::PRIVATE) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding with SCOPE visibility cannot refer to connection with PRIVATE visibility"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding with SCOPE visibility cannot refer to connection with PRIVATE visibility"; } bool hasManageAccess = HasManageAccess(permissions, binding.content().acl().visibility(), binding.meta().created_by(), user); if (!hasManageAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Binding does not exist or permission denied. Please check the id binding or your access rights"; } auto& meta = *binding.mutable_meta(); @@ -494,15 +494,15 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyBindi bool validateType = content.setting().binding_case() == request.content().setting().binding_case(); if (!validateType) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding type cannot be changed. Please specify the same binding type"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Binding type cannot be changed. 
Please specify the same binding type"; } if (binding.content().acl().visibility() == FederatedQuery::Acl::SCOPE && requestBindingVisibility == FederatedQuery::Acl::PRIVATE) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. Please create a new binding with visibility PRIVATE"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. Please create a new binding with visibility PRIVATE"; } if (content.connection_id() != request.content().connection_id()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection id cannot be changed. Please specify the same connection id"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection id cannot be changed. Please specify the same connection id"; } content = request.content(); diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_compute_database.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_compute_database.cpp index 39791ea71397..385dd265fdf3 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_compute_database.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_compute_database.cpp @@ -35,7 +35,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvCreateDatab auto prepareParams = [=](const TVector& resultSets) { if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". 
Please contact internal support"; } TResultSetParser parser(resultSets.front()); @@ -101,18 +101,18 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeDat auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [=, resultSets=resultSets, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Database does not exist or permission denied. Please check the id database or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Database does not exist or permission denied. Please check the id database or your access rights"; } FederatedQuery::Internal::ComputeDatabaseInternal result; if (!result.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for internal compute database. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for internal compute database. 
Please contact internal support"; } return result; @@ -192,18 +192,18 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyDatab auto prepareParams = [=, synchronized = ev->Get()->Synchronized, commonCounters=requestCounters.Common](const TVector& resultSets) { if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Database does not exist or permission denied. Please check the id database or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Database does not exist or permission denied. Please check the id database or your access rights"; } FederatedQuery::Internal::ComputeDatabaseInternal result; if (!result.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for internal compute database. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for internal compute database. 
Please contact internal support"; } if (synchronized) { diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp index c0e30d08b21b..426ce7594725 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp @@ -266,7 +266,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, limit, extractSensitiveFields, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } FederatedQuery::ListConnectionsResult result; @@ -275,7 +275,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect auto& connection = *result.add_connection(); if (!connection.ParseFromString(*parser.ColumnParser(CONNECTION_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. 
Please contact internal support"; } PrepareSensitiveFields(connection, extractSensitiveFields); } @@ -358,23 +358,23 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeCon auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [=, resultSets=resultSets, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } FederatedQuery::DescribeConnectionResult result; TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the id connection or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the id connection or your access rights"; } if (!result.mutable_connection()->ParseFromString(*parser.ColumnParser(CONNECTION_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. Please contact internal support"; } bool hasViewAccess = HasViewAccess(permissions, result.connection().content().acl().visibility(), result.connection().meta().created_by(), user); if (!hasViewAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. 
Please check the id connection or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the id connection or your access rights"; } PrepareSensitiveFields(*result.mutable_connection(), extractSensitiveFields); @@ -449,18 +449,18 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyConne std::shared_ptr>> response = std::make_shared>>(); auto prepareParams = [=, config=Config, commonCounters=requestCounters.Common](const TVector& resultSets) { if (resultSets.size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the id connection or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Connection does not exist or permission denied. Please check the id connection or your access rights"; } FederatedQuery::Connection connection; if (!connection.ParseFromString(*parser.ColumnParser(CONNECTION_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for connection. 
Please contact internal support"; } auto& meta = *connection.mutable_meta(); @@ -473,11 +473,11 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyConne bool validateType = content.setting().connection_case() == request.content().setting().connection_case(); if (!validateType) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection type cannot be changed. Please specify the same connection type"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Connection type cannot be changed. Please specify the same connection type"; } if (content.acl().visibility() == FederatedQuery::Acl::SCOPE && request.content().acl().visibility() == FederatedQuery::Acl::PRIVATE) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. Please create a new connection with visibility PRIVATE"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. Please create a new connection with visibility PRIVATE"; } // FIXME: this code needs better generalization diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_impl.h b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_impl.h index bef8ad4b0aa3..8bf6ba638b58 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_impl.h +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_impl.h @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -183,7 +183,7 @@ THashMap GetEntitiesWithVisibilityPriority(const TResultSet& resultS T entity; if (!entity.ParseFromString(*parser.ColumnParser(columnName).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for GetEntitiesWithVisibilityPriority. 
Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for GetEntitiesWithVisibilityPriority. Please contact internal support"; } const auto visibility = entity.content().acl().visibility(); if (ignorePrivateSources && visibility == FederatedQuery::Acl::PRIVATE) { @@ -211,7 +211,7 @@ TVector GetEntities(const TResultSet& resultSet, const TString& columnName, b T entity; if (!entity.ParseFromString(*parser.ColumnParser(columnName).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for GetEntities. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for GetEntities. Please contact internal support"; } const auto visibility = entity.content().acl().visibility(); if (ignorePrivateSources && visibility == FederatedQuery::Acl::PRIVATE) { @@ -767,7 +767,7 @@ class TYdbControlPlaneStorageActor : public NActors::TActorBootstrapped& resultSets) mutable { const size_t countSets = (idempotencyKey ? 1 : 0) + (request.execute_mode() != FederatedQuery::SAVE ? 2 : 0); if (resultSets.size() != countSets) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to " << countSets << " but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to " << countSets << " but equal " << resultSets.size() << ". 
Please contact internal support"; } if (idempotencyKey) { @@ -186,7 +190,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvCreateQuery if (parser.TryNextRow()) { if (!response->first.ParseFromString(*parser.ColumnParser(RESPONSE_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for idempotency key request. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for idempotency key request. Please contact internal support"; } response->second.IdempotencyResult = true; return make_pair(TString{}, TParamsBuilder{}.Build()); @@ -247,17 +251,17 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvCreateQuery *queryInternal.add_binding() = binding; if (!connectionIds.contains(binding.content().connection_id())) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Unable to resolve connection for binding " << binding.meta().id() << ", name " << binding.content().name() << ", connection id " << binding.content().connection_id(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Unable to resolve connection for binding " << binding.meta().id() << ", name " << binding.content().name() << ", connection id " << binding.content().connection_id(); } } } if (query.ByteSizeLong() > Config->Proto.GetMaxRequestSize()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Incoming request exceeded the size limit: " << query.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please shorten your request"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Incoming request exceeded the size limit: " << query.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". 
Please shorten your request"; } if (queryInternal.ByteSizeLong() > Config->Proto.GetMaxRequestSize()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "The size of all connections and bindings in the project exceeded the limit: " << queryInternal.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please reduce the number of connections and bindings"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "The size of all connections and bindings in the project exceeded the limit: " << queryInternal.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please reduce the number of connections and bindings"; } response->second.After.ConstructInPlace().CopyFrom(query); @@ -460,7 +464,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries auto [result, resultSets] = Read(read.Sql, read.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, limit, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } FederatedQuery::ListQueriesResult result; @@ -469,7 +473,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries FederatedQuery::Query query; if (!query.ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. 
Please contact internal support"; } FederatedQuery::BriefQuery briefQuery; const auto lastJobId = query.meta().last_job_id(); @@ -551,18 +555,18 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeQue auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, user, permissions, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } FederatedQuery::DescribeQueryResult result; if (!result.mutable_query()->ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. 
Please contact internal support"; } const auto lastJobId = result.query().meta().last_job_id(); @@ -572,13 +576,13 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeQue const auto queryUser = result.query().meta().common().created_by(); const bool hasViewAccess = HasViewAccess(permissions, queryVisibility, queryUser, user); if (!hasViewAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } FederatedQuery::Internal::QueryInternal internal; if (!internal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. 
Please contact internal support"; } // decompress plan @@ -612,7 +616,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeQue } if (result.query().ByteSizeLong() > GRPC_MESSAGE_SIZE_LIMIT) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Resulting query of size " << result.query().ByteSizeLong() << " bytes is too big"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Resulting query of size " << result.query().ByteSizeLong() << " bytes is too big"; } return result; }; @@ -680,12 +684,12 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetQuerySta auto [result, resultSets] = Read(read.Sql, read.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, user,permissions] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } FederatedQuery::GetQueryStatusResult result; @@ -696,7 +700,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetQuerySta const auto queryUser = *parser.ColumnParser(USER_COLUMN_NAME).GetOptionalString(); const bool hasViewAccess = HasViewAccess(permissions, queryVisibility, queryUser, user); if (!hasViewAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. 
Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } return result; @@ -806,25 +810,25 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyQuery const size_t countSets = 1 + (request.execute_mode() != FederatedQuery::SAVE ? 2 : 0); if (resultSets.size() != countSets) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to " << countSets << " but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to " << countSets << " but equal " << resultSets.size() << ". Please contact internal support"; } TResultSetParser parser(resultSets.back()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } FederatedQuery::Query query; if (!query.ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; } FederatedQuery::Internal::QueryInternal internal; if (!internal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. 
Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. Please contact internal support"; } *internal.mutable_execution_ttl() = NProtoInterop::CastToProto(TDuration::MilliSeconds(executionLimitMills)); @@ -834,15 +838,15 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyQuery const auto queryUser = query.meta().common().created_by(); const bool hasManageAccess = HasManageAccess(permissions, queryVisibility, queryUser, user); if (!hasManageAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } if (query.content().type() != request.content().type()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Query type cannot be changed. Please specify " << FederatedQuery::QueryContent_QueryType_Name(query.content().type()) << " instead of " << FederatedQuery::QueryContent_QueryType_Name(request.content().type()); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Query type cannot be changed. Please specify " << FederatedQuery::QueryContent_QueryType_Name(query.content().type()) << " instead of " << FederatedQuery::QueryContent_QueryType_Name(request.content().type()); } if (query.content().acl().visibility() == FederatedQuery::Acl::SCOPE && request.content().acl().visibility() == FederatedQuery::Acl::PRIVATE) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. Please create a new query with visibility PRIVATE"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Changing visibility from SCOPE to PRIVATE is forbidden. 
Please create a new query with visibility PRIVATE"; } auto oldVisibility = query.content().acl().visibility(); @@ -866,7 +870,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyQuery }, query.meta().status()); if (!isValidMode) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(query.meta().status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::STARTING) << " is not possible. Please wait for the query to complete or stop it"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(query.meta().status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::STARTING) << " is not possible. Please wait for the query to complete or stop it"; } if (!Config->Proto.GetDisableCurrentIam()) { @@ -924,17 +928,17 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvModifyQuery *internal.add_binding() = binding; if (!connectionIds.contains(binding.content().connection_id())) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Unable to resolve connection for binding " << binding.meta().id() << ", name " << binding.content().name() << ", connection id " << binding.content().connection_id(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Unable to resolve connection for binding " << binding.meta().id() << ", name " << binding.content().name() << ", connection id " << binding.content().connection_id(); } } } if (query.ByteSizeLong() > Config->Proto.GetMaxRequestSize()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Incoming request exceeded the size limit: " << query.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". 
Please shorten your request"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Incoming request exceeded the size limit: " << query.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please shorten your request"; } if (internal.ByteSizeLong() > Config->Proto.GetMaxRequestSize()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "The size of all connections and bindings in the project exceeded the limit: " << internal.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please reduce the number of connections and bindings"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "The size of all connections and bindings in the project exceeded the limit: " << internal.ByteSizeLong() << " of " << Config->Proto.GetMaxRequestSize() << ". Please reduce the number of connections and bindings"; } FederatedQuery::Job job; @@ -1294,7 +1298,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer auto prepareParams = [=, config=Config, commonCounters=requestCounters.Common](const TVector& resultSets) { if (resultSets.size() != 2) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets.size() << ". Please contact internal support"; } auto now = TInstant::Now(); @@ -1304,17 +1308,17 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer { TResultSetParser parser(resultSets[0]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. 
Please check the id of the query or your access rights"; } if (!query.ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; } if (!queryInternal.ParseFromString(*parser.ColumnParser(INTERNAL_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query internal. Please contact internal support"; } tenantName = *parser.ColumnParser(TENANT_COLUMN_NAME).GetOptionalString(); } @@ -1324,19 +1328,19 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer { TResultSetParser parser(resultSets[1]); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. Please check the id query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. Please check the id query or your access rights"; } if (!job.ParseFromString(parser.ColumnParser(JOB_COLUMN_NAME).GetOptionalString().GetOrElse(""))) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. 
Please contact internal support"; } jobId = job.meta().id(); const bool hasManageAccess = HasManageAccess(permissions, query.content().acl().visibility(), query.meta().common().created_by(), user); if (!hasManageAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } } @@ -1365,7 +1369,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer metaQuery.set_status(FederatedQuery::QueryMeta::ABORTING_BY_USER); metaQuery.set_aborted_by(user); } else { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::ABORTING_BY_USER) << " is not possible. Please wait for the previous operation to be completed"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::ABORTING_BY_USER) << " is not possible. Please wait for the previous operation to be completed"; } } @@ -1379,7 +1383,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer metaQuery.set_status(FederatedQuery::QueryMeta::PAUSING); metaQuery.set_paused_by(user); } else { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::PAUSING) << " is not possible. 
Please wait for the previous operation to be completed"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::PAUSING) << " is not possible. Please wait for the previous operation to be completed"; } } @@ -1388,7 +1392,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvControlQuer if (isValidStatusForResume) { metaQuery.set_status(FederatedQuery::QueryMeta::RESUMING); } else { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::RESUMING) << " is not possible. Please wait for the previous operation to be completed"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Conversion from status " << FederatedQuery::QueryMeta::ComputeStatus_Name(metaQuery.status()) << " to " << FederatedQuery::QueryMeta::ComputeStatus_Name(FederatedQuery::QueryMeta::RESUMING) << " is not possible. Please wait for the previous operation to be completed"; } } @@ -1534,7 +1538,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, resultSetIndex, user, permissions, commonCounters=requestCounters.Common] { if (resultSets->size() != 2) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 2 but equal " << resultSets->size() << ". 
Please contact internal support"; } FederatedQuery::GetResultDataResult result; @@ -1543,13 +1547,13 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa const auto& resultSet = (*resultSets)[0]; TResultSetParser parser(resultSet); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; } FederatedQuery::Query query; if (!query.ParseFromString(*parser.ColumnParser(QUERY_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for query. Please contact internal support"; } FederatedQuery::Acl::Visibility queryVisibility = static_cast(parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64().GetOrElse(FederatedQuery::Acl::VISIBILITY_UNSPECIFIED)); @@ -1557,24 +1561,24 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa bool hasViewAccess = HasViewAccess(permissions, queryVisibility, queryUser, user); if (!hasViewAccess) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. Please check the id of the query or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Query does not exist or permission denied. 
Please check the id of the query or your access rights"; } if (resultSetIndex >= query.result_set_meta_size()) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result set index out of bound: " << resultSetIndex << " >= " << query.result_set_meta_size(); + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result set index out of bound: " << resultSetIndex << " >= " << query.result_set_meta_size(); } if (FederatedQuery::QueryMeta::ComputeStatus(*parser.ColumnParser(STATUS_COLUMN_NAME).GetOptionalInt64()) != FederatedQuery::QueryMeta::COMPLETED) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result doesn't exist"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result doesn't exist"; } auto resultSetsExpireAtParser = parser.ColumnParser(RESULT_SETS_EXPIRE_AT_COLUMN_NAME).GetOptionalTimestamp(); if (!resultSetsExpireAtParser) { - ythrow TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result doesn't exist"; + ythrow NYql::TCodeLineException(TIssuesIds::BAD_REQUEST) << "Result doesn't exist"; } if (*resultSetsExpireAtParser < TInstant::Now()) { - ythrow TCodeLineException(TIssuesIds::EXPIRED) << "Result removed by TTL"; + ythrow NYql::TCodeLineException(TIssuesIds::EXPIRED) << "Result removed by TTL"; } resultSetProto.mutable_columns()->CopyFrom(query.result_set_meta(resultSetIndex).column()); @@ -1586,7 +1590,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa while (parser.TryNextRow()) { if (!resultSetProto.add_rows()->ParseFromString(*parser.ColumnParser(RESULT_SET_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for row. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for row. 
Please contact internal support"; } } } @@ -1694,7 +1698,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq auto [result, resultSets] = Read(query.Sql, query.Params, requestCounters, debugInfo); auto prepare = [resultSets=resultSets, limit, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; } FederatedQuery::ListJobsResult result; @@ -1703,7 +1707,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq FederatedQuery::Job job; if (!job.ParseFromString(*parser.ColumnParser(JOB_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. Please contact internal support"; } const TString mergedId = job.meta().id() + "-" + job.query_meta().common().id(); job.mutable_meta()->set_id(mergedId); @@ -1796,24 +1800,24 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvDescribeJob auto prepare = [=, id=request.job_id(), resultSets=resultSets, commonCounters=requestCounters.Common] { if (resultSets->size() != 1) { - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Result set size is not equal to 1 but equal " << resultSets->size() << ". 
Please contact internal support"; } FederatedQuery::DescribeJobResult result; TResultSetParser parser(resultSets->front()); if (!parser.TryNextRow()) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. Please check the job id or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. Please check the job id or your access rights"; } if (!result.mutable_job()->ParseFromString(*parser.ColumnParser(JOB_COLUMN_NAME).GetOptionalString())) { commonCounters->ParseProtobufError->Inc(); - ythrow TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. Please contact internal support"; + ythrow NYql::TCodeLineException(TIssuesIds::INTERNAL_ERROR) << "Error parsing proto message for job. Please contact internal support"; } auto visibility = static_cast(*parser.ColumnParser(VISIBILITY_COLUMN_NAME).GetOptionalInt64()); result.mutable_job()->mutable_meta()->set_id(id); bool hasViewAccces = HasViewAccess(permissions, visibility, result.job().meta().created_by(), user); if (!hasViewAccces) { - ythrow TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. Please check the job id or your access rights"; + ythrow NYql::TCodeLineException(TIssuesIds::ACCESS_DENIED) << "Job does not exist or permission denied. 
Please check the job id or your access rights"; } if (!permissions.Check(TPermissions::VIEW_AST)) { result.mutable_job()->clear_ast(); diff --git a/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_endpoint_generator.cpp b/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_endpoint_generator.cpp index ada5a7709fc7..15d0c6522c3d 100644 --- a/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_endpoint_generator.cpp +++ b/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_endpoint_generator.cpp @@ -13,11 +13,12 @@ namespace NFq { constexpr ui32 CLICKHOUSE_HTTP_SECURE_PORT = 8443; constexpr ui32 CLICKHOUSE_HTTP_INSECURE_PORT = 8123; - // Managed PostgreSQL provides the only port both for secure and insecure connections + // Managed PostgreSQL and Greenplum provide the only port both for secure and insecure connections constexpr ui32 POSTGRESQL_PORT = 6432; - constexpr ui32 GREENPLUM_PORT = 6432; + constexpr ui32 MYSQL_PORT = 3306; + // TMdbEndpointGeneratorLegacy implements behavior required by YQL legacy ClickHouse provider class TMdbEndpointGeneratorLegacy: public NYql::IMdbEndpointGenerator { TEndpoint ToEndpoint(const NYql::IMdbEndpointGenerator::TParams& params) const override { @@ -55,14 +56,14 @@ namespace NFq { ui32 port; switch (params.Protocol) { - case NYql::NConnector::NApi::EProtocol::NATIVE: + case NYql::EGenericProtocol::NATIVE: port = params.UseTls ? CLICKHOUSE_NATIVE_SECURE_PORT : CLICKHOUSE_NATIVE_INSECURE_PORT; break; - case NYql::NConnector::NApi::EProtocol::HTTP: + case NYql::EGenericProtocol::HTTP: port = params.UseTls ? 
CLICKHOUSE_HTTP_SECURE_PORT : CLICKHOUSE_HTTP_INSECURE_PORT; break; default: - ythrow yexception() << "Unexpected protocol for ClickHouse: " << NYql::NConnector::NApi::EProtocol_Name(params.Protocol); + ythrow yexception() << "Unexpected protocol for ClickHouse: " << NYql::EGenericProtocol_Name(params.Protocol); } return TEndpoint(fixedHost, port); @@ -70,18 +71,26 @@ namespace NFq { case NYql::EDatabaseType::PostgreSQL: // https://cloud.yandex.ru/docs/managed-postgresql/operations/connect switch (params.Protocol) { - case NYql::NConnector::NApi::EProtocol::NATIVE: + case NYql::EGenericProtocol::NATIVE: return TEndpoint(fixedHost, POSTGRESQL_PORT); default: - ythrow yexception() << "Unexpected protocol for PostgreSQL " << NYql::NConnector::NApi::EProtocol_Name(params.Protocol); + ythrow yexception() << "Unexpected protocol for PostgreSQL " << NYql::EGenericProtocol_Name(params.Protocol); } case NYql::EDatabaseType::Greenplum: - // https://cloud.yandex.ru/docs/managed-postgresql/operations/connect + // https://cloud.yandex.ru/docs/managed-greenplum/operations/connect switch (params.Protocol) { - case NYql::NConnector::NApi::EProtocol::NATIVE: + case NYql::EGenericProtocol::NATIVE: return TEndpoint(fixedHost, GREENPLUM_PORT); default: - ythrow yexception() << "Unexpected protocol for Greenplum: " << NYql::NConnector::NApi::EProtocol_Name(params.Protocol); + ythrow yexception() << "Unexpected protocol for Greenplum: " << NYql::EGenericProtocol_Name(params.Protocol); + } + case NYql::EDatabaseType::MySQL: + // https://cloud.yandex.ru/docs/managed-mysql/operations/connect + switch (params.Protocol) { + case NYql::EGenericProtocol::NATIVE: + return TEndpoint(fixedHost, MYSQL_PORT); + default: + ythrow yexception() << "Unexpected protocol for MySQL: " << NYql::EGenericProtocol_Name(params.Protocol); } default: ythrow yexception() << "Unexpected database type: " << ToString(params.DatabaseType); diff --git 
a/ydb/core/fq/libs/db_id_async_resolver_impl/ut/mdb_endpoint_generator_ut.cpp b/ydb/core/fq/libs/db_id_async_resolver_impl/ut/mdb_endpoint_generator_ut.cpp index f02d3a4e08ac..f05a6e49aa7c 100644 --- a/ydb/core/fq/libs/db_id_async_resolver_impl/ut/mdb_endpoint_generator_ut.cpp +++ b/ydb/core/fq/libs/db_id_async_resolver_impl/ut/mdb_endpoint_generator_ut.cpp @@ -11,7 +11,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "rc1c-p5waby2y5y1kb5ue.db.yandex.net", .UseTls = true, - .Protocol = NYql::NConnector::NApi::EProtocol::HTTP, + .Protocol = NYql::EGenericProtocol::HTTP, }; UNIT_ASSERT_VALUES_EQUAL( @@ -22,7 +22,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "ya.ru", .UseTls = false, - .Protocol = NYql::NConnector::NApi::EProtocol::HTTP, + .Protocol = NYql::EGenericProtocol::HTTP, }; UNIT_ASSERT_VALUES_EQUAL( @@ -37,7 +37,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "rc1a-d6dv17lv47v5mcop.mdb.yandexcloud.net", .UseTls = true, - .Protocol = NYql::NConnector::NApi::EProtocol::HTTP, + .Protocol = NYql::EGenericProtocol::HTTP, }; UNIT_ASSERT_VALUES_EQUAL( @@ -48,7 +48,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::PostgreSQL, .MdbHost = "rc1b-eyt6dtobu96rwydq.mdb.yandexcloud.net", .UseTls = false, - .Protocol = NYql::NConnector::NApi::EProtocol::NATIVE, + .Protocol = NYql::EGenericProtocol::NATIVE, }; UNIT_ASSERT_VALUES_EQUAL( @@ -65,7 +65,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "rc1a-d6dv17lv47v5mcop.mdb.yandexcloud.net", .UseTls = false, - .Protocol = NYql::NConnector::NApi::EProtocol::HTTP, + .Protocol = NYql::EGenericProtocol::HTTP, }; UNIT_ASSERT_VALUES_EQUAL( @@ -76,7 +76,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = 
"rc1a-d6dv17lv47v5mcop.mdb.yandexcloud.net", .UseTls = false, - .Protocol = NYql::NConnector::NApi::EProtocol::NATIVE, + .Protocol = NYql::EGenericProtocol::NATIVE, }; UNIT_ASSERT_VALUES_EQUAL( @@ -87,7 +87,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "rc1a-d6dv17lv47v5mcop.mdb.yandexcloud.net", .UseTls = true, - .Protocol = NYql::NConnector::NApi::EProtocol::HTTP, + .Protocol = NYql::EGenericProtocol::HTTP, }; UNIT_ASSERT_VALUES_EQUAL( @@ -98,7 +98,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::ClickHouse, .MdbHost = "rc1a-d6dv17lv47v5mcop.mdb.yandexcloud.net", .UseTls = true, - .Protocol = NYql::NConnector::NApi::EProtocol::NATIVE, + .Protocol = NYql::EGenericProtocol::NATIVE, }; UNIT_ASSERT_VALUES_EQUAL( @@ -111,7 +111,7 @@ Y_UNIT_TEST_SUITE(MdbEndpoingGenerator) { .DatabaseType = NYql::EDatabaseType::PostgreSQL, .MdbHost = "rc1b-eyt6dtobu96rwydq.mdb.yandexcloud.net", .UseTls = true, - .Protocol = NYql::NConnector::NApi::EProtocol::NATIVE, + .Protocol = NYql::EGenericProtocol::NATIVE, }; UNIT_ASSERT_VALUES_EQUAL( diff --git a/ydb/core/fq/libs/exceptions/exceptions.h b/ydb/core/fq/libs/exceptions/exceptions.h deleted file mode 100644 index 75d2551ff497..000000000000 --- a/ydb/core/fq/libs/exceptions/exceptions.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include - -namespace NFq { - -// This exception can separate code line and file name from the error message -struct TCodeLineException: public yexception { - - TSourceLocation SourceLocation; - mutable TString Message; - TIssuesIds::EIssueCode Code; - - TCodeLineException(TIssuesIds::EIssueCode code); - - TCodeLineException(const TSourceLocation& sl, const TCodeLineException& t); - - virtual const char* what() const noexcept override; - - const char* GetRawMessage() const; - -}; - -TCodeLineException operator+(const TSourceLocation& sl, TCodeLineException&& t); - -} // namespace NFq \ No newline at 
end of file diff --git a/ydb/core/fq/libs/exceptions/ya.make b/ydb/core/fq/libs/exceptions/ya.make deleted file mode 100644 index 483a47b8e3f6..000000000000 --- a/ydb/core/fq/libs/exceptions/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -LIBRARY() - -SRCS( - exceptions.cpp -) - -PEERDIR( - ydb/core/fq/libs/config/protos -) - -END() diff --git a/ydb/core/fq/libs/init/init.cpp b/ydb/core/fq/libs/init/init.cpp index acd5518a98cf..18054bb09fa5 100644 --- a/ydb/core/fq/libs/init/init.cpp +++ b/ydb/core/fq/libs/init/init.cpp @@ -227,7 +227,7 @@ void Init( RegisterYdbReadActorFactory(*asyncIoFactory, yqSharedResources->UserSpaceYdbDriver, credentialsFactory); s3ActorsFactory->RegisterS3ReadActorFactory(*asyncIoFactory, credentialsFactory, httpGateway, s3HttpRetryPolicy, readActorFactoryCfg, - yqCounters->GetSubgroup("subsystem", "S3ReadActor")); + yqCounters->GetSubgroup("subsystem", "S3ReadActor"), protoConfig.GetGateways().GetS3().GetAllowLocalFiles()); s3ActorsFactory->RegisterS3WriteActorFactory(*asyncIoFactory, credentialsFactory, httpGateway, s3HttpRetryPolicy); diff --git a/ydb/core/fq/libs/shared_resources/db_exec.h b/ydb/core/fq/libs/shared_resources/db_exec.h index 8eeac48ec64b..b69785258034 100644 --- a/ydb/core/fq/libs/shared_resources/db_exec.h +++ b/ydb/core/fq/libs/shared_resources/db_exec.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace NFq { @@ -214,7 +214,7 @@ class TDbExecuter : public TDbExecutable { if (self->Steps[self->CurrentStepIndex].ResultCallback) { try { self->Steps[self->CurrentStepIndex].ResultCallback(*self, result.GetResultSets()); - } catch (const TCodeLineException& exception) { + } catch (const NYql::TCodeLineException& exception) { NYql::TIssue issue = MakeErrorIssue(exception.Code, exception.GetRawMessage()); self->Issues.AddIssue(issue); NYql::TIssue internalIssue = MakeErrorIssue(exception.Code, CurrentExceptionMessage()); diff --git a/ydb/core/fq/libs/shared_resources/ya.make 
b/ydb/core/fq/libs/shared_resources/ya.make index 3949ba3c75b6..118765441d6c 100644 --- a/ydb/core/fq/libs/shared_resources/ya.make +++ b/ydb/core/fq/libs/shared_resources/ya.make @@ -13,13 +13,13 @@ PEERDIR( ydb/core/fq/libs/control_plane_storage/proto ydb/core/fq/libs/db_schema ydb/core/fq/libs/events - ydb/core/fq/libs/exceptions ydb/core/fq/libs/quota_manager/events ydb/core/fq/libs/shared_resources/interface ydb/core/protos ydb/library/db_pool ydb/library/logger ydb/library/security + ydb/library/yql/utils ydb/public/sdk/cpp/client/extensions/solomon_stats ydb/public/sdk/cpp/client/ydb_driver ydb/public/sdk/cpp/client/ydb_extension diff --git a/ydb/core/grpc_services/base/flow_control.h b/ydb/core/grpc_services/base/flow_control.h new file mode 100644 index 000000000000..f3c2ca72dd4d --- /dev/null +++ b/ydb/core/grpc_services/base/flow_control.h @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NGRpcService { + +class TRpcFlowControlState { +public: + TRpcFlowControlState(ui64 inflightLimitBytes) + : InflightLimitBytes_(inflightLimitBytes) {} + + void PushResponse(ui64 responseSizeBytes) { + ResponseSizeQueue_.push(responseSizeBytes); + TotalResponsesSize_ += responseSizeBytes; + } + + void PopResponse() { + Y_ENSURE(!ResponseSizeQueue_.empty()); + TotalResponsesSize_ -= ResponseSizeQueue_.front(); + ResponseSizeQueue_.pop(); + } + + size_t QueueSize() const { + return ResponseSizeQueue_.size(); + } + + i64 FreeSpaceBytes() const { // Negative value temporarily stops data evaluation in DQ graph + return static_cast(InflightLimitBytes_) - static_cast(TotalResponsesSize_); + } + + ui64 InflightBytes() const { + return TotalResponsesSize_; + } + + ui64 InflightLimitBytes() const { + return InflightLimitBytes_; + } + +private: + const ui64 InflightLimitBytes_; + + TQueue ResponseSizeQueue_; + ui64 TotalResponsesSize_ = 0; +}; + +} // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/grpc_request_check_actor.h 
b/ydb/core/grpc_services/grpc_request_check_actor.h index 54fe15aca653..5e944d63e16e 100644 --- a/ydb/core/grpc_services/grpc_request_check_actor.h +++ b/ydb/core/grpc_services/grpc_request_check_actor.h @@ -312,6 +312,7 @@ class TGrpcRequestCheckActor SetTokenAndDie(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: Counters_->IncDatabaseRateLimitedCounter(); LOG_INFO(*TlsActivationContext, NKikimrServices::GRPC_SERVER, "Throughput limit exceeded"); ReplyOverloadedAndDie(MakeIssue(NKikimrIssues::TIssuesIds::YDB_RESOURCE_USAGE_LIMITED, "Throughput limit exceeded")); @@ -331,7 +332,8 @@ class TGrpcRequestCheckActor } }; - req.mutable_operation_params()->mutable_operation_timeout()->set_nanos(200000000); // same as cloud-go serverless proxy + req.mutable_operation_params()->mutable_operation_timeout()->set_seconds(10); + req.mutable_operation_params()->mutable_cancel_after()->set_nanos(200000000); // same as cloud-go serverless proxy NKikimr::NRpcService::RateLimiterAcquireUseSameMailbox( std::move(req), diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp index 5a9c0771480f..cf4eafb7e395 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy.cpp @@ -420,9 +420,22 @@ void TGRpcRequestProxyImpl::HandleUndelivery(TEvents::TEvUndelivered::TPtr& ev) bool TGRpcRequestProxyImpl::IsAuthStateOK(const IRequestProxyCtx& ctx) { const auto& state = ctx.GetAuthState(); - return state.State == NYdbGrpc::TAuthState::AS_OK || - state.State == NYdbGrpc::TAuthState::AS_FAIL && state.NeedAuth == false || - state.NeedAuth == false && !ctx.GetYdbToken(); + if (state.State == NYdbGrpc::TAuthState::AS_OK) { + return true; + } + + const bool authorizationParamsAreSet = ctx.GetYdbToken() || !ctx.FindClientCertPropertyValues().empty(); + if (!state.NeedAuth && !authorizationParamsAreSet) { + return true; + } + + if (!state.NeedAuth && state.State == 
NYdbGrpc::TAuthState::AS_FAIL) { + if (AppData()->EnforceUserTokenCheckRequirement && authorizationParamsAreSet) { + return false; + } + return true; + } + return false; } void TGRpcRequestProxyImpl::MaybeStartTracing(IRequestProxyCtx& ctx) { diff --git a/ydb/core/grpc_services/grpc_request_proxy_simple.cpp b/ydb/core/grpc_services/grpc_request_proxy_simple.cpp index 3ad80fbbf542..c2274c3d7be3 100644 --- a/ydb/core/grpc_services/grpc_request_proxy_simple.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy_simple.cpp @@ -172,9 +172,22 @@ void TGRpcRequestProxySimple::HandleUndelivery(TEvents::TEvUndelivered::TPtr& ev bool TGRpcRequestProxySimple::IsAuthStateOK(const IRequestProxyCtx& ctx) { const auto& state = ctx.GetAuthState(); - return state.State == NYdbGrpc::TAuthState::AS_OK || - state.State == NYdbGrpc::TAuthState::AS_FAIL && state.NeedAuth == false || - state.NeedAuth == false && !ctx.GetYdbToken(); + if (state.State == NYdbGrpc::TAuthState::AS_OK) { + return true; + } + + const bool authorizationParamsAreSet = ctx.GetYdbToken() || !ctx.FindClientCertPropertyValues().empty(); + if (!state.NeedAuth && !authorizationParamsAreSet) { + return true; + } + + if (!state.NeedAuth && state.State == NYdbGrpc::TAuthState::AS_FAIL) { + if (AppData()->EnforceUserTokenCheckRequirement && authorizationParamsAreSet) { + return false; + } + return true; + } + return false; } template diff --git a/ydb/core/grpc_services/local_rate_limiter.cpp b/ydb/core/grpc_services/local_rate_limiter.cpp index d77fd26f9845..993c71e246e8 100644 --- a/ydb/core/grpc_services/local_rate_limiter.cpp +++ b/ydb/core/grpc_services/local_rate_limiter.cpp @@ -23,6 +23,7 @@ TActorId RateLimiterAcquireUseSameMailbox( onSuccess(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: onTimeout(); break; default: @@ -32,7 +33,8 @@ TActorId RateLimiterAcquireUseSameMailbox( }; Ydb::RateLimiter::AcquireResourceRequest request; - SetDuration(duration, 
*request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration * 10, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration, *request.mutable_operation_params()->mutable_cancel_after()); request.set_coordination_node_path(fullPath.CoordinationNode); request.set_resource_path(fullPath.ResourcePath); request.set_required(required); @@ -72,6 +74,7 @@ TActorId RateLimiterAcquireUseSameMailbox( onSuccess(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: onTimeout(); break; default: @@ -82,7 +85,8 @@ TActorId RateLimiterAcquireUseSameMailbox( const auto& rlPath = maybeRlPath.GetRef(); Ydb::RateLimiter::AcquireResourceRequest request; - SetDuration(duration, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration * 10, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration, *request.mutable_operation_params()->mutable_cancel_after()); request.set_coordination_node_path(rlPath.CoordinationNode); request.set_resource_path(rlPath.ResourcePath); request.set_required(required); diff --git a/ydb/core/grpc_services/local_rpc/local_rpc.h b/ydb/core/grpc_services/local_rpc/local_rpc.h index 53771041766a..eb4b4a8deee2 100644 --- a/ydb/core/grpc_services/local_rpc/local_rpc.h +++ b/ydb/core/grpc_services/local_rpc/local_rpc.h @@ -153,7 +153,12 @@ class TLocalRpcCtx : public TLocalRpcCtxImpl { if (key == NYdb::YDB_DATABASE_HEADER) { return GetDatabaseName(); } - return TMaybe{}; + auto valueIt = PeerMeta.find(key); + return valueIt == PeerMeta.end() ? 
Nothing() : TMaybe(valueIt->second); + } + + void PutPeerMeta(const TString& key, const TString& value) { + PeerMeta.insert_or_assign(key, value); } TVector FindClientCert() const override { @@ -278,6 +283,7 @@ class TLocalRpcCtx : public TLocalRpcCtxImpl { const bool InternalCall; TIntrusiveConstPtr InternalToken; const TString EmptySerializedTokenMessage_; + TMap PeerMeta; google::protobuf::Arena Arena; }; @@ -318,6 +324,41 @@ NThreading::TFuture DoLocalRpc(typename TRpc::TRequest return DoLocalRpc(std::move(proto), database, token, Nothing(), actorSystem, internalCall); } +template +NThreading::TFuture DoLocalRpc( + typename TRpc::TRequest&& proto, + const TString& database, + const TMaybe& token, + const TMaybe& requestType, + TActorSystem* actorSystem, + const TMap& peerMeta, + bool internalCall = false +) +{ + auto promise = NThreading::NewPromise(); + + SetRequestSyncOperationMode(proto); + + using TCbWrapper = TPromiseWrapper; + auto req = new TLocalRpcCtx( + std::move(proto), + TCbWrapper(promise), + database, + token, + requestType, + internalCall + ); + + for (const auto& [key, value] : peerMeta) { + req->PutPeerMeta(key, value); + } + + auto actor = TRpc::CreateRpcActor(req); + actorSystem->Register(actor, TMailboxType::HTSwap, actorSystem->AppData()->UserPoolId); + + return promise.GetFuture(); +} + template TActorId DoLocalRpcSameMailbox(typename TRpc::TRequest&& proto, std::function&& cb, const TString& database, const TMaybe& token, const TMaybe& requestType, diff --git a/ydb/core/grpc_services/query/rpc_execute_query.cpp b/ydb/core/grpc_services/query/rpc_execute_query.cpp index 0f8ab413a3cb..5ddc3dea0573 100644 --- a/ydb/core/grpc_services/query/rpc_execute_query.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_query.cpp @@ -1,9 +1,9 @@ #include "service_query.h" - #include #include #include #include +#include #include #include #include @@ -23,51 +23,10 @@ using TEvExecuteQueryRequest = TGrpcRequestNoOperationCall LastSeqNo; - ui64 
AckedFreeSpaceBytes = 0; + i64 AckedFreeSpaceBytes = 0; TActorId ActorId; }; -class TRpcFlowControlState { -public: - TRpcFlowControlState(ui64 inflightLimitBytes) - : InflightLimitBytes_(inflightLimitBytes) {} - - void PushResponse(ui64 responseSizeBytes) { - ResponseSizeQueue_.push(responseSizeBytes); - TotalResponsesSize_ += responseSizeBytes; - } - - void PopResponse() { - Y_ENSURE(!ResponseSizeQueue_.empty()); - TotalResponsesSize_ -= ResponseSizeQueue_.front(); - ResponseSizeQueue_.pop(); - } - - size_t QueueSize() const { - return ResponseSizeQueue_.size(); - } - - ui64 FreeSpaceBytes() const { - return TotalResponsesSize_ < InflightLimitBytes_ - ? InflightLimitBytes_ - TotalResponsesSize_ - : 0; - } - - ui64 InflightBytes() const { - return TotalResponsesSize_; - } - - ui64 InflightLimitBytes() const { - return InflightLimitBytes_; - } - -private: - const ui64 InflightLimitBytes_; - - TQueue ResponseSizeQueue_; - ui64 TotalResponsesSize_ = 0; -}; - bool FillTxSettings(const Ydb::Query::TransactionSettings& from, Ydb::Table::TransactionSettings& to, NYql::TIssues& issues) { @@ -326,13 +285,13 @@ class TExecuteQueryRPC : public TActorBootstrapped { FlowControl_.PopResponse(); } - ui64 freeSpaceBytes = FlowControl_.FreeSpaceBytes(); + const i64 freeSpaceBytes = FlowControl_.FreeSpaceBytes(); for (auto& pair : StreamChannels_) { const auto& channelId = pair.first; auto& channel = pair.second; - if (freeSpaceBytes > 0 && channel.LastSeqNo && channel.AckedFreeSpaceBytes == 0) { + if (freeSpaceBytes > 0 && channel.LastSeqNo && channel.AckedFreeSpaceBytes <= 0) { LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, this->SelfId() << "Resume execution, " << ", channel: " << channelId << ", seqNo: " << channel.LastSeqNo @@ -361,7 +320,7 @@ class TExecuteQueryRPC : public TActorBootstrapped { Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); FlowControl_.PushResponse(out.size()); - auto freeSpaceBytes = FlowControl_.FreeSpaceBytes(); + const i64 freeSpaceBytes 
= FlowControl_.FreeSpaceBytes(); Request_->SendSerializedResult(std::move(out), Ydb::StatusIds::SUCCESS); @@ -392,7 +351,7 @@ class TExecuteQueryRPC : public TActorBootstrapped { bool hasTrailingMessage = false; auto& kqpResponse = record.GetResponse(); - if (kqpResponse.GetYdbResults().size() > 1) { + if (kqpResponse.GetYdbResults().size() > 1 && QueryAction != NKikimrKqp::QUERY_ACTION_EXPLAIN) { auto issue = MakeIssue(NKikimrIssues::TIssuesIds::DEFAULT_ERROR, "Unexpected trailing message with multiple result sets."); ReplyFinishStream(Ydb::StatusIds::INTERNAL_ERROR, issue); diff --git a/ydb/core/grpc_services/query/rpc_execute_script.cpp b/ydb/core/grpc_services/query/rpc_execute_script.cpp index 1c5efad67097..7f1502b35574 100644 --- a/ydb/core/grpc_services/query/rpc_execute_script.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_script.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -71,27 +72,28 @@ std::tuple FillKqpRequest( return {Ydb::StatusIds::SUCCESS, {}}; } -class TExecuteScriptRPC : public TActorBootstrapped { +class TExecuteScriptRPC : public TRpcRequestActor { public: + using TRpcRequestActorBase = TRpcRequestActor; + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::GRPC_REQ; } - TExecuteScriptRPC(TEvExecuteScriptRequest* request) - : Request_(request) + TExecuteScriptRPC(IRequestNoOpCtx* request) + : TRpcRequestActorBase(request) {} void Bootstrap() { NYql::TIssues issues; - const auto& request = *Request_->GetProtoRequest(); + const auto& request = GetProtoRequest(); - if (request.operation_params().operation_mode() == Ydb::Operations::OperationParams::SYNC) { + if (request->operation_params().operation_mode() == Ydb::Operations::OperationParams::SYNC) { issues.AddIssue("ExecuteScript must be asyncronous operation"); return Reply(Ydb::StatusIds::BAD_REQUEST, issues); } - AuditContextAppend(Request_.get(), request); - + 
AuditContextAppend(Request.Get(), request); Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS; if (auto scriptRequest = MakeScriptRequest(issues, status)) { if (Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), scriptRequest.Release())) { @@ -122,14 +124,14 @@ class TExecuteScriptRPC : public TActorBootstrapped { } THolder MakeScriptRequest(NYql::TIssues& issues, Ydb::StatusIds::StatusCode& status) const { - const auto* req = Request_->GetProtoRequest(); - const auto traceId = Request_->GetTraceId(); + const auto* req = GetProtoRequest(); + const auto traceId = Request->GetTraceId(); auto ev = MakeHolder(); - SetAuthToken(ev, *Request_); - SetDatabase(ev, *Request_); - SetRlPath(ev, *Request_); + SetAuthToken(ev, *Request); + SetDatabase(ev, *Request); + SetRlPath(ev, *Request); if (traceId) { ev->Record.SetTraceId(traceId.GetRef()); @@ -162,12 +164,9 @@ class TExecuteScriptRPC : public TActorBootstrapped { result.set_status(status); - AuditContextAppend(Request_.get(), *Request_->GetProtoRequest(), result); - - TString serializedResult; - Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&serializedResult); + AuditContextAppend(Request.Get(), GetProtoRequest(), result); - Request_->SendSerializedResult(std::move(serializedResult), status); + TProtoResponseHelper::SendProtoResponse(result, status, Request); PassAway(); } @@ -177,9 +176,6 @@ class TExecuteScriptRPC : public TActorBootstrapped { result.set_ready(true); Reply(status, std::move(result), issues); } - -private: - std::unique_ptr Request_; }; } // namespace @@ -193,6 +189,11 @@ void DoExecuteScript(std::unique_ptr p, const IFacilityProvider f.RegisterActor(new TExecuteScriptRPC(req)); } +} // namespace NQuery + +template<> +IActor* TEvExecuteScriptRequest::CreateRpcActor(IRequestNoOpCtx* msg) { + return new TExecuteScriptRPC(msg); } } // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp 
b/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp index e25877095cc3..80d815723b8a 100644 --- a/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp +++ b/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp @@ -35,7 +35,7 @@ class TFetchScriptResultsRPC : public TRpcRequestActorSendSerializedResult(std::move(serializedResult), status); + TProtoResponseHelper::SendProtoResponse(result, status, Request); PassAway(); } @@ -154,4 +151,9 @@ void DoFetchScriptResults(std::unique_ptr p, const IFacilityPro } +template<> +IActor* TEvFetchScriptResultsRequest::CreateRpcActor(IRequestNoOpCtx* msg) { + return new TFetchScriptResultsRPC(msg); +} + } // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/query/rpc_kqp_tx.cpp b/ydb/core/grpc_services/query/rpc_kqp_tx.cpp index 4f92c00faff7..d71caa0f428e 100644 --- a/ydb/core/grpc_services/query/rpc_kqp_tx.cpp +++ b/ydb/core/grpc_services/query/rpc_kqp_tx.cpp @@ -124,6 +124,7 @@ class TBeginTransactionRPC : public TActorBootstrapped { if (kqpResponse.HasTxMeta()) { beginTxResult->mutable_tx_meta()->set_id(kqpResponse.GetTxMeta().id()); } + *beginTxResult->mutable_issues() = issueMessage; } Reply(record.GetYdbStatus(), beginTxResult); @@ -168,7 +169,7 @@ class TFinishTransactionRPC : public TActorBootstrapped private: virtual std::pair GetReqData() const = 0; virtual void Fill(NKikimrKqp::TQueryRequest* req) const = 0; - virtual NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const = 0; + virtual NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const = 0; void StateWork(TAutoPtr& ev) { try { @@ -218,15 +219,15 @@ class TFinishTransactionRPC : public TActorBootstrapped const auto& record = ev->Get()->Record.GetRef(); FillCommonKqpRespFields(record, Request.get()); + NYql::TIssues issues; if (record.HasResponse()) { const auto& kqpResponse = record.GetResponse(); const auto& issueMessage = kqpResponse.GetQueryIssues(); - 
NYql::TIssues issues; NYql::IssuesFromMessage(issueMessage, issues); Request->RaiseIssues(issues); } - Reply(record.GetYdbStatus(), CreateResult(record.GetYdbStatus())); + Reply(record.GetYdbStatus(), CreateResult(record.GetYdbStatus(), issues)); } void InternalError(const TString& message) { @@ -271,9 +272,10 @@ class TCommitTransactionRPC : public TFinishTransactionRPC { req->MutableTxControl()->set_commit_tx(true); } - NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const override { + NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const override { auto result = TEvCommitTransactionRequest::AllocateResult(Request); result->set_status(status); + NYql::IssuesToMessage(issues, result->mutable_issues()); return result; } }; @@ -293,9 +295,10 @@ class TRollbackTransactionRPC : public TFinishTransactionRPC { req->SetAction(NKikimrKqp::QUERY_ACTION_ROLLBACK_TX); } - NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const override { + NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const override { auto result = TEvRollbackTransactionRequest::AllocateResult(Request); result->set_status(status); + NYql::IssuesToMessage(issues, result->mutable_issues()); return result; } }; diff --git a/ydb/core/grpc_services/rpc_alter_table.cpp b/ydb/core/grpc_services/rpc_alter_table.cpp index b18ade6d3f0b..290a129fdcbc 100644 --- a/ydb/core/grpc_services/rpc_alter_table.cpp +++ b/ydb/core/grpc_services/rpc_alter_table.cpp @@ -109,12 +109,12 @@ class TAlterTableRPC : public TRpcSchemeRequestActorServices.SchemeCache, ctx); } - void PrepareAlterUserAttrubutes() { + void GetProxyServices() { using namespace NTxProxy; Send(MakeTxProxyID(), new TEvTxUserProxy::TEvGetProxyServicesRequest); } @@ -222,13 +222,38 @@ class TAlterTableRPC : public TRpcSchemeRequestActor(ev)->Request->ResultSet.emplace_back(); - entry.Operation = 
NSchemeCache::TSchemeCacheNavigate::OpTable; entry.Path = paths; } Send(schemeCache, ev); } + void Navigate(const TTableId& pathId) { + DatabaseName = Request_->GetDatabaseName() + .GetOrElse(DatabaseFromDomain(AppData())); + + auto ev = CreateNavigateForPath(DatabaseName); + { + auto& entry = static_cast(ev)->Request->ResultSet.emplace_back(); + entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.TableId = pathId; + entry.ShowPrivatePath = true; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; + } + + Send(MakeSchemeCacheID(), ev); + } + + static bool IsChangefeedOperation(EOp type) { + switch (type) { + case EOp::AddChangefeed: + case EOp::DropChangefeed: + return true; + default: + return false; + } + } + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { TXLOG_D("Handle TEvTxProxySchemeCache::TEvNavigateKeySetResult" << ", errors# " << ev->Get()->Request.Get()->ErrorCount); @@ -251,13 +276,48 @@ class TAlterTableRPC : public TRpcSchemeRequestActorResultSet.empty()); + const auto& entry = resp->ResultSet.back(); + + switch (entry.Kind) { + case NSchemeCache::TSchemeCacheNavigate::KindTable: + case NSchemeCache::TSchemeCacheNavigate::KindColumnTable: + case NSchemeCache::TSchemeCacheNavigate::KindExternalTable: + case NSchemeCache::TSchemeCacheNavigate::KindExternalDataSource: + case NSchemeCache::TSchemeCacheNavigate::KindView: + break; // table + case NSchemeCache::TSchemeCacheNavigate::KindIndex: + if (IsChangefeedOperation(OpType)) { + break; + } + [[fallthrough]]; + default: + Request_->RaiseIssue(MakeIssue(NKikimrIssues::TIssuesIds::GENERIC_RESOLVE_ERROR, TStringBuilder() + << "Unable to nagivate: " << JoinPath(entry.Path) << " status: PathNotTable")); + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + switch (OpType) { case EOp::AddIndex: return AlterTableAddIndexOp(resp, ctx); case EOp::Attribute: - Y_ABORT_UNLESS(!resp->ResultSet.empty()); 
ResolvedPathId = resp->ResultSet.back().TableId.PathId; return AlterTable(ctx); + case EOp::AddChangefeed: + case EOp::DropChangefeed: + if (entry.Kind != NSchemeCache::TSchemeCacheNavigate::KindIndex) { + AlterTable(ctx); + } else if (auto list = entry.ListNodeEntry) { + if (list->Children.size() != 1) { + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + + const auto& child = list->Children.at(0); + AlterTable(ctx, CanonizePath(ChildPath(NKikimr::SplitPath(GetProtoRequest()->path()), child.Name))); + } else { + Navigate(entry.TableId); + } + break; default: TXLOG_E("Got unexpected cache response"); return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); @@ -351,13 +411,14 @@ class TAlterTableRPC : public TRpcSchemeRequestActor& overridePath = {}) { const auto req = GetProtoRequest(); std::unique_ptr proposeRequest = CreateProposeTransaction(); auto modifyScheme = proposeRequest->Record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetAllowAccessToPrivatePaths(overridePath.Defined()); Ydb::StatusIds::StatusCode code; TString error; - if (!BuildAlterTableModifyScheme(req, modifyScheme, Profiles, ResolvedPathId, code, error)) { + if (!BuildAlterTableModifyScheme(overridePath.GetOrElse(req->path()), req, modifyScheme, Profiles, ResolvedPathId, code, error)) { NYql::TIssues issues; issues.AddIssue(NYql::TIssue(error)); return Reply(code, issues, ctx); diff --git a/ydb/core/grpc_services/rpc_create_table.cpp b/ydb/core/grpc_services/rpc_create_table.cpp index 556004f466d0..88fed4de46d3 100644 --- a/ydb/core/grpc_services/rpc_create_table.cpp +++ b/ydb/core/grpc_services/rpc_create_table.cpp @@ -181,8 +181,28 @@ class TCreateTableRPC : public TRpcSchemeRequestActorcolumns()) { + switch (column.default_value_case()) { + case Ydb::Table::ColumnMeta::kFromSequence: { + auto* seqDesc = modifyScheme->MutableCreateIndexedTable()->MutableSequenceDescription()->Add(); + if (!FillSequenceDescription(*seqDesc, column.from_sequence(), code, error)) { + NYql::TIssues 
issues; + issues.AddIssue(NYql::TIssue(error)); + return Reply(code, issues, ctx); + } + hasSerial = true; + break; + } + default: break; + } + } + NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; - if (req->indexesSize()) { + if (req->indexesSize() || hasSerial) { modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreateIndexedTable); tableDesc = modifyScheme->MutableCreateIndexedTable()->MutableTableDescription(); } else { @@ -192,9 +212,6 @@ class TCreateTableRPC : public TRpcSchemeRequestActorSetName(name); - StatusIds::StatusCode code = StatusIds::SUCCESS; - TString error; - if (!FillColumnDescription(*tableDesc, req->columns(), code, error)) { NYql::TIssues issues; issues.AddIssue(NYql::TIssue(error)); diff --git a/ydb/core/grpc_services/rpc_describe_table.cpp b/ydb/core/grpc_services/rpc_describe_table.cpp index 05f481e60c89..c4fa00af373b 100644 --- a/ydb/core/grpc_services/rpc_describe_table.cpp +++ b/ydb/core/grpc_services/rpc_describe_table.cpp @@ -1,11 +1,11 @@ -#include "service_table.h" -#include - #include "rpc_calls.h" #include "rpc_scheme_base.h" - #include "service_table.h" -#include "rpc_common/rpc_common.h" + +#include +#include +#include +#include #include #include #include @@ -22,6 +22,20 @@ using TEvDescribeTableRequest = TGrpcRequestOperationCall { using TBase = TRpcSchemeRequestActor; + TString OverrideName; + + static bool ShowPrivatePath(const TString& path) { + if (AppData()->AllowPrivateTableDescribeForTest) { + return true; + } + + if (path.EndsWith("/indexImplTable")) { + return true; + } + + return false; + } + public: TDescribeTableRPC(IRequestOpCtx* msg) : TBase(msg) {} @@ -29,18 +43,63 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorpath(); + const auto paths = NKikimr::SplitPath(path); + if (paths.empty()) { + Request_->RaiseIssue(NYql::TIssue("Invalid path")); + return Reply(Ydb::StatusIds::BAD_REQUEST, ctx); + } + + auto navigate = MakeHolder(); + navigate->DatabaseName = 
CanonizePath(Request_->GetDatabaseName().GetOrElse("")); + auto& entry = navigate->ResultSet.emplace_back(); + entry.Path = paths; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; + entry.SyncVersion = true; + entry.ShowPrivatePath = ShowPrivatePath(path); + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate)); Become(&TDescribeTableRPC::StateWork); } private: void StateWork(TAutoPtr& ev) { switch (ev->GetTypeRewrite()) { + HFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); default: TBase::StateWork(ev); } } + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { + auto* navigate = ev->Get()->Request.Get(); + + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + const auto& entry = navigate->ResultSet.front(); + + if (navigate->ErrorCount > 0) { + switch (entry.Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + default: + return Reply(Ydb::StatusIds::UNAVAILABLE, ctx); + } + } + + if (entry.Kind == NSchemeCache::TSchemeCacheNavigate::KindIndex) { + auto list = entry.ListNodeEntry; + if (!list || list->Children.size() != 1) { + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + + OverrideName = entry.Path.back(); + SendProposeRequest(CanonizePath(ChildPath(entry.Path, list->Children.at(0).Name)), ctx); + } else { + SendProposeRequest(GetProtoRequest()->path(), ctx); + } + } + void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx) { const auto& record = ev->Get()->GetRecord(); const auto status = record.GetStatus(); @@ -53,9 +112,10 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorset_name(pathDescription.GetSelf().GetName()); - 
selfEntry->set_type(static_cast(pathDescription.GetSelf().GetPathType())); ConvertDirectoryEntry(pathDescription.GetSelf(), selfEntry, true); + if (OverrideName) { + selfEntry->set_name(OverrideName); + } if (pathDescription.HasColumnTableDescription()) { const auto& tableDescription = pathDescription.GetColumnTableDescription(); @@ -82,6 +142,14 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorRaiseIssue(NYql::TIssue(error)); + return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); + } + describeTableResult.mutable_primary_key()->CopyFrom(tableDescription.GetKeyColumnNames()); try { @@ -92,7 +160,14 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorRaiseIssue(NYql::ExceptionToIssue(ex)); + return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); + } + FillChangefeedDescription(describeTableResult, tableDescription); if (GetProtoRequest()->include_table_stats()) { @@ -129,9 +204,8 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorpath(); std::unique_ptr navigateRequest(new TEvTxUserProxy::TEvNavigate()); SetAuthToken(navigateRequest, *Request_); @@ -146,10 +220,12 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorMutableOptions()->SetReturnPartitionStats(true); } - if (AppData(ctx)->AllowPrivateTableDescribeForTest || path.EndsWith("/indexImplTable")) { - record->MutableOptions()->SetShowPrivateTable(true); + if (req->include_set_val()) { + record->MutableOptions()->SetReturnSetVal(true); } + record->MutableOptions()->SetShowPrivateTable(ShowPrivatePath(path)); + ctx.Send(MakeTxProxyID(), navigateRequest.release()); } }; diff --git a/ydb/core/grpc_services/rpc_execute_data_query.cpp b/ydb/core/grpc_services/rpc_execute_data_query.cpp index f5b7e87043ff..19f392a0d310 100644 --- a/ydb/core/grpc_services/rpc_execute_data_query.cpp +++ b/ydb/core/grpc_services/rpc_execute_data_query.cpp @@ -185,9 +185,8 @@ class TExecuteDataQueryRPC : public TRpcKqpRequestActormutable_result_sets()->Swap(record.MutableResponse()->MutableYdbResults()); - } else { 
- NKqp::ConvertKqpQueryResultsToDbResult(kqpResponse, queryResult); } + ConvertQueryStats(kqpResponse, queryResult); if (kqpResponse.HasTxMeta()) { queryResult->mutable_tx_meta()->CopyFrom(kqpResponse.GetTxMeta()); diff --git a/ydb/core/grpc_services/rpc_execute_yql_script.cpp b/ydb/core/grpc_services/rpc_execute_yql_script.cpp index 72b8350d8b74..53fd5f6ff146 100644 --- a/ydb/core/grpc_services/rpc_execute_yql_script.cpp +++ b/ydb/core/grpc_services/rpc_execute_yql_script.cpp @@ -97,7 +97,11 @@ class TExecuteYqlScriptRPC : public TRpcKqpRequestActor(Request_); try { - NKqp::ConvertKqpQueryResultsToDbResult(kqpResponse, queryResult); + const auto& results = kqpResponse.GetYdbResults(); + for (const auto& result : results) { + queryResult->add_result_sets()->CopyFrom(result); + } + } catch (const std::exception& ex) { NYql::TIssues issues; issues.AddIssue(NYql::ExceptionToIssue(ex)); diff --git a/ydb/core/grpc_services/rpc_export.cpp b/ydb/core/grpc_services/rpc_export.cpp index 6bf1b5c713ad..59dac9c6fda4 100644 --- a/ydb/core/grpc_services/rpc_export.cpp +++ b/ydb/core/grpc_services/rpc_export.cpp @@ -41,6 +41,7 @@ class TExportRPC: public TRpcOperationRequestActor, if (this->UserToken) { ev->Record.SetUserSID(this->UserToken->GetUserSID()); } + ev->Record.SetPeerName(this->Request->GetPeerName()); auto& createExport = *ev->Record.MutableRequest(); *createExport.MutableOperationParams() = request.operation_params(); diff --git a/ydb/core/grpc_services/rpc_export_base.h b/ydb/core/grpc_services/rpc_export_base.h index c85ca5b5e7db..0950aa815270 100644 --- a/ydb/core/grpc_services/rpc_export_base.h +++ b/ydb/core/grpc_services/rpc_export_base.h @@ -46,12 +46,16 @@ struct TExportConv { } if (exprt.HasStartTime()) { - *operation.mutable_start_time() = exprt.GetStartTime(); + *operation.mutable_create_time() = exprt.GetStartTime(); } if (exprt.HasEndTime()) { *operation.mutable_end_time() = exprt.GetEndTime(); } + if (exprt.HasUserSID()) { + 
operation.set_created_by(exprt.GetUserSID()); + } + using namespace Ydb::Export; switch (exprt.GetSettingsCase()) { case NKikimrExport::TExport::kExportToYtSettings: diff --git a/ydb/core/grpc_services/rpc_import.cpp b/ydb/core/grpc_services/rpc_import.cpp index 174e9df84cb9..1cf09ad123c0 100644 --- a/ydb/core/grpc_services/rpc_import.cpp +++ b/ydb/core/grpc_services/rpc_import.cpp @@ -39,6 +39,7 @@ class TImportRPC: public TRpcOperationRequestActor, if (this->UserToken) { ev->Record.SetUserSID(this->UserToken->GetUserSID()); } + ev->Record.SetPeerName(this->Request->GetPeerName()); auto& createImport = *ev->Record.MutableRequest(); createImport.MutableOperationParams()->CopyFrom(request.operation_params()); diff --git a/ydb/core/grpc_services/rpc_import_base.h b/ydb/core/grpc_services/rpc_import_base.h index 16d88e0bb3c9..be700af4f026 100644 --- a/ydb/core/grpc_services/rpc_import_base.h +++ b/ydb/core/grpc_services/rpc_import_base.h @@ -43,12 +43,16 @@ struct TImportConv { } if (import.HasStartTime()) { - *operation.mutable_start_time() = import.GetStartTime(); + *operation.mutable_create_time() = import.GetStartTime(); } if (import.HasEndTime()) { *operation.mutable_end_time() = import.GetEndTime(); } + if (import.HasUserSID()) { + operation.set_created_by(import.GetUserSID()); + } + using namespace Ydb::Import; switch (import.GetSettingsCase()) { case NKikimrImport::TImport::kImportFromS3Settings: diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index e780de7eea05..4b5cab1c5b1c 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -75,9 +75,12 @@ bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType) case arrow::Type::DURATION: toType.set_type_id(Ydb::Type::INTERVAL); return true; - case arrow::Type::DECIMAL: - // TODO - return false; + case arrow::Type::DECIMAL: { + Ydb::DecimalType* decimalType = toType.mutable_decimal_type(); + 
decimalType->set_precision(22); + decimalType->set_scale(9); + return true; + } case arrow::Type::NA: case arrow::Type::HALF_FLOAT: case arrow::Type::FIXED_SIZE_BINARY: diff --git a/ydb/core/grpc_services/rpc_login.cpp b/ydb/core/grpc_services/rpc_login.cpp index 0f527fae42fb..f259181d2e8b 100644 --- a/ydb/core/grpc_services/rpc_login.cpp +++ b/ydb/core/grpc_services/rpc_login.cpp @@ -63,6 +63,7 @@ class TLoginRPC : public TRpcRequestActor { PipeClient = RegisterWithSameMailbox(pipe); THolder request = MakeHolder(); request.Get()->Record = CreateLoginRequest(Credentials, AppData()->AuthConfig); + request.Get()->Record.SetPeerName(Request->GetPeerName()); NTabletPipe::SendData(SelfId(), PipeClient, request.Release()); return; } diff --git a/ydb/core/grpc_services/rpc_object_storage.cpp b/ydb/core/grpc_services/rpc_object_storage.cpp index 8ed0134084b6..2fd91b4a11cd 100644 --- a/ydb/core/grpc_services/rpc_object_storage.cpp +++ b/ydb/core/grpc_services/rpc_object_storage.cpp @@ -144,6 +144,19 @@ bool CellFromTuple(NScheme::TTypeInfo type, } break; } + case NScheme::NTypeIds::Decimal: + { + if (tupleValue.Haslow_128()) { + NYql::NDecimal::TInt128 int128 = NYql::NDecimal::FromHalfs(tupleValue.Getlow_128(), tupleValue.Gethigh_128()); + auto &data = memoryOwner.emplace_back(); + data.resize(sizeof(NYql::NDecimal::TInt128)); + std::memcpy(data.Detach(), &int128, sizeof(NYql::NDecimal::TInt128)); + c = TCell(data); + } else { + CHECK_OR_RETURN_ERROR(false, Sprintf("Cannot parse value of type Decimal in tuple at position %" PRIu32, position)); + } + break; + } default: CHECK_OR_RETURN_ERROR(false, Sprintf("Unsupported typeId %" PRIu16 " at index %" PRIu32, typeId, position)); break; diff --git a/ydb/core/grpc_services/rpc_rate_limiter_api.cpp b/ydb/core/grpc_services/rpc_rate_limiter_api.cpp index 288527ae1e95..4ce11d417c4d 100644 --- a/ydb/core/grpc_services/rpc_rate_limiter_api.cpp +++ b/ydb/core/grpc_services/rpc_rate_limiter_api.cpp @@ -594,11 +594,18 @@ class 
TAcquireRateLimiterResourceRPC : public TRateLimiterRequestcoordination_node_path(), GetProtoRequest()->resource_path()), 0, 0); TBase::OnOperationTimeout(ctx); } + // Do nothing here, because quoter service replies after "cancel after" time passes. + void OnCancelOperation(const TActorContext& ctx) { + Y_UNUSED(ctx); + } + STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { hFunc(TEvQuota::TEvClearance, Handle); @@ -637,22 +644,37 @@ class TAcquireRateLimiterResourceRPC : public TRateLimiterRequestGet()->Result) { case TEvQuota::TEvClearance::EResult::Success: Reply(StatusIds::SUCCESS, TActivationContext::AsActorContext()); - break; + break; case TEvQuota::TEvClearance::EResult::UnknownResource: Reply(StatusIds::BAD_REQUEST, TActivationContext::AsActorContext()); - break; + break; case TEvQuota::TEvClearance::EResult::Deadline: - Reply(StatusIds::TIMEOUT, TActivationContext::AsActorContext()); - break; + Reply(QuoterDeadlineStatusCode(), TActivationContext::AsActorContext()); + break; default: Reply(StatusIds::INTERNAL_ERROR, TActivationContext::AsActorContext()); } diff --git a/ydb/core/grpc_services/rpc_replication.cpp b/ydb/core/grpc_services/rpc_replication.cpp index 83c28cc04845..8b75b72a08d0 100644 --- a/ydb/core/grpc_services/rpc_replication.cpp +++ b/ydb/core/grpc_services/rpc_replication.cpp @@ -11,6 +11,8 @@ #include +#include + namespace NKikimr::NGRpcService { using namespace Ydb; @@ -138,9 +140,18 @@ class TDescribeReplicationRPC: public TRpcSchemeRequestActor +#include #include "rpc_common/rpc_common.h" #include "rpc_kqp_base.h" @@ -155,7 +156,7 @@ class TStreamExecuteScanQueryRPC : public TActorBootstrappedBecome(&TStreamExecuteScanQueryRPC::StateWork); @@ -250,32 +251,30 @@ class TStreamExecuteScanQueryRPC : public TActorBootstrappedSelfId() << " NextReply" << ", left: " << ev->Get()->LeftInQueue - << ", queue: " << GRpcResponsesSizeQueue_.size() - << ", used memory: " << GRpcResponsesSize_ - << ", buffer size: " << RpcBufferSize_); + << ", queue: 
" << FlowControl_.QueueSize() + << ", inflight bytes: " << FlowControl_.InflightBytes() + << ", limit bytes: " << FlowControl_.InflightLimitBytes()); - while (GRpcResponsesSizeQueue_.size() > ev->Get()->LeftInQueue) { - GRpcResponsesSize_ -= GRpcResponsesSizeQueue_.front(); - GRpcResponsesSizeQueue_.pop(); + while (FlowControl_.QueueSize() > ev->Get()->LeftInQueue) { + FlowControl_.PopResponse(); } - Y_DEBUG_ABORT_UNLESS(GRpcResponsesSizeQueue_.empty() == (GRpcResponsesSize_ == 0)); - LastDataStreamTimestamp_ = TAppData::TimeProvider->Now(); - if (WaitOnSeqNo_ && RpcBufferSize_ > GRpcResponsesSize_) { - ui64 freeSpace = RpcBufferSize_ - GRpcResponsesSize_; + LastDataStreamTimestamp_ = TAppData::TimeProvider->Now(); + const i64 freeSpaceBytes = FlowControl_.FreeSpaceBytes(); + if (freeSpaceBytes > 0 && LastSeqNo_ && AckedFreeSpaceBytes_ <= 0) { LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, this->SelfId() << " Send stream data ack" - << ", seqNo: " << *WaitOnSeqNo_ - << ", freeSpace: " << freeSpace + << ", seqNo: " << *LastSeqNo_ + << ", freeSpace: " << freeSpaceBytes << ", to: " << ExecuterActorId_); auto resp = MakeHolder(); - resp->Record.SetSeqNo(*WaitOnSeqNo_); - resp->Record.SetFreeSpace(freeSpace); + resp->Record.SetSeqNo(*LastSeqNo_); + resp->Record.SetFreeSpace(freeSpaceBytes); ctx.Send(ExecuterActorId_, resp.Release()); - WaitOnSeqNo_.Clear(); + AckedFreeSpaceBytes_ = freeSpaceBytes; } } @@ -349,28 +348,22 @@ class TStreamExecuteScanQueryRPC : public TActorBootstrappedGet()->Record.GetSeqNo(); + AckedFreeSpaceBytes_ = freeSpaceBytes; Request_->SendSerializedResult(std::move(out), StatusIds::SUCCESS); - ui64 freeSpace = GRpcResponsesSize_ < RpcBufferSize_ - ? 
RpcBufferSize_ - GRpcResponsesSize_ - : 0; - - if (freeSpace == 0) { - WaitOnSeqNo_ = ev->Get()->Record.GetSeqNo(); - } - LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, this->SelfId() << " Send stream data ack" << ", seqNo: " << ev->Get()->Record.GetSeqNo() - << ", freeSpace: " << freeSpace + << ", freeSpace: " << freeSpaceBytes << ", to: " << ev->Sender - << ", queue: " << GRpcResponsesSizeQueue_.size()); + << ", queue: " << FlowControl_.QueueSize()); auto resp = MakeHolder(); resp->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - resp->Record.SetFreeSpace(freeSpace); + resp->Record.SetFreeSpace(freeSpaceBytes); ctx.Send(ev->Sender, resp.Release()); } @@ -411,9 +404,9 @@ class TStreamExecuteScanQueryRPC : public TActorBootstrappedNow(); TDuration timeout; LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, "Got timeout event, InactiveClientTimeout: " << InactiveClientTimeout_ - << " GRpcResponsesSizeQueue: " << GRpcResponsesSizeQueue_.size()); + << " GRpcResponsesSizeQueue: " << FlowControl_.QueueSize()); - if (InactiveClientTimeout_ && GRpcResponsesSizeQueue_.size() > 0) { + if (InactiveClientTimeout_ && FlowControl_.QueueSize() > 0) { TDuration processTime = now - LastDataStreamTimestamp_; if (processTime >= InactiveClientTimeout_) { auto message = TStringBuilder() << this->SelfId() << " Client cannot process data in " << processTime @@ -477,13 +470,12 @@ class TStreamExecuteScanQueryRPC : public TActorBootstrapped Request_; - const ui64 RpcBufferSize_; + TRpcFlowControlState FlowControl_; + TMaybe LastSeqNo_; + i64 AckedFreeSpaceBytes_ = 0; TDuration InactiveClientTimeout_; - TQueue GRpcResponsesSizeQueue_; - ui64 GRpcResponsesSize_ = 0; TInstant LastDataStreamTimestamp_; - TMaybe WaitOnSeqNo_; TSchedulerCookieHolder TimeoutTimerCookieHolder_; diff --git a/ydb/core/grpc_services/rpc_stream_execute_yql_script.cpp b/ydb/core/grpc_services/rpc_stream_execute_yql_script.cpp index d36c122c2c47..11716bda176e 100644 --- 
a/ydb/core/grpc_services/rpc_stream_execute_yql_script.cpp +++ b/ydb/core/grpc_services/rpc_stream_execute_yql_script.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ namespace { {} NKqp::TEvKqp::TEvDataQueryStreamPart::TPtr Handle; - google::protobuf::RepeatedPtrField::const_iterator ResultIterator; + google::protobuf::RepeatedPtrField::const_iterator ResultIterator; }; enum EStreamRpcWakeupTag : ui64 { @@ -83,7 +84,7 @@ class TStreamExecuteYqlScriptRPC TStreamExecuteYqlScriptRPC(IRequestNoOpCtx* request, ui64 rpcBufferSize) : TBase(request) - , RpcBufferSize_(rpcBufferSize) + , FlowControl_(rpcBufferSize) , CancelationFlag(std::make_shared(false)) { // StreamExecuteYqlScript allows write in to table. @@ -218,7 +219,7 @@ class TStreamExecuteYqlScriptRPC auto result = response.mutable_result(); try { - NKqp::ConvertKqpQueryResultToDbResult(kqpResult, result->mutable_result_set()); + result->mutable_result_set()->CopyFrom(kqpResult); } catch (std::exception ex) { ReplyFinishStream(ex.what()); } @@ -228,8 +229,7 @@ class TStreamExecuteYqlScriptRPC TString out; Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); - GRpcResponsesSizeQueue_.push(out.size()); - GRpcResponsesSize_ += out.size(); + FlowControl_.PushResponse(out.size()); RequestPtr()->SendSerializedResult(std::move(out), StatusIds::SUCCESS); } @@ -266,28 +266,22 @@ class TStreamExecuteYqlScriptRPC TString out; Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); - GRpcResponsesSizeQueue_.push(out.size()); - GRpcResponsesSize_ += out.size(); + FlowControl_.PushResponse(out.size()); + const i64 freeSpaceBytes = FlowControl_.FreeSpaceBytes(); + LastSeqNo_ = ev->Get()->Record.GetSeqNo(); + AckedFreeSpaceBytes_ = freeSpaceBytes; RequestPtr()->SendSerializedResult(std::move(out), StatusIds::SUCCESS); - ui64 freeSpace = GRpcResponsesSize_ < RpcBufferSize_ - ? 
RpcBufferSize_ - GRpcResponsesSize_ - : 0; - - if (freeSpace == 0) { - WaitOnSeqNo_ = ev->Get()->Record.GetSeqNo(); - } - LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, this->SelfId() << " Send stream data ack" << ", seqNo: " << ev->Get()->Record.GetSeqNo() - << ", freeSpace: " << freeSpace + << ", freeSpace: " << freeSpaceBytes << ", to: " << ev->Sender - << ", queue: " << GRpcResponsesSizeQueue_.size()); + << ", queue: " << FlowControl_.QueueSize()); auto resp = MakeHolder(); resp->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - resp->Record.SetFreeSpace(freeSpace); + resp->Record.SetFreeSpace(freeSpaceBytes); ctx.Send(ev->Sender, resp.Release()); } @@ -295,9 +289,9 @@ class TStreamExecuteYqlScriptRPC void Handle(TRpcServices::TEvGrpcNextReply::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, this->SelfId() << " NextReply" << ", left: " << ev->Get()->LeftInQueue - << ", queue: " << GRpcResponsesSizeQueue_.size() - << ", used memory: " << GRpcResponsesSize_ - << ", buffer size: " << RpcBufferSize_); + << ", queue: " << FlowControl_.QueueSize() + << ", inflight bytes: " << FlowControl_.InflightBytes() + << ", limit bytes: " << FlowControl_.InflightLimitBytes()); LastDataStreamTimestamp_ = TAppData::TimeProvider->Now(); if (DataQueryStreamContext) { @@ -315,27 +309,24 @@ class TStreamExecuteYqlScriptRPC } else { //ScanQuery in progress - while (GRpcResponsesSizeQueue_.size() > ev->Get()->LeftInQueue) { - GRpcResponsesSize_ -= GRpcResponsesSizeQueue_.front(); - GRpcResponsesSizeQueue_.pop(); + while (FlowControl_.QueueSize() > ev->Get()->LeftInQueue) { + FlowControl_.PopResponse(); } - Y_DEBUG_ABORT_UNLESS(GRpcResponsesSizeQueue_.empty() == (GRpcResponsesSize_ == 0)); - - if (WaitOnSeqNo_ && RpcBufferSize_ > GRpcResponsesSize_) { - ui64 freeSpace = RpcBufferSize_ - GRpcResponsesSize_; + const i64 freeSpaceBytes = FlowControl_.FreeSpaceBytes(); + if (freeSpaceBytes > 0 && LastSeqNo_ && AckedFreeSpaceBytes_ <= 0) { LOG_DEBUG_S(ctx, 
NKikimrServices::RPC_REQUEST, this->SelfId() << " Send stream data ack" - << ", seqNo: " << *WaitOnSeqNo_ - << ", freeSpace: " << freeSpace + << ", seqNo: " << *LastSeqNo_ + << ", freeSpace: " << freeSpaceBytes << ", to: " << GatewayRequestHandlerActorId_); auto resp = MakeHolder(); - resp->Record.SetSeqNo(*WaitOnSeqNo_); - resp->Record.SetFreeSpace(freeSpace); + resp->Record.SetSeqNo(*LastSeqNo_); + resp->Record.SetFreeSpace(freeSpaceBytes); ctx.Send(GatewayRequestHandlerActorId_, resp.Release()); - WaitOnSeqNo_.Clear(); + AckedFreeSpaceBytes_ = freeSpaceBytes; } } } @@ -393,7 +384,7 @@ class TStreamExecuteYqlScriptRPC TInstant now = TAppData::TimeProvider->Now(); TDuration timeout; - if (InactiveClientTimeout_ && GRpcResponsesSizeQueue_.size() > 0) { + if (InactiveClientTimeout_ && FlowControl_.QueueSize() > 0) { TDuration processTime = now - LastDataStreamTimestamp_; if (processTime >= InactiveClientTimeout_) { auto message = TStringBuilder() << this->SelfId() << " Client cannot process data in " << processTime @@ -472,13 +463,12 @@ class TStreamExecuteYqlScriptRPC } private: - const ui64 RpcBufferSize_; + TRpcFlowControlState FlowControl_; + TMaybe LastSeqNo_; + i64 AckedFreeSpaceBytes_ = 0; TDuration InactiveClientTimeout_; - TQueue GRpcResponsesSizeQueue_; - ui64 GRpcResponsesSize_ = 0; TInstant LastDataStreamTimestamp_; - TMaybe WaitOnSeqNo_; TSchedulerCookieHolder ClientTimeoutTimerCookieHolder_; diff --git a/ydb/core/grpc_services/rpc_view.cpp b/ydb/core/grpc_services/rpc_view.cpp new file mode 100644 index 000000000000..70172179716b --- /dev/null +++ b/ydb/core/grpc_services/rpc_view.cpp @@ -0,0 +1,94 @@ +#include "rpc_scheme_base.h" +#include "service_view.h" + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NGRpcService { + +using namespace Ydb; + +using TEvDescribeView = TGrpcRequestOperationCall; + +class TDescribeViewRPC : public TRpcSchemeRequestActor { + using TBase = TRpcSchemeRequestActor; + +public: + using 
TBase::TBase; + + void Bootstrap() { + DescribeScheme(); + } + + void PassAway() override { + TBase::PassAway(); + } + +private: + void DescribeScheme() { + auto ev = std::make_unique(); + SetAuthToken(ev, *Request_); + SetDatabase(ev.get(), *Request_); + ev->Record.MutableDescribePath()->SetPath(GetProtoRequest()->path()); + + Send(MakeTxProxyID(), ev.release()); + Become(&TDescribeViewRPC::StateDescribeScheme); + } + + STATEFN(StateDescribeScheme) { + switch (ev->GetTypeRewrite()) { + HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); + default: + return TBase::StateWork(ev); + } + } + + void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->GetRecord(); + const auto& desc = record.GetPathDescription(); + + if (record.HasReason()) { + Request_->RaiseIssue(NYql::TIssue(record.GetReason())); + } + + switch (record.GetStatus()) { + case NKikimrScheme::StatusSuccess: + if (desc.GetSelf().GetPathType() != NKikimrSchemeOp::EPathTypeView) { + auto message = TStringBuilder() << "Expected a view, but got: " << desc.GetSelf().GetPathType(); + Request_->RaiseIssue(NYql::TIssue(message)); + return Reply(StatusIds::SCHEME_ERROR, ctx); + } + + ConvertDirectoryEntry(desc.GetSelf(), Result_.mutable_self(), true); + Result_.set_query_text(desc.GetViewDescription().GetQueryText()); + + return ReplyWithResult(StatusIds::SUCCESS, Result_, ctx); + + case NKikimrScheme::StatusPathDoesNotExist: + case NKikimrScheme::StatusSchemeError: + return Reply(StatusIds::SCHEME_ERROR, ctx); + + case NKikimrScheme::StatusAccessDenied: + return Reply(StatusIds::UNAUTHORIZED, ctx); + + case NKikimrScheme::StatusNotAvailable: + return Reply(StatusIds::UNAVAILABLE, ctx); + + default: + return Reply(StatusIds::GENERIC_ERROR, ctx); + } + } + +private: + View::DescribeViewResult Result_; +}; + +void DoDescribeView(std::unique_ptr p, const IFacilityProvider& f) { + f.RegisterActor(new 
TDescribeViewRPC(p.release())); +} + +} diff --git a/ydb/core/grpc_services/rpc_whoami.cpp b/ydb/core/grpc_services/rpc_whoami.cpp index 137c4360b0c5..d0096a50e444 100644 --- a/ydb/core/grpc_services/rpc_whoami.cpp +++ b/ydb/core/grpc_services/rpc_whoami.cpp @@ -21,24 +21,29 @@ class TWhoAmIRPC : public TActorBootstrapped { : Request(request) {} - void Bootstrap(const TActorContext& ctx) { + void Bootstrap() { //TODO: Do we realy realy need to make call to the ticket parser here??? //we have done it already in grpc_request_proxy auto req = dynamic_cast(Request.get()); Y_ABORT_UNLESS(req, "Unexpected request type for TWhoAmIRPC"); - TMaybe authToken = req->GetYdbToken(); - if (authToken) { - TMaybe database = Request->GetDatabaseName(); - ctx.Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ - .Database = database ? database.GetRef() : TString(), - .Ticket = authToken.GetRef(), - .PeerName = Request->GetPeerName() - })); - Become(&TThis::StateWaitForTicket); + TString ticket; + if (TMaybe authToken = req->GetYdbToken()) { + ticket = authToken.GetRef(); + } else if (TVector clientCert = Request->FindClientCert(); !clientCert.empty()) { + ticket = TString(clientCert.front()); } else { ReplyError("No token provided"); PassAway(); + return; } + + TMaybe database = Request->GetDatabaseName(); + Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = database ? 
database.GetRef() : TString(), + .Ticket = ticket, + .PeerName = Request->GetPeerName() + })); + Become(&TThis::StateWaitForTicket); } STFUNC(StateWaitForTicket) { diff --git a/ydb/core/grpc_services/service_view.h b/ydb/core/grpc_services/service_view.h new file mode 100644 index 000000000000..54021e0f3b18 --- /dev/null +++ b/ydb/core/grpc_services/service_view.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace NKikimr::NGRpcService { + +class IRequestOpCtx; +class IFacilityProvider; + +void DoDescribeView(std::unique_ptr p, const IFacilityProvider& f); + +} diff --git a/ydb/core/grpc_services/service_ymq.h b/ydb/core/grpc_services/service_ymq.h new file mode 100644 index 000000000000..6ccf8137df2f --- /dev/null +++ b/ydb/core/grpc_services/service_ymq.h @@ -0,0 +1,30 @@ +#pragma once +#include + +namespace NActors { +struct TActorId; +} + +namespace NKikimr { +namespace NGRpcService { + +class IRequestOpCtx; +class IFacilityProvider; + +void DoYmqGetQueueUrlRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqCreateQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSendMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqReceiveMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqGetQueueAttributesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqListQueuesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqPurgeQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqChangeMessageVisibilityRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSetQueueAttributesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSendMessageBatchRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteMessageBatchRequest(std::unique_ptr p, const 
IFacilityProvider& f); +void DoYmqChangeMessageVisibilityBatchRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqListDeadLetterSourceQueuesRequest(std::unique_ptr p, const IFacilityProvider& f); +} +} diff --git a/ydb/core/grpc_services/ya.make b/ydb/core/grpc_services/ya.make index 63344c31d9fb..0f58e4c2c20c 100644 --- a/ydb/core/grpc_services/ya.make +++ b/ydb/core/grpc_services/ya.make @@ -74,6 +74,7 @@ SRCS( rpc_stream_execute_yql_script.cpp rpc_whoami.cpp rpc_object_storage.cpp + rpc_view.cpp table_settings.cpp rpc_common/rpc_common_kqp_session.cpp diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 46460e4de841..1f1e595d0679 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -32,6 +32,9 @@ #include #include +#include +#include + static decltype(auto) make_vslot_tuple(const NKikimrBlobStorage::TVSlotId& id) { return std::make_tuple(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()); } @@ -56,16 +59,21 @@ struct hash { } #define BLOG_CRIT(stream) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::HEALTH, stream) +#define BLOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::HEALTH, stream) namespace NKikimr { using NNodeWhiteboard::TNodeId; -using NNodeWhiteboard::TTabletId; namespace NHealthCheck { using namespace NActors; using namespace Ydb; +using namespace NSchemeCache; +using namespace NSchemeShard; +using namespace NSysView; +using namespace NConsole; +using NNodeWhiteboard::TTabletId; void RemoveUnrequestedEntries(Ydb::Monitoring::SelfCheckResult& result, const Ydb::Monitoring::SelfCheckRequest& request) { if (!request.return_verbose_status()) { @@ -117,11 +125,13 @@ class TSelfCheckRequest : public TActorBootstrapped { TActorId Sender; THolder Request; ui64 Cookie; + NWilson::TSpan Span; - TSelfCheckRequest(const TActorId& sender, THolder request, ui64 cookie) + TSelfCheckRequest(const TActorId& sender, THolder request, ui64 
cookie, NWilson::TTraceId&& traceId) : Sender(sender) , Request(std::move(request)) , Cookie(cookie) + , Span(TComponentTracingLevels::TTablet::Basic, std::move(traceId), "health_check", NWilson::EFlags::AUTO_END) {} using TGroupId = ui32; @@ -179,21 +189,34 @@ class TSelfCheckRequest : public TActorBootstrapped { int Count = 1; TStackVec Identifiers; - TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { - Type = info.tablettype(); - Leader = info.followerid() == 0; + static ETabletState GetState(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_STOPPED) { - State = ETabletState::Stopped; - } else if (!settings.IsHiveSynchronizationPeriod - && info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING - && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier - && info.tabletbootmode() == NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { - State = ETabletState::Dead; - } else if (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) { - State = ETabletState::RestartsTooOften; - } else { - State = ETabletState::Good; + return ETabletState::Stopped; + } + ETabletState state = (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) ? 
ETabletState::RestartsTooOften : ETabletState::Good; + if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_RUNNING) { + return state; + } + if (info.tabletbootmode() != NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { + return state; + } + if (info.lastalivetimestamp() != 0 && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier) { + // Tablet is not alive for a long time + // We should report it as dead unless it's just waiting to be created + if (info.generation() == 0 && info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_BOOTING && !info.inwaitqueue()) { + return state; + } + return ETabletState::Dead; } + return state; + + } + + TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) + : Type(info.tablettype()) + , State(GetState(info, settings)) + , Leader(info.followerid() == 0) + { } bool operator ==(const TNodeTabletStateCount& o) const { @@ -248,6 +271,7 @@ class TSelfCheckRequest : public TActorBootstrapped { struct TGroupState { TString ErasureSpecies; std::vector VSlots; + ui32 Generation; }; struct TSelfCheckResult { @@ -437,6 +461,167 @@ class TSelfCheckRequest : public TActorBootstrapped { } }; + template + struct TRequestResponse { + std::variant, TString> Response; + NWilson::TSpan Span; + + TRequestResponse() = default; + TRequestResponse(NWilson::TSpan&& span) + : Span(std::move(span)) + {} + + TRequestResponse(const TRequestResponse&) = delete; + TRequestResponse(TRequestResponse&&) = default; + TRequestResponse& operator =(const TRequestResponse&) = delete; + TRequestResponse& operator =(TRequestResponse&&) = default; + + void Set(std::unique_ptr&& response) { + constexpr bool hasErrorCheck = requires(const std::unique_ptr& r) {TSelfCheckRequest::IsSuccess(r);}; + if constexpr (hasErrorCheck) { + if (!TSelfCheckRequest::IsSuccess(response)) { + Error(TSelfCheckRequest::GetError(response)); + return; + } + } + if (!IsDone()) { + Span.EndOk(); + } + Response = 
std::move(response); + } + + void Set(TAutoPtr>&& response) { + Set(std::unique_ptr(response->Release().Release())); + } + + bool Error(const TString& error) { + if (!IsDone()) { + Span.EndError(error); + Response = error; + return true; + } + return false; + } + + bool IsOk() const { + return std::holds_alternative>(Response); + } + + bool IsError() const { + return std::holds_alternative(Response); + } + + bool IsDone() const { + return Response.index() != 0; + } + + explicit operator bool() const { + return IsOk(); + } + + T* Get() { + return std::get>(Response).get(); + } + + const T* Get() const { + return std::get>(Response).get(); + } + + T& GetRef() { + return *Get(); + } + + const T& GetRef() const { + return *Get(); + } + + T* operator ->() { + return Get(); + } + + const T* operator ->() const { + return Get(); + } + + T& operator *() { + return GetRef(); + } + + const T& operator *() const { + return GetRef(); + } + + TString GetError() const { + return std::get(Response); + } + + void Event(const TString& name) { + if (Span) { + Span.Event(name); + } + } + }; + + static bool IsSuccess(const std::unique_ptr& ev) { + return (ev->Request->ResultSet.size() > 0) && (std::find_if(ev->Request->ResultSet.begin(), ev->Request->ResultSet.end(), + [](const auto& entry) { + return entry.Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok; + }) != ev->Request->ResultSet.end()); + } + + static TString GetError(const std::unique_ptr& ev) { + if (ev->Request->ResultSet.size() == 0) { + return "empty response"; + } + for (const auto& entry : ev->Request->ResultSet) { + if (entry.Status != NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { + switch (entry.Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::Ok: + return "Ok"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::Unknown: + return "Unknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return "RootUnknown"; + case 
NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + return "PathErrorUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotTable: + return "PathNotTable"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotPath: + return "PathNotPath"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::TableCreationNotComplete: + return "TableCreationNotComplete"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError: + return "LookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError: + return "RedirectLookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::AccessDenied: + return "AccessDenied"; + default: + return ::ToString(static_cast(entry.Status)); + } + } + } + return "no error"; + } + + static bool IsSuccess(const std::unique_ptr& ev) { + return ev->GetRecord().status() == NKikimrScheme::StatusSuccess; + } + + static TString GetError(const std::unique_ptr& ev) { + return NKikimrScheme::EStatus_Name(ev->GetRecord().status()); + } + + static bool IsSuccess(const std::unique_ptr& ev) { + const auto& operation(ev->Record.GetResponse().operation()); + return operation.ready() && operation.status() == Ydb::StatusIds::SUCCESS; + } + + static TString GetError(const std::unique_ptr& ev) { + const auto& operation(ev->Record.GetResponse().operation()); + return Ydb::StatusIds_StatusCode_Name(operation.status()); + } + TString FilterDatabase; THashMap FilterDomainKey; TVector PipeClients; @@ -447,21 +632,23 @@ class TSelfCheckRequest : public TActorBootstrapped { TTabletId RootSchemeShardId; TTabletId RootHiveId; THashMap TenantByPath; - THashMap> DescribeByPath; + THashMap> DescribeByPath; THashMap> PathsByPoolName; + THashMap> TenantStatusByPath; THashMap DatabaseStatusByPath; THashMap> TenantStateByPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveNodeStats; - THashMap> HiveInfo; - THolder NodesInfo; + THashMap NavigateResult; + THashMap> HiveDomainStats; + THashMap> 
HiveNodeStats; + THashMap> HiveInfo; + std::optional> ListTenants; + std::optional> NodesInfo; THashMap MergedNodeInfo; - std::optional StoragePools; - std::optional Groups; - std::optional VSlots; - std::optional PDisks; - bool RequestedStorageConfig = false; + std::optional> StoragePools; + std::optional> Groups; + std::optional> VSlots; + std::optional> PDisks; + std::optional> NodeWardenStorageConfig; THashSet UnknownStaticGroups; THashSet NodeIds; @@ -474,7 +661,7 @@ class TSelfCheckRequest : public TActorBootstrapped { THashMap DatabaseState; THashMap SharedDatabases; - THashMap> NodeSystemState; + THashMap> NodeSystemState; THashMap MergedNodeSystemState; std::unordered_map PDisksMap; @@ -491,15 +678,15 @@ class TSelfCheckRequest : public TActorBootstrapped { THashSet UnavailableStorageNodes; THashSet UnavailableComputeNodes; - THashMap> NodeVDiskState; + THashMap> NodeVDiskState; TList VDisksAppended; std::unordered_map MergedVDiskState; - THashMap> NodePDiskState; + THashMap> NodePDiskState; TList PDisksAppended; std::unordered_map MergedPDiskState; - THashMap> NodeBSGroupState; + THashMap> NodeBSGroupState; TList BSGroupAppended; std::unordered_map MergedBSGroupState; @@ -600,9 +787,9 @@ class TSelfCheckRequest : public TActorBootstrapped { TTenantInfo& tenant = TenantByPath[DomainPath]; tenant.Name = DomainPath; RequestSchemeCacheNavigate(DomainPath); - RequestListTenants(); + ListTenants = RequestListTenants(); } else if (FilterDatabase != DomainPath) { - RequestTenantStatus(FilterDatabase); + TenantStatusByPath[FilterDatabase] = RequestTenantStatus(FilterDatabase); } else { TTenantInfo& tenant = TenantByPath[DomainPath]; tenant.Name = DomainPath; @@ -614,14 +801,14 @@ class TSelfCheckRequest : public TActorBootstrapped { TabletRequests.TabletStates[RootHiveId].Database = DomainPath; TabletRequests.TabletStates[RootHiveId].Type = TTabletTypes::Hive; //RequestHiveDomainStats(RootHiveId); - RequestHiveNodeStats(RootHiveId); - RequestHiveInfo(RootHiveId); 
+ HiveNodeStats[RootHiveId] = RequestHiveNodeStats(RootHiveId); + HiveInfo[RootHiveId] = RequestHiveInfo(RootHiveId); } if (RootSchemeShardId && !IsSpecificDatabaseFilter()) { TabletRequests.TabletStates[RootSchemeShardId].Database = DomainPath; TabletRequests.TabletStates[RootSchemeShardId].Type = TTabletTypes::SchemeShard; - RequestDescribe(RootSchemeShardId, DomainPath); + DescribeByPath[DomainPath] = RequestDescribe(RootSchemeShardId, DomainPath); } if (BsControllerId) { @@ -630,7 +817,9 @@ class TSelfCheckRequest : public TActorBootstrapped { RequestBsController(); } - Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); + + NodesInfo = TRequestResponse(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, "TEvInterconnect::TEvListNodes")); + Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(), 0/*flags*/, 0/*cookie*/, Span.GetTraceId()); ++Requests; Become(&TThis::StateWait); @@ -639,15 +828,16 @@ class TSelfCheckRequest : public TActorBootstrapped { } bool HaveAllBSControllerInfo() { - return StoragePools && Groups && VSlots && PDisks; + return StoragePools && StoragePools->IsOk() && Groups && Groups->IsOk() && VSlots && VSlots->IsOk() && PDisks && PDisks->IsOk(); } bool NeedWhiteboardInfoForGroup(TGroupId groupId) { - return !HaveAllBSControllerInfo() && IsStaticGroup(groupId); + return UnknownStaticGroups.contains(groupId) || (!HaveAllBSControllerInfo() && IsStaticGroup(groupId)); } void Handle(TEvNodeWardenStorageConfig::TPtr ev) { - if (const NKikimrBlobStorage::TStorageConfig& config = *ev->Get()->Config; config.HasBlobStorageConfig()) { + NodeWardenStorageConfig->Set(std::move(ev)); + if (const NKikimrBlobStorage::TStorageConfig& config = *NodeWardenStorageConfig->Get()->Config; config.HasBlobStorageConfig()) { if (const auto& bsConfig = config.GetBlobStorageConfig(); bsConfig.HasServiceSet()) { const auto& staticConfig = bsConfig.GetServiceSet(); for (const NKikimrBlobStorage::TNodeWardenServiceSet_TPDisk& pDisk : 
staticConfig.pdisks()) { @@ -678,6 +868,7 @@ class TSelfCheckRequest : public TActorBootstrapped { auto groupId = vDisk.GetVDiskID().GetGroupID(); if (NeedWhiteboardInfoForGroup(groupId)) { + BLOG_D("Requesting whiteboard for group " << groupId); RequestStorageNode(vDisk.GetVDiskLocation().GetNodeID()); } } @@ -737,10 +928,11 @@ class TSelfCheckRequest : public TActorBootstrapped { } } - void RequestTabletPipe(TTabletId tabletId, - const TString& key, - IEventBase* payload, - std::optional requestId = std::nullopt) { + template + [[nodiscard]] TRequestResponse RequestTabletPipe(TTabletId tabletId, + IEventBase* payload, + std::optional requestId = std::nullopt) { + TString key = TypeName(*payload); ui64 cookie; if (requestId) { cookie = *requestId; @@ -748,6 +940,10 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { cookie = TabletRequests.MakeRequest(tabletId, key); } + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, key)); + if (Span) { + response.Span.Attribute("tablet_id", ::ToString(tabletId)); + } TTabletRequestsState::TTabletState& requestState(TabletRequests.TabletStates[tabletId]); if (!requestState.TabletPipe) { requestState.TabletPipe = RegisterWithSameMailbox(NTabletPipe::CreateClient( @@ -756,93 +952,123 @@ class TSelfCheckRequest : public TActorBootstrapped { NTabletPipe::TClientRetryPolicy::WithRetries())); PipeClients.emplace_back(requestState.TabletPipe); } - NTabletPipe::SendData(SelfId(), requestState.TabletPipe, payload, cookie); + NTabletPipe::SendData(SelfId(), requestState.TabletPipe, payload, cookie, response.Span.GetTraceId()); ++Requests; + return response; } - void RequestDescribe(TTabletId schemeShardId, const TString& path) { + [[nodiscard]] TRequestResponse RequestDescribe(TTabletId schemeShardId, const TString& path) { THolder request = MakeHolder(); NKikimrSchemeOp::TDescribePath& record = request->Record; record.SetPath(path); 
record.MutableOptions()->SetReturnPartitioningInfo(false); record.MutableOptions()->SetReturnPartitionConfig(false); record.MutableOptions()->SetReturnChildren(false); - RequestTabletPipe(schemeShardId, "TEvDescribeScheme:" + path, request.Release()); + auto response = RequestTabletPipe(schemeShardId, request.Release()); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; } - void RequestHiveInfo(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveInfo(TTabletId hiveId) { THolder request = MakeHolder(); request->Record.SetReturnFollowers(true); - RequestTabletPipe(hiveId, "TEvRequestHiveInfo", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestHiveDomainStats(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveDomainStats(TTabletId hiveId) { THolder request = MakeHolder(); request->Record.SetReturnFollowers(true); request->Record.SetReturnMetrics(true); - RequestTabletPipe(hiveId, "TEvRequestHiveDomainStats", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestHiveNodeStats(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveNodeStats(TTabletId hiveId) { THolder request = MakeHolder(); - RequestTabletPipe(hiveId, "TEvRequestHiveNodeStats", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestTenantStatus(const TString& path) { + [[nodiscard]] TRequestResponse RequestTenantStatus(const TString& path) { THolder request = MakeHolder(); request->Record.MutableRequest()->set_path(path); - RequestTabletPipe(ConsoleId, "TEvGetTenantStatusRequest:" + path, request.Release()); + auto response = RequestTabletPipe(ConsoleId, request.Release()); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; } - void RequestListTenants() { + [[nodiscard]] TRequestResponse RequestListTenants() { THolder request = MakeHolder(); - RequestTabletPipe(ConsoleId, 
"TEvListTenantsRequest", request.Release()); + return RequestTabletPipe(ConsoleId, request.Release()); } void RequestBsController() { THolder requestPools = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetStoragePoolsRequest", requestPools.Release(), TTabletRequestsState::RequestStoragePools); + StoragePools = RequestTabletPipe(BsControllerId, requestPools.Release(), TTabletRequestsState::RequestStoragePools); THolder requestGroups = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetGroupsRequest", requestGroups.Release(), TTabletRequestsState::RequestGroups); + Groups = RequestTabletPipe(BsControllerId, requestGroups.Release(), TTabletRequestsState::RequestGroups); THolder requestVSlots = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetVSlotsRequest", requestVSlots.Release(), TTabletRequestsState::RequestVSlots); + VSlots = RequestTabletPipe(BsControllerId, requestVSlots.Release(), TTabletRequestsState::RequestVSlots); THolder requestPDisks = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetPDisksRequest", requestPDisks.Release(), TTabletRequestsState::RequestPDisks); + PDisks = RequestTabletPipe(BsControllerId, requestPDisks.Release(), TTabletRequestsState::RequestPDisks); } + THashMap> NavigateKeySet; + void RequestSchemeCacheNavigate(const TString& path) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; + ui64 cookie = NavigateKeySet.size(); + THolder request = MakeHolder(); + request->Cookie = cookie; + TSchemeCacheNavigate::TEntry& entry = request->ResultSet.emplace_back(); entry.Path = NKikimr::SplitPath(path); - entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); + entry.Operation = TSchemeCacheNavigate::EOp::OpPath; + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if 
(Span) { + response.Span.Attribute("path", path); + } + NavigateKeySet.emplace(cookie, std::move(response)); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); ++Requests; } void RequestSchemeCacheNavigate(const TPathId& pathId) { + ui64 cookie = NavigateKeySet.size(); THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; + request->Cookie = cookie; + NSchemeCache::TSchemeCacheNavigate::TEntry& entry = request->ResultSet.emplace_back(); entry.TableId.PathId = pathId; entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; entry.RedirectRequired = false; entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if (Span) { + response.Span.Attribute("path_id", pathId.ToString()); + } + NavigateKeySet.emplace(cookie, std::move(response)); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); ++Requests; } template - void RequestNodeWhiteboard(TNodeId nodeId) { + [[nodiscard]] TRequestResponse::Type> RequestNodeWhiteboard(TNodeId nodeId) { TActorId whiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId); auto request = MakeHolder(); - Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery, nodeId); + TRequestResponse::Type> response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); + } + Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery, nodeId, response.Span.GetTraceId()); + return 
response; } void RequestGenericNode(TNodeId nodeId) { - if (NodeIds.emplace(nodeId).second) { - Send(TlsActivationContext->ActorSystem()->InterconnectProxy(nodeId), new TEvents::TEvSubscribe()); - RequestNodeWhiteboard(nodeId); + if (NodeSystemState.count(nodeId) == 0) { + if (NodeIds.insert(nodeId).second) { + Send(TlsActivationContext->ActorSystem()->InterconnectProxy(nodeId), new TEvents::TEvSubscribe()); + } + NodeSystemState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); ++Requests; } } @@ -856,36 +1082,47 @@ class TSelfCheckRequest : public TActorBootstrapped { void RequestStorageNode(TNodeId nodeId) { if (StorageNodeIds.emplace(nodeId).second) { RequestGenericNode(nodeId); - RequestNodeWhiteboard(nodeId); - ++Requests; - RequestNodeWhiteboard(nodeId); - ++Requests; - RequestNodeWhiteboard(nodeId); - ++Requests; + if (NodeVDiskState.count(nodeId) == 0) { + NodeVDiskState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } + if (NodePDiskState.count(nodeId) == 0) { + NodePDiskState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } + if (NodeBSGroupState.count(nodeId) == 0) { + NodeBSGroupState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } } } - void RequestStorageConfig() { - if (!RequestedStorageConfig) { - Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), new TEvNodeWardenQueryStorageConfig(false)); - RequestedStorageConfig = true; - ++Requests; - } + [[nodiscard]] TRequestResponse RequestStorageConfig() { + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName())); + Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), new TEvNodeWardenQueryStorageConfig(false), 0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); + ++Requests; + return response; } void Handle(TEvPrivate::TEvRetryNodeWhiteboard::TPtr& ev) { - switch (ev->Get()->EventId) { + auto eventId = ev->Get()->EventId; + auto nodeId = ev->Get()->NodeId; + switch (eventId) { case 
NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeSystemState.erase(nodeId); + NodeSystemState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeVDiskState.erase(nodeId); + NodeVDiskState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvPDiskStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodePDiskState.erase(nodeId); + NodePDiskState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeBSGroupState.erase(nodeId); + NodeBSGroupState[nodeId] = RequestNodeWhiteboard(nodeId); break; default: RequestDone("unsupported event scheduled"); @@ -904,37 +1141,34 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvents::TEvUndelivered::TPtr& ev) { ui32 nodeId = ev.Get()->Cookie; + TString error = "Undelivered"; if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeIds.count(nodeId) != 0 && NodeSystemState.count(nodeId) == 0) { + if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeSystemState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvSystemStateRequest"); UnavailableComputeNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvVDiskStateRequest) { - if (StorageNodeIds.count(nodeId) != 0 && NodeVDiskState.count(nodeId) == 0) { + if (NodeVDiskState.count(nodeId) && NodeVDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeVDiskState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvVDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvPDiskStateRequest) { - if 
(StorageNodeIds.count(nodeId) != 0 && NodePDiskState.count(nodeId) == 0) { + if (NodePDiskState.count(nodeId) && NodePDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodePDiskState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvPDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateRequest) { - if (StorageNodeIds.count(nodeId) != 0 && NodeBSGroupState.count(nodeId) == 0) { + if (NodeBSGroupState.count(nodeId) && NodeBSGroupState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeBSGroupState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvBSGroupStateRequest"); } } @@ -943,30 +1177,27 @@ class TSelfCheckRequest : public TActorBootstrapped { void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { ui32 nodeId = ev->Get()->NodeId; - if (NodeIds.count(nodeId) != 0 && NodeSystemState.count(nodeId) == 0) { + TString error = "NodeDisconnected"; + if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeSystemState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvSystemStateRequest"); UnavailableComputeNodes.insert(nodeId); } } - if (StorageNodeIds.count(nodeId) != 0 && NodeVDiskState.count(nodeId) == 0) { + if (NodeVDiskState.count(nodeId) && NodeVDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeVDiskState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvVDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } - if (StorageNodeIds.count(nodeId) != 0 && NodePDiskState.count(nodeId) == 0) { + if (NodePDiskState.count(nodeId) && NodePDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodePDiskState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvPDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } - if 
(StorageNodeIds.count(nodeId) != 0 && NodeBSGroupState.count(nodeId) == 0) { + if (NodeBSGroupState.count(nodeId) && NodeBSGroupState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeBSGroupState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvBSGroupStateRequest"); } } @@ -989,11 +1220,15 @@ class TSelfCheckRequest : public TActorBootstrapped { void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) { switch (ev->Get()->Tag) { case TimeoutBSC: + Span.Event("TimeoutBSC"); if (!HaveAllBSControllerInfo()) { - RequestStorageConfig(); + if (!NodeWardenStorageConfig) { + NodeWardenStorageConfig = RequestStorageConfig(); + } } break; case TimeoutFinal: + Span.Event("TimeoutFinal"); ReplyAndPassAway(); break; } @@ -1010,8 +1245,8 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { bool needComputeFromStaticNodes = !IsSpecificDatabaseFilter(); - NodesInfo = ev->Release(); - for (const auto& ni : NodesInfo->Nodes) { + NodesInfo->Set(std::move(ev)); + for (const auto& ni : NodesInfo->Get()->Nodes) { MergedNodeInfo[ni.NodeId] = ∋ if (IsStaticNode(ni.NodeId) && needComputeFromStaticNodes) { DatabaseState[DomainPath].ComputeNodeIds.push_back(ni.NodeId); @@ -1026,44 +1261,46 @@ class TSelfCheckRequest : public TActorBootstrapped { } bool NeedWhiteboardForStaticGroupsWithUnknownStatus() { - return RequestedStorageConfig && !IsSpecificDatabaseFilter(); + return NodeWardenStorageConfig && !IsSpecificDatabaseFilter(); } void Handle(NSysView::TEvSysView::TEvGetStoragePoolsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestStoragePools); - StoragePools = std::move(ev->Get()->Record); + StoragePools->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetStoragePoolsRequest"); } void Handle(NSysView::TEvSysView::TEvGetGroupsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestGroups); - Groups = 
std::move(ev->Get()->Record); + Groups->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetGroupsRequest"); } void Handle(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestVSlots); - VSlots = std::move(ev->Get()->Record); + VSlots->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetVSlotsRequest"); } void Handle(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestPDisks); - PDisks = std::move(ev->Get()->Record); + PDisks->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetPDisksRequest"); } void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); - if (ev->Get()->GetRecord().status() == NKikimrScheme::StatusSuccess) { - TString path = ev->Get()->GetRecord().path(); + TString path = ev->Get()->GetRecord().path(); + auto& response = DescribeByPath[path]; + response.Set(std::move(ev)); + if (response.IsOk()) { TDatabaseState& state(DatabaseState[path]); state.Path = path; - for (const auto& storagePool : ev->Get()->GetRecord().pathdescription().domaindescription().storagepools()) { + for (const auto& storagePool : response.Get()->GetRecord().pathdescription().domaindescription().storagepools()) { TString storagePoolName = storagePool.name(); state.StoragePoolNames.emplace(storagePoolName); PathsByPoolName[storagePoolName].emplace(path); // no poolId in TEvDescribeSchemeResult, so it's neccesary to keep poolNames instead @@ -1073,18 +1310,19 @@ class TSelfCheckRequest : public TActorBootstrapped { state.StoragePools.emplace(0); // static group has poolId = 0 StoragePoolState[0].Name = STATIC_STORAGE_POOL_NAME; } - state.StorageUsage = ev->Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize(); - state.StorageQuota = 
ev->Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota(); - - DescribeByPath[path] = ev->Release(); + state.StorageUsage = response.Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize(); + state.StorageQuota = response.Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota(); } RequestDone("TEvDescribeSchemeResult"); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - auto domainInfo = ev->Get()->Request->ResultSet.begin()->DomainInfo; - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); + TRequestResponse& response = NavigateKeySet[ev->Get()->Request->Cookie]; + response.Set(std::move(ev)); + if (response.IsOk()) { + auto domainInfo = response.Get()->Request->ResultSet.begin()->DomainInfo; + TString path = CanonizePath(response.Get()->Request->ResultSet.begin()->Path); + NavigateResult[path] = response.Get()->Request->Cookie; if (domainInfo->IsServerless()) { if (NeedHealthCheckForServerless(domainInfo)) { if (SharedDatabases.emplace(domainInfo->ResourcesDomainKey, path).second) { @@ -1105,11 +1343,14 @@ class TSelfCheckRequest : public TActorBootstrapped { TabletRequests.TabletStates[hiveId].Database = path; TabletRequests.TabletStates[hiveId].Type = TTabletTypes::Hive; //RequestHiveDomainStats(hiveId); - RequestHiveNodeStats(hiveId); - RequestHiveInfo(hiveId); + if (HiveNodeStats.count(hiveId) == 0) { + HiveNodeStats[hiveId] = RequestHiveNodeStats(hiveId); + } + if (HiveInfo.count(hiveId) == 0) { + HiveInfo[hiveId] = RequestHiveInfo(hiveId); + } } FilterDomainKey[TSubDomainKey(domainInfo->DomainKey.OwnerId, domainInfo->DomainKey.LocalPathId)] = path; - NavigateResult[path] = std::move(ev->Get()->Request); TTabletId schemeShardId = 
domainInfo->Params.GetSchemeShard(); if (!schemeShardId) { schemeShardId = RootSchemeShardId; @@ -1117,7 +1358,9 @@ class TSelfCheckRequest : public TActorBootstrapped { TabletRequests.TabletStates[schemeShardId].Database = path; TabletRequests.TabletStates[schemeShardId].Type = TTabletTypes::SchemeShard; } - RequestDescribe(schemeShardId, path); + if (DescribeByPath.count(path) == 0) { + DescribeByPath[path] = RequestDescribe(schemeShardId, path); + } } RequestDone("TEvNavigateKeySetResult"); } @@ -1129,53 +1372,62 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { + auto& response = HiveDomainStats[hiveId]; + response.Set(std::move(ev)); + for (const NKikimrHive::THiveDomainStats& hiveStat : response.Get()->Record.GetDomainStats()) { for (TNodeId nodeId : hiveStat.GetNodeIds()) { RequestComputeNode(nodeId); } } - HiveDomainStats[hiveId] = std::move(ev->Release()); RequestDone("TEvResponseHiveDomainStats"); } void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); + auto& response = HiveNodeStats[hiveId]; + response.Set(std::move(ev)); TInstant aliveBarrier = TInstant::Now() - TDuration::Minutes(5); - for (const NKikimrHive::THiveNodeStats& hiveStat : ev->Get()->Record.GetNodeStats()) { + for (const NKikimrHive::THiveNodeStats& hiveStat : response.Get()->Record.GetNodeStats()) { if (!hiveStat.HasLastAliveTimestamp() || TInstant::MilliSeconds(hiveStat.GetLastAliveTimestamp()) > aliveBarrier) { RequestComputeNode(hiveStat.GetNodeId()); } } - HiveNodeStats[hiveId] = std::move(ev->Release()); RequestDone("TEvResponseHiveNodeStats"); } void Handle(TEvHive::TEvResponseHiveInfo::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); - HiveInfo[hiveId] = 
std::move(ev->Release()); + HiveInfo[hiveId].Set(std::move(ev)); RequestDone("TEvResponseHiveInfo"); } void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); auto& operation(ev->Get()->Record.GetResponse().operation()); - if (operation.ready() && operation.status() == Ydb::StatusIds::SUCCESS) { + if (operation.ready()) { Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; operation.result().UnpackTo(&getTenantStatusResult); TString path = getTenantStatusResult.path(); - DatabaseStatusByPath[path] = std::move(getTenantStatusResult); - DatabaseState[path]; - RequestSchemeCacheNavigate(path); + auto& response = TenantStatusByPath[path]; + response.Set(std::move(ev)); + if (response.IsOk()) { + Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; + operation.result().UnpackTo(&getTenantStatusResult); + DatabaseStatusByPath[path] = getTenantStatusResult; + DatabaseState[path]; + RequestSchemeCacheNavigate(path); + } } RequestDone("TEvGetTenantStatusResponse"); } void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); + ListTenants->Set(std::move(ev)); Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); + ListTenants->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); for (const TString& path : listTenantsResult.paths()) { - RequestTenantStatus(path); + TenantStatusByPath[path] = RequestTenantStatus(path); DatabaseState[path]; } RequestDone("TEvListTenantsResponse"); @@ -1183,20 +1435,18 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - if (NodeSystemState.count(nodeId) == 0) { - auto& nodeSystemState(NodeSystemState[nodeId]); - nodeSystemState = ev->Release(); - for (NKikimrWhiteboard::TSystemStateInfo& state : 
*nodeSystemState->Record.MutableSystemStateInfo()) { - state.set_nodeid(nodeId); - MergedNodeSystemState[nodeId] = &state; - } - RequestDone("TEvSystemStateResponse"); + auto& nodeSystemState(NodeSystemState[nodeId]); + nodeSystemState.Set(std::move(ev)); + for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record.MutableSystemStateInfo()) { + state.set_nodeid(nodeId); + MergedNodeSystemState[nodeId] = &state; } + RequestDone("TEvSystemStateResponse"); } static const int HIVE_SYNCHRONIZATION_PERIOD_MS = 10000; - bool IsHiveSynchronizationPeriod(NKikimrHive::TEvResponseHiveInfo& hiveInfo) { + bool IsHiveSynchronizationPeriod(const NKikimrHive::TEvResponseHiveInfo& hiveInfo) { return hiveInfo.GetResponseTimestamp() < hiveInfo.GetStartTimeTimestamp() + HIVE_SYNCHRONIZATION_PERIOD_MS; } @@ -1204,7 +1454,7 @@ class TSelfCheckRequest : public TActorBootstrapped { TNodeTabletState::TTabletStateSettings settings; settings.AliveBarrier = TInstant::Now() - TDuration::Minutes(5); for (const auto& [hiveId, hiveResponse] : HiveInfo) { - if (hiveResponse) { + if (hiveResponse.IsOk()) { settings.IsHiveSynchronizationPeriod = IsHiveSynchronizationPeriod(hiveResponse->Record); for (const NKikimrHive::TTabletInfo& hiveTablet : hiveResponse->Record.GetTablets()) { TSubDomainKey tenantId = TSubDomainKey(hiveTablet.GetObjectDomain()); @@ -1283,22 +1533,27 @@ class TSelfCheckRequest : public TActorBootstrapped { if (!HaveAllBSControllerInfo()) { return; } - for (const auto& group : Groups->GetEntries()) { + for (const auto& group : Groups->Get()->Record.GetEntries()) { auto groupId = group.GetKey().GetGroupId(); auto poolId = group.GetInfo().GetStoragePoolId(); - GroupState[groupId].ErasureSpecies = group.GetInfo().GetErasureSpeciesV2(); + auto& groupState = GroupState[groupId]; + groupState.ErasureSpecies = group.GetInfo().GetErasureSpeciesV2(); + groupState.Generation = group.GetInfo().GetGeneration(); StoragePoolState[poolId].Groups.emplace(groupId); } - for (const 
auto& vSlot : VSlots->GetEntries()) { + for (const auto& vSlot : VSlots->Get()->Record.GetEntries()) { auto vSlotId = GetVSlotId(vSlot.GetKey()); - GroupState[vSlot.GetInfo().GetGroupId()].VSlots.push_back(&vSlot); + auto groupStateIt = GroupState.find(vSlot.GetInfo().GetGroupId()); + if (groupStateIt != GroupState.end() && vSlot.GetInfo().GetGroupGeneration() == groupStateIt->second.Generation) { + groupStateIt->second.VSlots.push_back(&vSlot); + } } - for (const auto& pool : StoragePools->GetEntries()) { // there is no specific pool for static group here + for (const auto& pool : StoragePools->Get()->Record.GetEntries()) { // there is no specific pool for static group here ui64 poolId = pool.GetKey().GetStoragePoolId(); TString storagePoolName = pool.GetInfo().GetName(); StoragePoolState[poolId].Name = storagePoolName; } - for (const auto& pDisk : PDisks->GetEntries()) { + for (const auto& pDisk : PDisks->Get()->Record.GetEntries()) { auto pDiskId = GetPDiskId(pDisk.GetKey()); PDisksMap.emplace(pDiskId, &pDisk); } @@ -1308,9 +1563,12 @@ class TSelfCheckRequest : public TActorBootstrapped { // it should not be trusted Ydb::Monitoring::StorageGroupStatus staticGroupStatus; FillGroupStatus(0, staticGroupStatus, {nullptr}); + BLOG_D("Static group status is " << staticGroupStatus.overall()); if (staticGroupStatus.overall() != Ydb::Monitoring::StatusFlag::GREEN) { UnknownStaticGroups.emplace(0); - RequestStorageConfig(); + if (!NodeWardenStorageConfig) { + NodeWardenStorageConfig = RequestStorageConfig(); + } } } @@ -1712,12 +1970,9 @@ class TSelfCheckRequest : public TActorBootstrapped { ETags::PDiskState); } switch (status->number()) { - case NKikimrBlobStorage::ACTIVE: { - context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); - break; - } + case NKikimrBlobStorage::ACTIVE: case NKikimrBlobStorage::INACTIVE: { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "PDisk is inactive", ETags::PDiskState); + 
context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); break; } case NKikimrBlobStorage::FAULTY: @@ -1741,6 +1996,8 @@ class TSelfCheckRequest : public TActorBootstrapped { } } + // do not propagate RED status to vdisk - so that vdisk is not considered down when computing group status + context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::ORANGE); storagePDiskStatus.set_overall(context.GetOverallStatus()); } @@ -1797,6 +2054,13 @@ class TSelfCheckRequest : public TActorBootstrapped { storageVDiskStatus.set_id(GetVSlotId(vSlot->GetKey())); + if (!vSlot->GetInfo().HasStatusV2()) { + // this should mean that BSC recently restarted and does not have accurate data yet - we should not report to avoid false positives + context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); + storageVDiskStatus.set_overall(context.GetOverallStatus()); + return; + } + const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); auto status = descriptor->FindValueByName(vSlot->GetInfo().GetStatusV2()); if (!status) { // this case is not expected because becouse bsc assignes status according EVDiskStatus enum @@ -1816,16 +2080,12 @@ class TSelfCheckRequest : public TActorBootstrapped { storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } - case NKikimrBlobStorage::INIT_PENDING: { // initialization in process - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "VDisk is being initialized", ETags::VDiskState); - storageVDiskStatus.set_overall(context.GetOverallStatus()); - return; - } case NKikimrBlobStorage::REPLICATING: { // the disk accepts queries, but not all the data was replicated context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, TStringBuilder() << "Replication in progress", ETags::VDiskState); storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } + case NKikimrBlobStorage::INIT_PENDING: case NKikimrBlobStorage::READY: { // the disk is fully operational and does not 
affect group fault tolerance context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } @@ -1839,53 +2099,47 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - if (NodeVDiskState.count(nodeId) == 0) { - auto& nodeVDiskState(NodeVDiskState[nodeId]); - nodeVDiskState = ev->Release(); - for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) { - state.set_nodeid(nodeId); - auto id = GetVDiskId(state.vdiskid()); - MergedVDiskState[id] = &state; - } - RequestDone("TEvVDiskStateResponse"); + auto& nodeVDiskState(NodeVDiskState[nodeId]); + nodeVDiskState.Set(std::move(ev)); + for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) { + state.set_nodeid(nodeId); + auto id = GetVDiskId(state.vdiskid()); + MergedVDiskState[id] = &state; } + RequestDone("TEvVDiskStateResponse"); } void Handle(NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - if (NodePDiskState.count(nodeId) == 0) { - auto& nodePDiskState(NodePDiskState[nodeId]); - nodePDiskState = ev->Release(); - for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) { - state.set_nodeid(nodeId); - auto id = GetPDiskId(state); - MergedPDiskState[id] = &state; - } - RequestDone("TEvPDiskStateResponse"); + auto& nodePDiskState(NodePDiskState[nodeId]); + nodePDiskState.Set(std::move(ev)); + for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) { + state.set_nodeid(nodeId); + auto id = GetPDiskId(state); + MergedPDiskState[id] = &state; } + RequestDone("TEvPDiskStateResponse"); } void Handle(NNodeWhiteboard::TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; - if (NodeBSGroupState.count(nodeId) == 0) { - auto& nodeBSGroupState(NodeBSGroupState[nodeId]); - 
nodeBSGroupState = ev->Release(); - for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) { - state.set_nodeid(nodeId); - TString storagePoolName = state.storagepoolname(); - TGroupID groupId(state.groupid()); - const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]); - if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) { - current = &state; - } - if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) { - continue; - } - StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid()); - StoragePoolStateByName[storagePoolName].Name = storagePoolName; + auto& nodeBSGroupState(NodeBSGroupState[nodeId]); + nodeBSGroupState.Set(std::move(ev)); + for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) { + state.set_nodeid(nodeId); + TString storagePoolName = state.storagepoolname(); + TGroupID groupId(state.groupid()); + const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]); + if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) { + current = &state; + } + if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) { + continue; } - RequestDone("TEvBSGroupStateResponse"); + StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid()); + StoragePoolStateByName[storagePoolName].Name = storagePoolName; } + RequestDone("TEvBSGroupStateResponse"); } void FillPDiskStatusWithWhiteboard(const TString& pDiskId, const NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo, Ydb::Monitoring::StoragePDiskStatus& storagePDiskStatus, TSelfCheckContext context) { @@ -1999,9 +2253,9 @@ class TSelfCheckRequest : public TActorBootstrapped { switch (vDiskInfo.GetVDiskState()) { case NKikimrWhiteboard::EVDiskState::OK: + case NKikimrWhiteboard::EVDiskState::Initial: 
context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); break; - case NKikimrWhiteboard::EVDiskState::Initial: case NKikimrWhiteboard::EVDiskState::SyncGuidRecovery: context.IssueRecords.clear(); context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, @@ -2081,7 +2335,7 @@ class TSelfCheckRequest : public TActorBootstrapped { ++DisksColors[status]; switch (status) { case Ydb::Monitoring::StatusFlag::BLUE: // disk is good, but not available - case Ydb::Monitoring::StatusFlag::YELLOW: // disk is initializing, not currently available + // No yellow or orange status here - this is intentional - they are used when a disk is running out of space, but is currently available case Ydb::Monitoring::StatusFlag::RED: // disk is bad, probably not available case Ydb::Monitoring::StatusFlag::GREY: // the status is absent, the disk is not available IncrementFor(realm); @@ -2097,7 +2351,7 @@ class TSelfCheckRequest : public TActorBootstrapped { if (ErasureSpecies == NONE) { if (FailedDisks > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } else if (ErasureSpecies == BLOCK_4_2) { @@ -2111,7 +2365,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } else if 
(ErasureSpecies == MIRROR_3_DC) { @@ -2125,7 +2379,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } @@ -2169,7 +2423,7 @@ class TSelfCheckRequest : public TActorBootstrapped { context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::YELLOW); checker.ReportStatus(context); - + BLOG_D("Group " << groupId << " has status " << context.GetOverallStatus()); storageGroupStatus.set_overall(context.GetOverallStatus()); } @@ -2727,6 +2981,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } void ReplyAndPassAway() { + Span.Event("ReplyAndPassAway"); THolder response = MakeHolder(); Ydb::Monitoring::SelfCheckResult& result = response->Result; @@ -2987,7 +3242,7 @@ class THealthCheckService : public TActorBootstrapped { } void Handle(TEvSelfCheckRequest::TPtr& ev) { - Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie)); + Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId))); } std::shared_ptr GRpcClientLow; diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index fa869c265be3..eefa30345e57 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -58,12 +59,35 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { const int GROUP_START_ID = 0x80000000; const int VCARD_START_ID = 55; + const int PDISK_START_ID = 42; + const int DEFAULT_GROUP_GENERATION = 3; const TPathId 
SUBDOMAIN_KEY = {7000000000, 1}; const TPathId SERVERLESS_DOMAIN_KEY = {7000000000, 2}; const TPathId SHARED_DOMAIN_KEY = {7000000000, 3}; const TString STORAGE_POOL_NAME = "/Root:test"; + struct TTestVSlotInfo { + std::optional Status; + ui32 Generation = DEFAULT_GROUP_GENERATION; + NKikimrBlobStorage::EDriveStatus PDiskStatus = NKikimrBlobStorage::ACTIVE; + + TTestVSlotInfo(std::optional status = NKikimrBlobStorage::READY, + ui32 generation = DEFAULT_GROUP_GENERATION) + : Status(status) + , Generation(generation) + { + } + + TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status, NKikimrBlobStorage::EDriveStatus pDiskStatus = NKikimrBlobStorage::ACTIVE) + : Status(status) + , PDiskStatus(pDiskStatus) + { + } + }; + + using TVDisks = TVector; + void ChangeDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size = 20000000, ui64 quota = 90000000) { auto record = (*ev)->Get()->MutableRecord(); auto pool = record->mutable_pathdescription()->mutable_domaindescription()->add_storagepools(); @@ -150,6 +174,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { entry->mutable_key()->set_groupid(groupId); entry->mutable_info()->set_erasurespeciesv2(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); entry->mutable_info()->set_storagepoolid(poolId); + entry->mutable_info()->set_generation(DEFAULT_GROUP_GENERATION); }; if (addStatic) { @@ -163,24 +188,33 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } void AddVSlotsToSysViewResponse(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr* ev, size_t groupCount, - const TVector& vdiskStatuses) { + const TVDisks& vslots, ui32 groupStartId = GROUP_START_ID, + bool withPdisk = false) { auto& record = (*ev)->Get()->Record; auto entrySample = record.entries(0); record.clear_entries(); - auto groupId = GROUP_START_ID; + auto groupId = groupStartId; const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); for (size_t i = 0; i < groupCount; ++i) { auto vslotId = VCARD_START_ID; - for (auto status: vdiskStatuses) { + auto 
pdiskId = PDISK_START_ID; + for (const auto& vslot : vslots) { auto* entry = record.add_entries(); entry->CopyFrom(entrySample); entry->mutable_key()->set_vslotid(vslotId); + if (withPdisk) { + entry->mutable_key()->set_pdiskid(pdiskId); + } entry->mutable_info()->set_groupid(groupId); entry->mutable_info()->set_failrealm(vslotId); - entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(status)->name()); + if (vslot.Status) { + entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(*vslot.Status)->name()); + } + entry->mutable_info()->set_groupgeneration(vslot.Generation); entry->mutable_info()->set_vdisk(vslotId); ++vslotId; + ++pdiskId; } ++groupId; } @@ -194,8 +228,26 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { entry->mutable_info()->set_name(STORAGE_POOL_NAME); } + void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, const TVDisks& vslots, double occupancy) { + auto& record = (*ev)->Get()->Record; + auto entrySample = record.entries(0); + record.clear_entries(); + auto pdiskId = PDISK_START_ID; + const size_t totalSize = 3'200'000'000'000ull; + const auto *descriptor = NKikimrBlobStorage::EDriveStatus_descriptor(); + for (const auto& vslot : vslots) { + auto* entry = record.add_entries(); + entry->CopyFrom(entrySample); + entry->mutable_key()->set_pdiskid(pdiskId); + entry->mutable_info()->set_totalsize(totalSize); + entry->mutable_info()->set_availablesize((1 - occupancy) * totalSize); + entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(vslot.PDiskStatus)->name()); + ++pdiskId; + } + } + void AddGroupVSlotInControllerConfigResponseWithStaticGroup(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, - const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVector& vdiskStatuses) + const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vslots) { auto& pbRecord = (*ev)->Get()->Record; auto pbConfig = pbRecord.mutable_response()->mutable_status(0)->mutable_baseconfig(); @@ 
-216,6 +268,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { staticGroup->set_storagepoolid(0); staticGroup->set_operatingstatus(groupStatus); staticGroup->set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + staticGroup->set_groupgeneration(DEFAULT_GROUP_GENERATION); auto group = pbConfig->add_group(); group->CopyFrom(groupSample); @@ -223,11 +276,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { group->set_storagepoolid(1); group->set_operatingstatus(groupStatus); group->set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + group->set_groupgeneration(DEFAULT_GROUP_GENERATION); group->clear_vslotid(); auto vslotId = VCARD_START_ID; - for (auto status: vdiskStatuses) { + for (const auto& vslotInfo : vslots) { auto vslot = pbConfig->add_vslot(); vslot->CopyFrom(vslotSample); vslot->set_vdiskidx(vslotId); @@ -239,8 +293,11 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { slotId->CopyFrom(vslotIdSample); slotId->set_vslotid(vslotId); - const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); - vslot->set_status(descriptor->FindValueByNumber(status)->name()); + if (vslotInfo.Status) { + const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); + vslot->set_status(descriptor->FindValueByNumber(*vslotInfo.Status)->name()); + } + vslot->set_groupgeneration(vslotInfo.Generation); vslotId++; } @@ -252,13 +309,13 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { sPool->set_name(STORAGE_POOL_NAME); }; - void AddVSlotInVDiskStateResponse(TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, int groupCount, int vslotCount) { + void AddVSlotInVDiskStateResponse(TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, int groupCount, int vslotCount, ui32 groupStartId = GROUP_START_ID) { auto& pbRecord = (*ev)->Get()->Record; auto sample = pbRecord.vdiskstateinfo(0); pbRecord.clear_vdiskstateinfo(); - auto groupId = GROUP_START_ID; + auto groupId = groupStartId; for (int i = 0; i < groupCount; i++) { auto slotId = VCARD_START_ID; for (int j = 0; j < vslotCount; j++) { @@ 
-273,6 +330,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } } + void ChangeGroupStateResponse(NNodeWhiteboard::TEvWhiteboard::TEvBSGroupStateResponse::TPtr* ev) { + for (auto& groupInfo : *(*ev)->Get()->Record.mutable_bsgroupstateinfo()) { + groupInfo.set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + } + } + void SetLongHostValue(TEvInterconnect::TEvNodesInfo::TPtr* ev) { TString host(1000000, 'a'); auto& pbRecord = (*ev)->Get()->Nodes; @@ -323,7 +386,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - AddVSlotsToSysViewResponse(x, groupNumber, {vdiscPerGroupNumber, NKikimrBlobStorage::EVDiskStatus::ERROR}); + AddVSlotsToSysViewResponse(x, groupNumber, TVDisks{vdiscPerGroupNumber, NKikimrBlobStorage::EVDiskStatus::ERROR}); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -383,7 +446,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { CheckHcResult(result, groupNumber, vdiscPerGroupNumber, isMergeRecords); } - Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVector& vdiskStatuses) { + Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vdisks, bool forStaticGroup = false, double occupancy = 0) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); @@ -413,12 +476,21 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, groupStatus, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, groupStatus, vdisks); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + if (forStaticGroup) { + AddVSlotsToSysViewResponse(x, 1, vdisks, 0, true); + } else { + 
AddVSlotsToSysViewResponse(x, 1, vdisks, GROUP_START_ID, true); + } + break; + } + case NSysView::TEvSysView::EvGetPDisksResponse: { + auto* x = reinterpret_cast(&ev); + AddPDisksToSysViewResponse(x, vdisks, occupancy); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -431,6 +503,19 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { AddStoragePoolsToSysViewResponse(x); break; } + case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateResponse: { + auto *x = reinterpret_cast(&ev); + if (forStaticGroup) { + AddVSlotInVDiskStateResponse(x, 1, vdisks.size(), 0); + } else { + AddVSlotInVDiskStateResponse(x, 1, vdisks.size()); + } + break; + } + case NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateResponse: { + auto* x = reinterpret_cast(&ev); + ChangeGroupStateResponse(x); + } } return TTestActorRuntime::EEventAction::PROCESS; @@ -444,10 +529,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { return runtime.GrabEdgeEvent(handle)->Result; } - void CheckHcResultHasIssuesWithStatus(Ydb::Monitoring::SelfCheckResult& result, const TString& type, const Ydb::Monitoring::StatusFlag::Status expectingStatus, ui32 total) { + void CheckHcResultHasIssuesWithStatus(Ydb::Monitoring::SelfCheckResult& result, const TString& type, + const Ydb::Monitoring::StatusFlag::Status expectingStatus, ui32 total, + std::string_view pool = "/Root:test") { int issuesCount = 0; for (const auto& issue_log : result.Getissue_log()) { - if (issue_log.type() == type && issue_log.location().storage().pool().name() == "/Root:test" && issue_log.status() == expectingStatus) { + if (issue_log.type() == type && issue_log.location().storage().pool().name() == pool && issue_log.status() == expectingStatus) { issuesCount++; } } @@ -570,25 +657,75 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } Y_UNIT_TEST(YellowGroupIssueWhenPartialGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, {NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, 
TVDisks{NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); } Y_UNIT_TEST(BlueGroupIssueWhenPartialGroupStatusAndReplicationDisks) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, {NKikimrBlobStorage::REPLICATING}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{NKikimrBlobStorage::REPLICATING}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::BLUE, 1); } Y_UNIT_TEST(OrangeGroupIssueWhenDegradedGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DEGRADED, {2, NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DEGRADED, TVDisks{2, NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 1); } Y_UNIT_TEST(RedGroupIssueWhenDisintegratedGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DISINTEGRATED, {3, NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DISINTEGRATED, TVDisks{3, NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1); } + Y_UNIT_TEST(StaticGroupIssue) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{NKikimrBlobStorage::ERROR}, /*forStatic*/ true); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1, "static"); + } + + Y_UNIT_TEST(GreenStatusWhenCreatingGroup) { + std::optional emptyStatus; + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{8, emptyStatus}); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + 
Y_UNIT_TEST(GreenStatusWhenInitPending) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{8, NKikimrBlobStorage::INIT_PENDING}); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + Y_UNIT_TEST(IgnoreOtherGenerations) { + TVDisks vdisks; + vdisks.emplace_back(NKikimrBlobStorage::ERROR, DEFAULT_GROUP_GENERATION - 1); + vdisks.emplace_back(NKikimrBlobStorage::READY, DEFAULT_GROUP_GENERATION); + vdisks.emplace_back(NKikimrBlobStorage::ERROR, DEFAULT_GROUP_GENERATION + 1); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, vdisks); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + Y_UNIT_TEST(YellowGroupIssueOnYellowSpace) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.9); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0); + } + + Y_UNIT_TEST(RedGroupIssueOnRedSpace) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.95); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1); + } + + Y_UNIT_TEST(YellowIssueReadyVDisksOnFaultyPDisks) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, {NKikimrBlobStorage::READY, NKikimrBlobStorage::FAULTY}}); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", 
Ydb::Monitoring::StatusFlag::ORANGE, 0); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0); + } + /* HC currently infers group status on its own, so it's never unknown Y_UNIT_TEST(RedGroupIssueWhenUnknownGroupStatus) { auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::UNKNOWN, {}); @@ -768,14 +905,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -870,14 +1005,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -983,14 +1116,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { 
auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1085,14 +1216,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1180,14 +1309,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { 
NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1293,14 +1420,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1439,14 +1564,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1592,14 +1715,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - 
AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1713,123 +1834,128 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].id(), "static"); } - void HiveSyncTest(bool syncPeriod) { + Y_UNIT_TEST(ShardsLimit999) { + ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); + } + + Y_UNIT_TEST(ShardsLimit995) { + ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); + } + + Y_UNIT_TEST(ShardsLimit905) { + ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); + } + + Y_UNIT_TEST(ShardsLimit800) { + ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + Y_UNIT_TEST(ShardsNoLimit) { + ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + bool HasDeadTabletIssue(const Ydb::Monitoring::SelfCheckResult& result) { + for (const auto& issue_log : result.issue_log()) { + if (issue_log.level() == 4 && issue_log.type() == "TABLET") { + return true; + } + } + return false; + } + + Y_UNIT_TEST(TestTabletIsDead) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); auto settings = TServerSettings(port) - .SetNodeCount(1) + .SetNodeCount(2) .SetDynamicNodeCount(1) .SetUseRealThreads(false) .SetDomainName("Root"); TServer server(settings); server.EnableGRpc(grpcPort); + TClient client(settings); - TTestActorRuntime& runtime = *server.GetRuntime(); - ui32 dynNodeId = runtime.GetNodeId(1); + TTestActorRuntime* runtime = server.GetRuntime(); + 
TActorId sender = runtime->AllocateEdgeActor(); - auto observerFunc = [&](TAutoPtr& ev) { - switch (ev->GetTypeRewrite()) { - case TEvHive::EvResponseHiveInfo: { - auto *x = reinterpret_cast(&ev); - auto& record = (*x)->Get()->Record; - record.SetStartTimeTimestamp(0); - if (syncPeriod) { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS / 2); - } else { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS * 2); - } - auto *tablet = record.MutableTablets()->Add(); - tablet->SetTabletID(1); - tablet->SetNodeID(dynNodeId); - tablet->SetTabletType(NKikimrTabletBase::TTabletTypes::DataShard); - tablet->SetVolatileState(NKikimrHive::TABLET_VOLATILE_STATE_BOOTING); - tablet->MutableObjectDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - tablet->MutableObjectDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case TEvHive::EvResponseHiveNodeStats: { - auto *x = reinterpret_cast(&ev); - auto &record = (*x)->Get()->Record; - auto *nodeStats = record.MutableNodeStats()->Add(); - nodeStats->SetNodeId(dynNodeId); - nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case NConsole::TEvConsole::EvGetTenantStatusResponse: { - auto *x = reinterpret_cast(&ev); - ChangeGetTenantStatusResponse(x, "/Root/database"); - break; - } - case TEvTxProxySchemeCache::EvNavigateKeySetResult: { - auto *x = reinterpret_cast(&ev); - TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front()); - entry.Status = TSchemeCacheNavigate::EStatus::Ok; - entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain; - entry.Path = {"Root", "database"}; - entry.DomainInfo = MakeIntrusive(SUBDOMAIN_KEY, SUBDOMAIN_KEY); + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - break; - 
} - } + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime.SetObserverFunc(observerFunc); + UNIT_ASSERT(HasDeadTabletIssue(result)); + } - TActorId sender = runtime.AllocateEdgeActor(); - TAutoPtr handle; + Y_UNIT_TEST(TestBootingTabletIsNotDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(1) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - auto *request = new NHealthCheck::TEvSelfCheckRequest; - request->Request.set_return_verbose_status(true); - request->Database = "/Root/database"; - runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); - const auto result = runtime.GrabEdgeEvent(handle)->Result; + TClient client(settings); - Cerr << result.ShortDebugString() << Endl; + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); - UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); - bool deadTabletIssueFoundInResult = false; - for (const auto &issue_log : result.issue_log()) { - if (issue_log.level() == 4 && issue_log.type() == "TABLET") { - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().id().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().type(), "DataShard"); - deadTabletIssueFoundInResult = true; - } - } + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, false); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - UNIT_ASSERT_VALUES_EQUAL(syncPeriod, !deadTabletIssueFoundInResult); - } + TAutoPtr handle; 
+ runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - Y_UNIT_TEST(HiveSyncPeriodIgnoresTabletsState) { - HiveSyncTest(true); + UNIT_ASSERT(!HasDeadTabletIssue(result)); } - Y_UNIT_TEST(AfterHiveSyncPeriodReportsTabletsState) { - HiveSyncTest(false); - } + Y_UNIT_TEST(TestReBootingTabletIsDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(2) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - Y_UNIT_TEST(ShardsLimit999) { - ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); - } + TClient client(settings); - Y_UNIT_TEST(ShardsLimit995) { - ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); - } + TTestActorRuntime* runtime = server.GetRuntime(); + runtime->SetLogPriority(NKikimrServices::HIVE, NActors::NLog::PRI_TRACE); + TActorId sender = runtime->AllocateEdgeActor(); - Y_UNIT_TEST(ShardsLimit905) { - ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); - } - Y_UNIT_TEST(ShardsLimit800) { - ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); - } + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, true); + server.SetupDynamicLocalService(3, "Root"); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - Y_UNIT_TEST(ShardsNoLimit) { - ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); + + 
UNIT_ASSERT(HasDeadTabletIssue(result)); } } } diff --git a/ydb/core/http_proxy/events.h b/ydb/core/http_proxy/events.h index 3c6ee15db123..501b35881bed 100644 --- a/ydb/core/http_proxy/events.h +++ b/ydb/core/http_proxy/events.h @@ -148,6 +148,40 @@ namespace NKikimr::NHttpProxy { }; }; + enum TEv { + EvYmqCloudAuthResponse + }; + + struct TEvYmqCloudAuthResponse: public TEventLocal< + TEvYmqCloudAuthResponse, + EvYmqCloudAuthResponse> { + struct TError { + TString ErrorCode; + ui32 HttpStatusCode; + TString Message; + }; + + bool IsSuccess; + + TString CloudId; + TString FolderId; + TString Sid; + + TMaybe Error; + + TEvYmqCloudAuthResponse(const TString& cloudId, const TString& folderId, const TString& sid) + : IsSuccess(true) + , CloudId(cloudId) + , FolderId(folderId) + , Sid(sid) + , Error(Nothing()) + {} + + TEvYmqCloudAuthResponse(TError& error) + : IsSuccess(false) + , Error(error) + {} + }; inline TActorId MakeAccessServiceID() { static const char x[12] = "accss_srvce"; @@ -184,6 +218,11 @@ namespace NKikimr::NHttpProxy { return TActorId(0, TStringBuf(x, 12)); } + inline TActorId MakeFolderServiceID() { + static const char x[12] = "folder_svc"; + return TActorId(0, TStringBuf(x, 12)); + } + #define LOG_SP_ERROR_S(actorCtxOrSystem, component, stream) LOG_ERROR_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) #define LOG_SP_WARN_S(actorCtxOrSystem, component, stream) LOG_WARN_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) #define LOG_SP_INFO_S(actorCtxOrSystem, component, stream) LOG_INFO_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) diff --git a/ydb/core/http_proxy/http_req.cpp b/ydb/core/http_proxy/http_req.cpp index f7aee26f521d..d3f2118be3db 100644 --- a/ydb/core/http_proxy/http_req.cpp +++ b/ydb/core/http_proxy/http_req.cpp @@ -45,6 +45,9 @@ #include #include +#include +#include + #include #include @@ -55,6 +58,15 @@ #include +#include +#include + +#include + +#include + +#include +#include namespace 
NKikimr::NHttpProxy { @@ -158,6 +170,7 @@ namespace NKikimr::NHttpProxy { } constexpr TStringBuf IAM_HEADER = "x-yacloud-subjecttoken"; + constexpr TStringBuf SECURITY_TOKEN_HEADER = "x-amz-security-token"; constexpr TStringBuf AUTHORIZATION_HEADER = "authorization"; constexpr TStringBuf REQUEST_ID_HEADER = "x-request-id"; constexpr TStringBuf REQUEST_ID_HEADER_EXT = "x-amzn-requestid"; @@ -166,11 +179,22 @@ namespace NKikimr::NHttpProxy { constexpr TStringBuf REQUEST_TARGET_HEADER = "x-amz-target"; constexpr TStringBuf REQUEST_CONTENT_TYPE_HEADER = "content-type"; constexpr TStringBuf CRC32_HEADER = "x-amz-crc32"; - static const TString CREDENTIAL_PARAM = "credential"; + constexpr TStringBuf CREDENTIAL_PARAM = "Credential"; + template - class THttpRequestProcessor : public IHttpRequestProcessor { + class TBaseHttpRequestProcessor : public IHttpRequestProcessor { public: + TBaseHttpRequestProcessor(TString method, TProtoCall protoCall) + : Method(method) + , ProtoCall(protoCall) + { + } + + const TString& Name() const override { + return Method; + } + enum TRequestState { StateIdle, StateAuthentication, @@ -179,34 +203,415 @@ namespace NKikimr::NHttpProxy { StateGrpcRequest, StateFinished }; + protected: + TString Method; + TProtoCall ProtoCall; + }; - enum TEv { - EvRequest, - EvResponse, - EvResult - }; - + template + class TYmqHttpRequestProcessor : public TBaseHttpRequestProcessor{ + using TProcessorBase = TBaseHttpRequestProcessor; public: - THttpRequestProcessor(TString method, TProtoCall protoCall) - : Method(method) - , ProtoCall(protoCall) + TYmqHttpRequestProcessor( + TString method, + TProtoCall protoCall, + std::function queueUrlExtractor) + : TProcessorBase(method, protoCall) + , QueueUrlExtractor(queueUrlExtractor) { } - const TString& Name() const override { - return Method; + void Execute(THttpRequestContext&& context, THolder signature, const TActorContext& ctx) override { + ctx.Register( + new TYmqHttpRequestActor( + std::move(context), + 
std::move(signature), + TProcessorBase::ProtoCall, + TProcessorBase::Method, + QueueUrlExtractor + ) + ); + } + + private: + class TYmqHttpRequestActor : public NActors::TActorBootstrapped { + public: + using TBase = NActors::TActorBootstrapped; + + TYmqHttpRequestActor( + THttpRequestContext&& httpContext, + THolder&& signature, + TProtoCall protoCall, + const TString& method, + std::function queueUrlExtractor) + : HttpContext(std::move(httpContext)) + , Signature(std::move(signature)) + , ProtoCall(protoCall) + , Method(method) + , QueueUrlExtractor(queueUrlExtractor) + { + } + + TStringBuilder LogPrefix() const { + return HttpContext.LogPrefix(); + } + + private: + STFUNC(StateWork) + { + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvWakeup, HandleTimeout); + HFunc(TEvServerlessProxy::TEvGrpcRequestResult, HandleGrpcResponse); + HFunc(TEvYmqCloudAuthResponse, HandleYmqCloudAuthorizationResponse); + default: + HandleUnexpectedEvent(ev); + break; + } + } + + void SendGrpcRequestNoDriver(const TActorContext& ctx) { + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; + LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, + "sending grpc request to '" << HttpContext.DiscoveryEndpoint << + "' database: '" << HttpContext.DatabasePath << + "' iam token size: " << HttpContext.IamToken.size()); + TMap peerMetadata { + {NYmq::V1::FOLDER_ID, FolderId}, + {NYmq::V1::CLOUD_ID, HttpContext.UserName ? 
HttpContext.UserName : CloudId}, + {NYmq::V1::USER_SID, UserSid}, + {NYmq::V1::REQUEST_ID, HttpContext.RequestId}, + {NYmq::V1::SECURITY_TOKEN, HttpContext.SecurityToken}, + }; + RpcFuture = NRpcService::DoLocalRpc( + std::move(Request), + HttpContext.DatabasePath, + HttpContext.SerializedUserToken, + Nothing(), + ctx.ActorSystem(), + peerMetadata + ); + RpcFuture.Subscribe( + [actorId = ctx.SelfID, actorSystem = ctx.ActorSystem()] + (const NThreading::TFuture& future) { + auto& response = future.GetValueSync(); + auto result = MakeHolder(); + Y_ABORT_UNLESS(response.operation().ready()); + if (response.operation().status() == Ydb::StatusIds::SUCCESS) { + TProtoResult rs; + response.operation().result().UnpackTo(&rs); + result->Message = MakeHolder(rs); + } + NYql::TIssues issues; + NYql::IssuesFromMessage(response.operation().issues(), issues); + result->Status = MakeHolder( + NYdb::EStatus(response.operation().status()), + std::move(issues) + ); + actorSystem->Send(actorId, result.Release()); + } + ); + return; + } + + void HandleUnexpectedEvent(const TAutoPtr& ev) { + Y_UNUSED(ev); + } + + void ReplyWithError( + const TActorContext& ctx, + NYdb::EStatus status, + const TString& errorText, + size_t issueCode = ISSUE_CODE_GENERIC) { + HttpContext.ResponseData.Status = status; + HttpContext.ResponseData.ErrorText = errorText; + + ReplyToHttpContext(ctx, issueCode); + + ctx.Send(AuthActor, new TEvents::TEvPoisonPill()); + + TBase::Die(ctx); + } + + void ReplyWithError( + const TActorContext& ctx, + ui32 httpStatusCode, + const TString& ymqStatusCode, + const TString& errorText) { + HttpContext.ResponseData.IsYmq = true; + HttpContext.ResponseData.Status = NYdb::EStatus::STATUS_UNDEFINED; + HttpContext.ResponseData.YmqHttpCode = httpStatusCode; + HttpContext.ResponseData.YmqStatusCode = ymqStatusCode; + HttpContext.ResponseData.ErrorText = errorText; + + ReplyToHttpContext(ctx); + + ctx.Send(AuthActor, new TEvents::TEvPoisonPill()); + + TBase::Die(ctx); + } + + void 
ReplyToHttpContext(const TActorContext& ctx, std::optional issueCode = std::nullopt) { + if (issueCode.has_value()) { + HttpContext.DoReply(ctx, issueCode.value()); + } else { + HttpContext.DoReply(ctx); + } + } + + void HandleGrpcResponse(TEvServerlessProxy::TEvGrpcRequestResult::TPtr ev, + const TActorContext& ctx) { + if (ev->Get()->Status->IsSuccess()) { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Got succesfult GRPC response."; + ); + ProtoToJson( + *ev->Get()->Message, + HttpContext.ResponseData.Body, + HttpContext.ContentType == MIME_CBOR + ); + HttpContext.ResponseData.IsYmq = true; + HttpContext.ResponseData.YmqHttpCode = 200; + ReplyToHttpContext(ctx); + } else { + auto retryClass = NYdb::NTopic::GetRetryErrorClass(ev->Get()->Status->GetStatus()); + + switch (retryClass) { + case ERetryErrorClass::ShortRetry: + case ERetryErrorClass::LongRetry: + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Retrying failed GRPC response" + ); + RetryCounter.Click(); + if (RetryCounter.HasAttemps()) { + return SendGrpcRequestNoDriver(ctx); + } + case ERetryErrorClass::NoRetry: + TString errorText; + TStringOutput stringOutput(errorText); + + ev->Get()->Status->GetIssues().PrintTo(stringOutput); + + RetryCounter.Void(); + + auto issues = ev->Get()->Status->GetIssues(); + auto errorAndCode = issues.Empty() + ? std::make_tuple( + NSQS::NErrors::INTERNAL_FAILURE.ErrorCode, + NSQS::NErrors::INTERNAL_FAILURE.HttpStatusCode) + : NKikimr::NSQS::TErrorClass::GetErrorAndCode(issues.begin()->GetCode()); + + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Not retrying GRPC response." 
+ << " Code: " << get<1>(errorAndCode) + << ", Error: " << get<0>(errorAndCode); + ); + + return ReplyWithError( + ctx, + get<1>(errorAndCode), + get<0>(errorAndCode), + issues.begin()->GetMessage() + ); + } + } + TBase::Die(ctx); + } + + void HandleTimeout(TEvents::TEvWakeup::TPtr ev, const TActorContext& ctx) { + Y_UNUSED(ev); + return ReplyWithError(ctx, NYdb::EStatus::TIMEOUT, "Request hasn't been completed by deadline"); + } + + void HandleYmqCloudAuthorizationResponse(TEvYmqCloudAuthResponse::TPtr ev, const TActorContext& ctx) { + if (ev->Get()->IsSuccess) { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + TStringBuilder() << "Got cloud auth response." + << " FolderId: " << ev->Get()->FolderId + << " CloudId: " << ev->Get()->CloudId + << " UserSid: " << ev->Get()->Sid; + ); + FolderId = ev->Get()->FolderId; + CloudId = ev->Get()->CloudId; + UserSid = ev->Get()->Sid; + SendGrpcRequestNoDriver(ctx); + } else { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + TStringBuilder() << "Got cloud auth response." 
+ << " HttpStatusCode: " << ev->Get()->Error->HttpStatusCode + << " ErrorCode: " << ev->Get()->Error->ErrorCode + << " Message: " << ev->Get()->Error->Message; + ); + ReplyWithError( + ctx, + ev->Get()->Error->HttpStatusCode, + ev->Get()->Error->ErrorCode, + ev->Get()->Error->Message + ); + } + } + + public: + void Bootstrap(const TActorContext& ctx) { + StartTime = ctx.Now(); + try { + HttpContext.RequestBodyToProto(&Request); + auto queueUrl = QueueUrlExtractor(Request); + if (!queueUrl.empty()) { + auto cloudIdAndResourceId = NKikimr::NYmq::CloudIdAndResourceIdFromQueueUrl(queueUrl); + if(cloudIdAndResourceId.Empty()) { + return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, "Invalid queue url"); + } + CloudId = cloudIdAndResourceId.Get()->first; + ResourceId = cloudIdAndResourceId.Get()->second; + } + } catch (const NKikimr::NSQS::TSQSException& e) { + NYds::EErrorCodes issueCode = NYds::EErrorCodes::OK; + if (e.ErrorClass.ErrorCode == "MissingParameter") { + issueCode = NYds::EErrorCodes::MISSING_PARAMETER; + } else if (e.ErrorClass.ErrorCode == "InvalidQueryParameter" + || e.ErrorClass.ErrorCode == "MalformedQueryString") { + issueCode = NYds::EErrorCodes::INVALID_ARGUMENT; + } + return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, e.what(), static_cast(issueCode)); + } catch (const std::exception& e) { + LOG_SP_WARN_S( + ctx, + NKikimrServices::HTTP_PROXY, + "got new request with incorrect json from [" << HttpContext.SourceAddress << "] " + ); + return ReplyWithError( + ctx, + NYdb::EStatus::BAD_REQUEST, + e.what(), + static_cast(NYds::EErrorCodes::INVALID_ARGUMENT) + ); + } + + LOG_SP_INFO_S( + ctx, + NKikimrServices::HTTP_PROXY, + "got new request from [" << HttpContext.SourceAddress << "]" + ); + + if (!HttpContext.ServiceConfig.GetHttpConfig().GetYandexCloudMode()) { + SendGrpcRequestNoDriver(ctx); + } else { + auto requestHolder = MakeHolder(); + NSQS::EAction action = NSQS::EAction::Unknown; + if (Method == "CreateQueue") { + action = 
NSQS::EAction::CreateQueue; + } else if (Method == "GetQueueUrl") { + action = NSQS::EAction::GetQueueUrl; + } else if (Method == "SendMessage") { + action = NSQS::EAction::SendMessage; + } else if (Method == "ReceiveMessage") { + action = NSQS::EAction::ReceiveMessage; + } else if (Method == "GetQueueAttributes") { + action = NSQS::EAction::GetQueueAttributes; + } else if (Method == "ListQueues") { + action = NSQS::EAction::ListQueues; + } else if (Method == "DeleteMessage") { + action = NSQS::EAction::DeleteMessage; + } else if (Method == "PurgeQueue") { + action = NSQS::EAction::PurgeQueue; + } else if (Method == "DeleteQueue") { + action = NSQS::EAction::DeleteQueue; + } else if (Method == "ChangeMessageVisibility") { + action = NSQS::EAction::ChangeMessageVisibility; + } else if (Method == "SetQueueAttributes") { + action = NSQS::EAction::SetQueueAttributes; + } else if (Method == "SendMessageBatch") { + action = NSQS::EAction::SendMessageBatch; + }else if (Method == "DeleteMessageBatch") { + action = NSQS::EAction::DeleteMessageBatch; + } else if (Method == "ChangeMessageVisibilityBatch") { + action = NSQS::EAction::ChangeMessageVisibilityBatch; + } else if (Method == "ListDeadLetterSourceQueues") { + action = NSQS::EAction::ListDeadLetterSourceQueues; + } + + requestHolder->SetRequestId(HttpContext.RequestId); + + NSQS::TAuthActorData data { + .SQSRequest = std::move(requestHolder), + .UserSidCallback = [](const TString& userSid) { Y_UNUSED(userSid); }, + .EnableQueueLeader = true, + .Action = action, + .ExecutorPoolID = PoolId, + .CloudID = CloudId, + .ResourceID = ResourceId, + .Counters = nullptr, + .AWSSignature = std::move(HttpContext.GetSignature()), + .IAMToken = HttpContext.IamToken, + .FolderID = HttpContext.FolderId + }; + + auto authRequestProxy = MakeHolder( + std::move(data), + "", + ctx.SelfID); + + ctx.RegisterWithSameMailbox(authRequestProxy.Release()); + } + + ctx.Schedule(RequestTimeout, new TEvents::TEvWakeup()); + + 
TBase::Become(&TYmqHttpRequestActor::StateWork); + } + + private: + TInstant StartTime; + typename TProcessorBase::TRequestState RequestState = TProcessorBase::TRequestState::StateIdle; + TProtoRequest Request; + TDuration RequestTimeout = TDuration::Seconds(60); + ui32 PoolId; + THttpRequestContext HttpContext; + THolder Signature; + THolder>> Future; + NThreading::TFuture RpcFuture; + THolder> DiscoveryFuture; + TProtoCall ProtoCall; + TString Method; + std::function QueueUrlExtractor; + TRetryCounter RetryCounter; + TActorId AuthActor; + bool InputCountersReported = false; + TString FolderId; + TString CloudId; + TString ResourceId; + TString UserSid; + }; + + std::function QueueUrlExtractor; + }; + + template + class THttpRequestProcessor : public TBaseHttpRequestProcessor{ + using TProcessorBase = TBaseHttpRequestProcessor; + public: + THttpRequestProcessor(TString method, TProtoCall protoCall) : TProcessorBase(method, protoCall) + { } void Execute(THttpRequestContext&& context, THolder signature, const TActorContext& ctx) override { ctx.Register(new THttpRequestActor( std::move(context), std::move(signature), - ProtoCall, Method)); + TProcessorBase::ProtoCall, TProcessorBase::Method)); } private: - class THttpRequestActor : public NActors::TActorBootstrapped { public: using TBase = NActors::TActorBootstrapped; @@ -244,7 +649,7 @@ namespace NKikimr::NHttpProxy { void SendYdbDriverRequest(const TActorContext& ctx) { Y_ABORT_UNLESS(HttpContext.Driver); - RequestState = StateAuthorization; + RequestState = TProcessorBase::TRequestState::StateAuthorization; auto request = MakeHolder(); request->DatabasePath = HttpContext.DatabasePath; @@ -253,7 +658,7 @@ namespace NKikimr::NHttpProxy { } void CreateClient(const TActorContext& ctx) { - RequestState = StateListEndpoints; + RequestState = TProcessorBase::TRequestState::StateListEndpoints; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "create client to '" << HttpContext.DiscoveryEndpoint << "' database: '" << 
HttpContext.DatabasePath << @@ -282,7 +687,7 @@ namespace NKikimr::NHttpProxy { } void SendGrpcRequestNoDriver(const TActorContext& ctx) { - RequestState = StateGrpcRequest; + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << "' database: '" << HttpContext.DatabasePath << @@ -310,7 +715,7 @@ namespace NKikimr::NHttpProxy { } void SendGrpcRequest(const TActorContext& ctx) { - RequestState = StateGrpcRequest; + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << "' database: '" << HttpContext.DatabasePath << @@ -588,7 +993,7 @@ namespace NKikimr::NHttpProxy { private: TInstant StartTime; - TRequestState RequestState = StateIdle; + typename TProcessorBase::TRequestState RequestState = TProcessorBase::TRequestState::StateIdle; TProtoRequest Request; TDuration RequestTimeout = TDuration::Seconds(60); ui32 PoolId; @@ -606,62 +1011,100 @@ namespace NKikimr::NHttpProxy { TActorId AuthActor; bool InputCountersReported = false; }; - - private: - TString Method; - - struct TAccessKeySignature { - TString AccessKeyId; - TString SignedString; - TString Signature; - TString Region; - TInstant SignedAt; - }; - - TProtoCall ProtoCall; }; + template + TString ExtractQueueName(TProtoRequest& request) { + return request.GetQueueUrl(); + }; void THttpRequestProcessors::Initialize() { - #define DECLARE_PROCESSOR(name) Name2Processor[#name] = MakeHolder> \ (#name, &Ydb::DataStreams::V1::DataStreamsService::Stub::Async##name); - DECLARE_PROCESSOR(PutRecords); - DECLARE_PROCESSOR(CreateStream); - DECLARE_PROCESSOR(ListStreams); - DECLARE_PROCESSOR(DeleteStream); - DECLARE_PROCESSOR(UpdateStream); - DECLARE_PROCESSOR(DescribeStream); - DECLARE_PROCESSOR(ListShards); - DECLARE_PROCESSOR(PutRecord); - DECLARE_PROCESSOR(GetRecords); - 
DECLARE_PROCESSOR(GetShardIterator); - DECLARE_PROCESSOR(DescribeLimits); - DECLARE_PROCESSOR(DescribeStreamSummary); - DECLARE_PROCESSOR(DecreaseStreamRetentionPeriod); - DECLARE_PROCESSOR(IncreaseStreamRetentionPeriod); - DECLARE_PROCESSOR(UpdateShardCount); - DECLARE_PROCESSOR(UpdateStreamMode); - DECLARE_PROCESSOR(RegisterStreamConsumer); - DECLARE_PROCESSOR(DeregisterStreamConsumer); - DECLARE_PROCESSOR(DescribeStreamConsumer); - DECLARE_PROCESSOR(ListStreamConsumers); - DECLARE_PROCESSOR(AddTagsToStream); - DECLARE_PROCESSOR(DisableEnhancedMonitoring); - DECLARE_PROCESSOR(EnableEnhancedMonitoring); - DECLARE_PROCESSOR(ListTagsForStream); - DECLARE_PROCESSOR(MergeShards); - DECLARE_PROCESSOR(RemoveTagsFromStream); - DECLARE_PROCESSOR(SplitShard); - DECLARE_PROCESSOR(StartStreamEncryption); - DECLARE_PROCESSOR(StopStreamEncryption); - #undef DECLARE_PROCESSOR + + DECLARE_DATASTREAMS_PROCESSOR(PutRecords); + DECLARE_DATASTREAMS_PROCESSOR(CreateStream); + DECLARE_DATASTREAMS_PROCESSOR(ListStreams); + DECLARE_DATASTREAMS_PROCESSOR(DeleteStream); + DECLARE_DATASTREAMS_PROCESSOR(UpdateStream); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStream); + DECLARE_DATASTREAMS_PROCESSOR(ListShards); + DECLARE_DATASTREAMS_PROCESSOR(PutRecord); + DECLARE_DATASTREAMS_PROCESSOR(GetRecords); + DECLARE_DATASTREAMS_PROCESSOR(GetShardIterator); + DECLARE_DATASTREAMS_PROCESSOR(DescribeLimits); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStreamSummary); + DECLARE_DATASTREAMS_PROCESSOR(DecreaseStreamRetentionPeriod); + DECLARE_DATASTREAMS_PROCESSOR(IncreaseStreamRetentionPeriod); + DECLARE_DATASTREAMS_PROCESSOR(UpdateShardCount); + DECLARE_DATASTREAMS_PROCESSOR(UpdateStreamMode); + DECLARE_DATASTREAMS_PROCESSOR(RegisterStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(DeregisterStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(ListStreamConsumers); + DECLARE_DATASTREAMS_PROCESSOR(AddTagsToStream); + 
DECLARE_DATASTREAMS_PROCESSOR(DisableEnhancedMonitoring); + DECLARE_DATASTREAMS_PROCESSOR(EnableEnhancedMonitoring); + DECLARE_DATASTREAMS_PROCESSOR(ListTagsForStream); + DECLARE_DATASTREAMS_PROCESSOR(MergeShards); + DECLARE_DATASTREAMS_PROCESSOR(RemoveTagsFromStream); + DECLARE_DATASTREAMS_PROCESSOR(SplitShard); + DECLARE_DATASTREAMS_PROCESSOR(StartStreamEncryption); + DECLARE_DATASTREAMS_PROCESSOR(StopStreamEncryption); + #undef DECLARE_DATASTREAMS_PROCESSOR + + + #define DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(name) Name2YmqProcessor[#name] = MakeHolder> \ + (#name, &Ydb::Ymq::V1::YmqService::Stub::AsyncYmq##name, [](Ydb::Ymq::V1::name##Request&){return "";}); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(GetQueueUrl); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(CreateQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(ListQueues); + #undef DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN + + #define DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(name) Name2YmqProcessor[#name] = MakeHolder> \ + (#name, &Ydb::Ymq::V1::YmqService::Stub::AsyncYmq##name, [](Ydb::Ymq::V1::name##Request& request){return request.Getqueue_url();}); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SendMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ReceiveMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(GetQueueAttributes); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(PurgeQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ChangeMessageVisibility); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SetQueueAttributes); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SendMessageBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteMessageBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ChangeMessageVisibilityBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ListDeadLetterSourceQueues); + #undef DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN + } + + void SetApiVersionDisabledErrorText(THttpRequestContext& context) { + context.ResponseData.ErrorText = (TStringBuilder() << context.ApiVersion << " is disabled"); } bool 
THttpRequestProcessors::Execute(const TString& name, THttpRequestContext&& context, THolder signature, const TActorContext& ctx) { - if (auto proc = Name2Processor.find(name); proc != Name2Processor.end()) { + THashMap>* Name2Processor; + if (context.ApiVersion == "AmazonSQS") { + if (!context.ServiceConfig.GetHttpConfig().GetYmqEnabled()) { + context.ResponseData.IsYmq = true; + context.ResponseData.YmqHttpCode = 400; + SetApiVersionDisabledErrorText(context); + } + Name2Processor = &Name2YmqProcessor; + } else { + if (!context.ServiceConfig.GetHttpConfig().GetDataStreamsEnabled()) { + context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; + SetApiVersionDisabledErrorText(context); + } + Name2Processor = &Name2DataStreamsProcessor; + } + + if (auto proc = Name2Processor->find(name); proc != Name2Processor->end()) { proc->second->Execute(std::move(context), std::move(signature), ctx); return true; } @@ -705,10 +1148,15 @@ namespace NKikimr::NHttpProxy { SourceAddress = address; } - DatabasePath = Request->URL; + DatabasePath = Request->URL.Before('?'); if (DatabasePath == "/") { DatabasePath = ""; } + auto params = TCgiParameters(Request->URL.After('?')); + if (auto it = params.Find("folderId"); it != params.end()) { + FolderId = it->second; + } + //TODO: find out databaseId ParseHeaders(Request->Headers); } @@ -775,13 +1223,23 @@ namespace NKikimr::NHttpProxy { LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "reply with status: " << ResponseData.Status << " message: " << ResponseData.ErrorText); - ResponseData.Body.SetType(NJson::JSON_MAP); ResponseData.Body["message"] = ResponseData.ErrorText; - ResponseData.Body["__type"] = MapToException(ResponseData.Status, MethodName, issueCode).first; + if (ResponseData.IsYmq) { + ResponseData.Body["__type"] = ResponseData.YmqStatusCode; + } else { + ResponseData.Body["__type"] = MapToException(ResponseData.Status, MethodName, issueCode).first; + } } - auto [errorName, httpCode] = MapToException(ResponseData.Status, 
MethodName, issueCode); + TString errorName; + ui32 httpCode; + if (ResponseData.IsYmq) { + httpCode = ResponseData.YmqHttpCode; + errorName = ResponseData.YmqStatusCode; + } else { + std::tie(errorName, httpCode) = MapToException(ResponseData.Status, MethodName, issueCode); + } auto response = createResponse( Request, TStringBuilder() << (ui32)httpCode, @@ -793,15 +1251,42 @@ namespace NKikimr::NHttpProxy { ctx.Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); } + TMaybe ExtractUserName(const TStringBuf& authorizationHeader) { + const size_t spacePos = authorizationHeader.find(' '); + if (spacePos == TString::npos) { + return Nothing(); + } + auto restOfHeader = authorizationHeader.substr(spacePos + 1); + if (restOfHeader.StartsWith(CREDENTIAL_PARAM)) { + const size_t equalsPos = restOfHeader.find('='); + if (equalsPos == TString::npos) { + return Nothing(); + } + const size_t slashPos = restOfHeader.find('/'); + if (slashPos == TString::npos || slashPos < equalsPos) { + return Nothing(); + } + return restOfHeader.substr(equalsPos + 1, slashPos - equalsPos - 1); + } + return Nothing(); + } + void THttpRequestContext::ParseHeaders(TStringBuf str) { TString sourceReqId; NHttp::THeaders headers(str); for (const auto& header : headers.Headers) { if (AsciiEqualsIgnoreCase(header.first, IAM_HEADER)) { IamToken = header.second; + } else if(AsciiEqualsIgnoreCase(header.first, SECURITY_TOKEN_HEADER)) { + SecurityToken = header.second; } else if (AsciiEqualsIgnoreCase(header.first, AUTHORIZATION_HEADER)) { if (header.second.StartsWith("Bearer ")) { IamToken = header.second; + } else { + auto userName = ExtractUserName(header.second); + if (userName.Defined()) { + UserName = userName.GetRef(); + } } } else if (AsciiEqualsIgnoreCase(header.first, REQUEST_ID_HEADER)) { sourceReqId = header.second; diff --git a/ydb/core/http_proxy/http_req.h b/ydb/core/http_proxy/http_req.h index 10b0c84f4d4b..e7dbe4d2e9e9 100644 --- a/ydb/core/http_proxy/http_req.h +++ 
b/ydb/core/http_proxy/http_req.h @@ -52,9 +52,12 @@ class TRetryCounter { struct THttpResponseData { + bool IsYmq = false; NYdb::EStatus Status{NYdb::EStatus::SUCCESS}; NJson::TJsonValue Body; TString ErrorText{"OK"}; + TString YmqStatusCode; + ui32 YmqHttpCode; TString DumpBody(MimeTypes contentType); }; @@ -85,7 +88,9 @@ struct THttpRequestContext { TString ApiVersion; // used once MimeTypes ContentType{MIME_UNKNOWN}; TString IamToken; + TString SecurityToken; TString SerializedUserToken; + TString UserName; TStringBuilder LogPrefix() const { return TStringBuilder() << "http request [" << MethodName << "] requestId [" << RequestId << "]"; @@ -119,7 +124,8 @@ class THttpRequestProcessors { const TActorContext& ctx); private: - THashMap> Name2Processor; + THashMap> Name2DataStreamsProcessor; + THashMap> Name2YmqProcessor; }; NActors::IActor* CreateAccessServiceActor(const NKikimrConfig::TServerlessProxyConfig& config); diff --git a/ydb/core/http_proxy/http_service.cpp b/ydb/core/http_proxy/http_service.cpp index 7ac573c3be2b..968979070a47 100644 --- a/ydb/core/http_proxy/http_service.cpp +++ b/ydb/core/http_proxy/http_service.cpp @@ -103,7 +103,8 @@ namespace NKikimr::NHttpProxy { try { auto signature = context.GetSignature(); - Processors->Execute(context.MethodName, std::move(context), std::move(signature), ctx); + auto methodName = context.MethodName; + Processors->Execute(std::move(methodName), std::move(context), std::move(signature), ctx); } catch (const NKikimr::NSQS::TSQSException& e) { context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; context.ResponseData.ErrorText = e.what(); diff --git a/ydb/core/http_proxy/json_proto_conversion.h b/ydb/core/http_proxy/json_proto_conversion.h index 13f30e46c087..61e3ff14c20f 100644 --- a/ydb/core/http_proxy/json_proto_conversion.h +++ b/ydb/core/http_proxy/json_proto_conversion.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include @@ -21,11 +23,9 @@ inline TString ProxyFieldNameConverter(const 
google::protobuf::FieldDescriptor& class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { public: - TYdsProtoToJsonPrinter(const google::protobuf::Reflection* reflection, - const NProtobufJson::TProto2JsonConfig& config, + TYdsProtoToJsonPrinter(const NProtobufJson::TProto2JsonConfig& config, bool skipBase64Encode) : NProtobufJson::TProto2JsonPrinter(config) - , ProtoReflection(reflection) , SkipBase64Encode(skipBase64Encode) {} @@ -42,9 +42,9 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { void PrintField(const NProtoBuf::Message& proto, const NProtoBuf::FieldDescriptor& field, NProtobufJson::IJsonOutput& json, TStringBuf key = {}) override { - if (field.options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_BASE64) { + if (field.options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_BASE64) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 is only supported for strings"); if (!key) { @@ -59,20 +59,21 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { return Base64Encode(str); }; + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; ++i) { PrintStringValue(field, TStringBuf(), - maybeBase64Encode(proto.GetReflection()->GetRepeatedString(proto, &field, i)), json); + maybeBase64Encode(reflection->GetRepeatedString(proto, &field, i)), json); } } else { PrintStringValue(field, key, - maybeBase64Encode(proto.GetReflection()->GetString(proto, &field)), json); + maybeBase64Encode(reflection->GetString(proto, &field)), json); } return; 
} - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_INT64, "Double S to Int MS is only supported for int64 timestamps"); @@ -80,20 +81,21 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { key = MakeKey(field); } + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { - double value = proto.GetReflection()->GetRepeatedInt64(proto, &field, i) / 1000.0; + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; ++i) { + double value = reflection->GetRepeatedInt64(proto, &field, i) / 1000.0; PrintDoubleValue(TStringBuf(), value, json); } } else { - double value = proto.GetReflection()->GetInt64(proto, &field) / 1000.0; + double value = reflection->GetInt64(proto, &field) / 1000.0; PrintDoubleValue(key, value, json); } return; } - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Empty to nothing is only supported for strings"); @@ -101,19 +103,20 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { key = MakeKey(field); } + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { - auto value = proto.GetReflection()->GetRepeatedString(proto, &field, i); + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; 
++i) { + auto value = reflection->GetRepeatedString(proto, &field, i); if (!value.empty()) { PrintStringValue(field, TStringBuf(), - proto.GetReflection()->GetRepeatedString(proto, &field, i), json); + reflection->GetRepeatedString(proto, &field, i), json); } } } else { - auto value = proto.GetReflection()->GetString(proto, &field); + auto value = reflection->GetString(proto, &field); if (!value.empty()) { PrintStringValue(field, key, - proto.GetReflection()->GetString(proto, &field), json); + reflection->GetString(proto, &field), json); } } return; @@ -124,7 +127,6 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { } private: - const google::protobuf::Reflection* ProtoReflection = nullptr; bool SkipBase64Encode; }; @@ -133,12 +135,90 @@ inline void ProtoToJson(const NProtoBuf::Message& resp, NJson::TJsonValue& value .SetFormatOutput(false) .SetMissingSingleKeyMode(NProtobufJson::TProto2JsonConfig::MissingKeyDefault) .SetNameGenerator(ProxyFieldNameConverter) + .SetMapAsObject(true) .SetEnumMode(NProtobufJson::TProto2JsonConfig::EnumName); - TYdsProtoToJsonPrinter printer(resp.GetReflection(), config, skipBase64Encode); + TYdsProtoToJsonPrinter printer(config, skipBase64Encode); printer.Print(resp, *NProtobufJson::CreateJsonMapOutput(value)); } -inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) { +template +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const JSON& jsonObject, + ui32 depth, + std::function extractMap, + std::function valueToString, + std::function jsonObjectToMessage +) { + const auto& protoMap = reflection->GetMutableRepeatedFieldRef(message, fieldDescriptor); + for (const auto& [key, value] : extractMap(jsonObject)) { + std::unique_ptr mapEntry( + google::protobuf::MessageFactory::generated_factory() + 
->GetPrototype(fieldDescriptor->message_type()) + ->New(message->GetArena()) + ); + mapEntry + ->GetReflection() + ->SetString(mapEntry.get(), fieldDescriptor->message_type()->field(0), key); + + auto valueField = fieldDescriptor->message_type()->field(1); + if (valueField->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + auto *msg = mapEntry->GetReflection()->MutableMessage(mapEntry.get(), valueField); + jsonObjectToMessage(value, msg, depth); + } else if (valueField->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING) { + mapEntry->GetReflection()->SetString(mapEntry.get(), valueField, valueToString(value)); + } else { + throw NKikimr::NSQS::TSQSException(NKikimr::NSQS::NErrors::INVALID_PARAMETER_VALUE) + << "Only String and Object can be converted to protobuf map"; + } + protoMap.Add(std::move(*mapEntry)); + } +} + +void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0); + +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const NJson::TJsonValue& jsonObject, + ui32 depth +) { + AddJsonObjectToProtoAsMap( + fieldDescriptor, + reflection, + message, + jsonObject, + depth, + [](auto& json) { return json.GetMap(); }, + [](auto& value) -> const TString { return value.GetString(); }, + [](auto& json, auto message, auto depth) { JsonToProto(json, message, depth); } + ); +} +void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth = 0); + +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const nlohmann::basic_json<>& jsonObject, + ui32 depth +) { + AddJsonObjectToProtoAsMap, std::map>>( + fieldDescriptor, + reflection, + message, + jsonObject, + depth, + [](auto& json) { return 
json.template get>>(); }, + [](auto& value) -> const TString { return value.template get(); }, + [](auto& json, auto message, auto depth) { NlohmannJsonToProto(json, message, depth); } + ); +} + +inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth) { Y_ENSURE(depth < 101, "Json depth is > 100"); Y_ENSURE_EX( !jsonValue.IsNull(), @@ -155,28 +235,28 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* "Unexpected json key: " << key ); Y_ENSURE(fieldDescriptor, "Unexpected json key: " + key); - auto transformer = Ydb::DataStreams::V1::TRANSFORM_NONE; - if (fieldDescriptor->options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - transformer = fieldDescriptor->options().GetExtension(Ydb::DataStreams::V1::FieldTransformer); + auto transformer = Ydb::FieldTransformation::TRANSFORM_NONE; + if (fieldDescriptor->options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + transformer = fieldDescriptor->options().GetExtension(Ydb::FieldTransformation::FieldTransformer); } if (value.IsArray()) { Y_ENSURE(fieldDescriptor->is_repeated()); for (auto& elem : value.GetArray()) { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is only applicable to strings"); reflection->AddString(message, fieldDescriptor, Base64Decode(elem.GetString())); break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->AddInt64(message, fieldDescriptor, elem.GetDouble() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) 
{ case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->AddInt32(message, fieldDescriptor, elem.GetInteger()); @@ -233,19 +313,19 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* } } else { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is applicable only to strings"); reflection->SetString(message, fieldDescriptor, Base64Decode(value.GetString())); break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->SetInt64(message, fieldDescriptor, value.GetDouble() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->SetInt32(message, fieldDescriptor, value.GetInteger()); @@ -286,8 +366,12 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* reflection->SetString(message, fieldDescriptor, value.GetString()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - auto *msg = reflection->MutableMessage(message, fieldDescriptor); - JsonToProto(value, msg, depth + 1); + if (fieldDescriptor->is_map()) { + AddJsonObjectToProtoAsMap(fieldDescriptor, reflection, message, value, depth + 1); + } else { + auto *msg = reflection->MutableMessage(message, fieldDescriptor); + JsonToProto(value, msg, depth + 1); + } break; } default: @@ -301,7 +385,7 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* } } -inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) 
{ +inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth) { Y_ENSURE(depth < 101, "Json depth is > 100"); Y_ENSURE_EX( !jsonValue.is_null(), @@ -313,16 +397,16 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess for (const auto& [key, value] : jsonValue.get>()) { auto* fieldDescriptor = desc->FindFieldByName(NNaming::CamelToSnakeCase(key.c_str())); Y_ENSURE(fieldDescriptor, "Unexpected json key: " + key); - auto transformer = Ydb::DataStreams::V1::TRANSFORM_NONE; - if (fieldDescriptor->options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - transformer = fieldDescriptor->options().GetExtension(Ydb::DataStreams::V1::FieldTransformer); + auto transformer = Ydb::FieldTransformation::TRANSFORM_NONE; + if (fieldDescriptor->options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + transformer = fieldDescriptor->options().GetExtension(Ydb::FieldTransformation::FieldTransformer); } if (value.is_array()) { Y_ENSURE(fieldDescriptor->is_repeated()); for (auto& elem : value) { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is only applicable to strings"); @@ -333,12 +417,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->AddInt64(message, fieldDescriptor, elem.get() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: 
reflection->AddInt32(message, fieldDescriptor, elem.get()); @@ -395,7 +479,7 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } } else { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is applicable only to strings"); @@ -406,12 +490,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->SetInt64(message, fieldDescriptor, value.get() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->SetInt32(message, fieldDescriptor, value.get()); @@ -452,8 +536,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess reflection->SetString(message, fieldDescriptor, value.get()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - auto *msg = reflection->MutableMessage(message, fieldDescriptor); - NlohmannJsonToProto(value, msg, depth + 1); + if (fieldDescriptor->is_map()) { + AddJsonObjectToProtoAsMap(fieldDescriptor, reflection, message, value, depth); + } else { + auto *msg = reflection->MutableMessage(message, fieldDescriptor); + NlohmannJsonToProto(value, msg, depth + 1); + } break; } default: diff --git a/ydb/core/http_proxy/ut/datastreams_fixture.h b/ydb/core/http_proxy/ut/datastreams_fixture.h index 77bfab022d81..5a4c98fcd68e 100644 --- a/ydb/core/http_proxy/ut/datastreams_fixture.h +++ b/ydb/core/http_proxy/ut/datastreams_fixture.h @@ -33,6 +33,9 
@@ #include +#include +#include + using TJMap = NJson::TJsonValue::TMapType; using TJVector = NJson::TJsonValue::TArray; @@ -178,6 +181,18 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { } + static NJson::TJsonValue CreateSqsGetQueueUrlRequest() { + NJson::TJsonValue record; + record["QueueName"] = "ExampleQueueName"; + return record; + } + + static NJson::TJsonValue CreateSqsCreateQueueRequest() { + NJson::TJsonValue record; + record["QueueName"] = "ExampleQueueName"; + return record; + } + THttpResult SendHttpRequestRaw(const TString& handler, const TString& target, const IOutputStream::TPart& body, const TString& authorizationStr, const TString& contentType = "application/json") { @@ -321,6 +336,34 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { } private: + TMaybe RunYqlDataQuery(TString query) { + TString endpoint = TStringBuilder() << "localhost:" << KikimrGrpcPort; + auto driverConfig = NYdb::TDriverConfig() + .SetEndpoint(endpoint) + .SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); + NYdb::TDriver driver(driverConfig); + auto tableClient = NYdb::NTable::TTableClient(driver); + + TMaybe resultSet; + + auto operationResult = tableClient.RetryOperationSync([&](NYdb::NTable::TSession session) { + NYdb::TParamsBuilder paramsBuilder; + auto queryResult = session.ExecuteDataQuery( + query, + NYdb::NTable::TTxControl::BeginTx(NYdb::NTable::TTxSettings::SerializableRW()).CommitTx(), + paramsBuilder.Build() + ).GetValueSync(); + + if (queryResult.IsSuccess() && queryResult.GetResultSets().size() > 0) { + resultSet = queryResult.GetResultSet(0); + } + return queryResult; + }); + + Y_ABORT_UNLESS(operationResult.IsSuccess()); + return resultSet; + } + void InitKikimr() { AuthFactory = std::make_shared(); NKikimrConfig::TAppConfig appConfig; @@ -331,6 +374,10 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { appConfig.MutablePQConfig()->AddValidWriteSpeedLimitsKbPerSec(1_KB); 
appConfig.MutablePQConfig()->MutableBillingMeteringConfig()->SetEnabled(true); + appConfig.MutableSqsConfig()->SetEnableSqs(true); + appConfig.MutableSqsConfig()->SetYandexCloudMode(true); + appConfig.MutableSqsConfig()->SetEnableDeadLetterQueues(true); + auto limit = appConfig.MutablePQConfig()->AddValidRetentionLimits(); limit->SetMinPeriodSeconds(0); limit->SetMaxPeriodSeconds(TDuration::Days(1).Seconds()); @@ -356,6 +403,7 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { server->ServerSettings->SetUseRealThreads(false); KikimrServer = THolder(server); + KikimrGrpcPort = KikimrServer->ServerSettings->GrpcPort; ActorRuntime = KikimrServer->GetRuntime(); @@ -375,14 +423,211 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { acl.AddAccess(NACLib::EAccessType::Allow, NACLib::GenericFull, "proxy_sa@as"); client.ModifyACL("/", "Root", acl.SerializeAsString()); + + client.MkDir("/Root", "SQS"); + + client.CreateTable("/Root/SQS", + "Name: \".Queues\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"QueueId\" Type: \"String\"}" + "Columns { Name: \"QueueState\" Type: \"Uint64\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"DeadLetterQueue\" Type: \"Bool\"}" + "Columns { Name: \"CreatedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"Shards\" Type: \"Uint64\"}" + "Columns { Name: \"Partitions\" Type: \"Uint64\"}" + "Columns { Name: \"MasterTabletId\" Type: \"Uint64\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "Columns { Name: \"Version\" Type: \"Uint64\"}" + "Columns { Name: \"DlqName\" Type: \"Utf8\"}" + "Columns { Name: \"TablesFormat\" Type: \"Uint32\"}" + "KeyColumnNames: [\"Account\", \"QueueName\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".RemovedQueues\"" + "Columns { Name: \"RemoveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: 
\"Uint64\"}" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"Shards\" Type: \"Uint32\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "Columns { Name: \"TablesFormat\" Type: \"Uint32\"}" + "Columns { Name: \"StartProcessTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"NodeProcess\" Type: \"Uint32\"}" + "KeyColumnNames: [\"RemoveTimestamp\", \"QueueIdNumber\"]" + ); + + client.MkDir("/Root/SQS", ".STD"); + client.CreateTable("/Root/SQS/.STD", + "Name: \"Messages\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"Offset\"]" + ); + + client.MkDir("/Root/SQS", ".FIFO"); + client.CreateTable("/Root/SQS/.FIFO", + "Name: \"Messages\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"GroupId\" Type: \"String\"}" + "Columns { Name: \"NextOffset\" Type: \"Uint64\"}" + "Columns { Name: \"NextRandomId\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveCount\" Type: \"Uint32\"}" + "Columns { Name: \"FirstReceiveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\", \"Offset\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".Settings\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: 
\"Name\" Type: \"Utf8\"}" + "Columns { Name: \"Value\" Type: \"Utf8\"}" + "KeyColumnNames: [\"Account\", \"Name\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".AtomicCounter\"" + "Columns { Name: \"counter_key\" Type: \"Uint64\"}" + "Columns { Name: \"value\" Type: \"Uint64\"}" + "KeyColumnNames: [\"counter_key\"]" + ); + RunYqlDataQuery("INSERT INTO `/Root/SQS/.AtomicCounter` (counter_key, value) VALUES (0, 0)"); + + auto attributesTable= "Name: \"Attributes\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"ContentBasedDeduplication\" Type: \"Bool\"}" + "Columns { Name: \"DelaySeconds\" Type: \"Uint64\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"MaximumMessageSize\" Type: \"Uint64\"}" + "Columns { Name: \"MessageRetentionPeriod\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveMessageWaitTime\" Type: \"Uint64\"}" + "Columns { Name: \"VisibilityTimeout\" Type: \"Uint64\"}" + "Columns { Name: \"DlqName\" Type: \"Utf8\"}" + "Columns { Name: \"DlqArn\" Type: \"Utf8\"}" + "Columns { Name: \"MaxReceiveCount\" Type: \"Uint64\"}" + "Columns { Name: \"ShowDetailedCountersDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\"]"; + client.CreateTable("/Root/SQS/.STD", attributesTable); + client.CreateTable("/Root/SQS/.FIFO", attributesTable); + + client.CreateTable("/Root/SQS", + "Name: \".Events\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"EventType\" Type: \"Uint64\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"EventTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "KeyColumnNames: [\"Account\", \"QueueName\", \"EventType\"]" + ); + + auto stateTableCommon = + "Name: \"State\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: 
\"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"CleanupTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"CreatedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"LastModifiedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"RetentionBoundary\" Type: \"Uint64\"}" + "Columns { Name: \"InflyCount\" Type: \"Int64\"}" + "Columns { Name: \"MessageCount\" Type: \"Int64\"}" + "Columns { Name: \"ReadOffset\" Type: \"Uint64\"}" + "Columns { Name: \"WriteOffset\" Type: \"Uint64\"}" + "Columns { Name: \"CleanupVersion\" Type: \"Uint64\"}" + "Columns { Name: \"InflyVersion\" Type: \"Uint64\"}"; + client.CreateTable("/Root/SQS/.STD", + TStringBuilder() + << stateTableCommon + << "Columns { Name: \"Shard\" Type: \"Uint32\"}" + << "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\", \"Shard\"]" + ); + client.CreateTable("/Root/SQS/.FIFO", + TStringBuilder() + << stateTableCommon + << "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\"]" + ); + + + client.CreateTable("/Root/SQS/.STD", + "Name: \"Infly\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"LoadId\" Type: \"Uint64\"}" + "Columns { Name: \"FirstReceiveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"LockTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveCount\" Type: \"Uint32\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"VisibilityDeadline\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"Offset\"]" + ); + + auto sentTimestampIdxCommonColumns= + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: 
\"Uint32\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}"; + auto sendTimestampIdsKeys = "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"SentTimestamp\", \"Offset\"]"; + client.CreateTable("/Root/SQS/.STD", + TStringBuilder() + << "Name: \"SentTimestampIdx\"" + << sentTimestampIdxCommonColumns + << sendTimestampIdsKeys + ); + client.CreateTable("/Root/SQS/.FIFO", + TStringBuilder() + << "Name: \"SentTimestampIdx\"" + << "Columns { Name: \"GroupId\" Type: \"String\"}" + << sentTimestampIdxCommonColumns + << sendTimestampIdsKeys + ); + + client.CreateTable("/Root/SQS/.STD", + "Name: \"MessageData\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"Attributes\" Type: \"String\"}" + "Columns { Name: \"Data\" Type: \"String\"}" + "Columns { Name: \"MessageId\" Type: \"String\"}" + "Columns { Name: \"SenderId\" Type: \"String\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"RandomId\", \"Offset\"]" + ); } void InitAccessServiceService() { // Service Account Service Mock grpc::ServerBuilder builder; AccessServiceMock.AuthenticateData["kinesis"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + AccessServiceMock.AuthenticateData["kinesis"].Response.mutable_subject()->mutable_service_account()->set_folder_id("folder4"); // AccessServiceMock.AuthenticateData["proxy_sa@builtin"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + 
AccessServiceMock.AuthenticateData["sqs"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + AccessServiceMock.AuthenticateData["sqs"].Response.mutable_subject()->mutable_service_account()->set_folder_id("folder4"); + AccessServiceMock.AuthorizeData["AKIDEXAMPLE-ydb.databases.list-folder4"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); AccessServiceMock.AuthorizeData["proxy_sa@builtin-ydb.databases.list-folder4"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); @@ -403,6 +648,8 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { config.MutableHttpConfig()->SetAccessServiceEndpoint(TStringBuilder() << "127.0.0.1:" << AccessServicePort); config.SetTestMode(true); config.MutableHttpConfig()->SetPort(HttpServicePort); + config.MutableHttpConfig()->SetYandexCloudMode(true); + config.MutableHttpConfig()->SetYmqEnabled(true); std::shared_ptr credentialsProviderFactory = NYdb::CreateOAuthCredentialsProviderFactory("proxy_sa@builtin"); @@ -436,6 +683,9 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { TActorId actorId = as->Register(CreateAccessServiceActor(config)); as->RegisterLocalService(MakeAccessServiceID(), actorId); + actorId = as->Register(CreateAccessServiceActor(config)); + as->RegisterLocalService(NSQS::MakeSqsAccessServiceID(), actorId); + actorId = as->Register(CreateIamTokenServiceActor(config)); as->RegisterLocalService(MakeIamTokenServiceID(), actorId); @@ -445,6 +695,24 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { actorId = as->Register(CreateMetricsActor(TMetricsSettings{Counters})); as->RegisterLocalService(MakeMetricsServiceID(), actorId); + NKikimrProto::NFolderService::TFolderServiceConfig folderServiceConfig; + folderServiceConfig.SetEnable(false); + actorId = as->Register(NKikimr::NFolderService::CreateFolderServiceActor(folderServiceConfig, "cloud4")); + as->RegisterLocalService(NFolderService::FolderServiceActorId(), 
actorId); + + actorId = as->Register(NKikimr::NFolderService::CreateFolderServiceActor(folderServiceConfig, "cloud4")); + as->RegisterLocalService(NSQS::MakeSqsFolderServiceID(), actorId); + + for (ui32 i = 0; i < ActorRuntime->GetNodeCount(); i++) { + auto nodeId = ActorRuntime->GetNodeId(i); + + actorId = as->Register(NSQS::CreateSqsService()); + as->RegisterLocalService(NSQS::MakeSqsServiceID(nodeId), actorId); + + actorId = as->Register(NSQS::CreateSqsProxyService()); + as->RegisterLocalService(NSQS::MakeSqsProxyServiceID(nodeId), actorId); + } + actorId = as->Register(NHttp::CreateHttpProxy()); as->RegisterLocalService(MakeHttpServerServiceID(), actorId); @@ -452,6 +720,7 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { httpProxyConfig.Config = config; httpProxyConfig.CredentialsProvider = credentialsProvider; httpProxyConfig.UseSDK = GetEnv("INSIDE_YDB").empty(); + actorId = as->Register(NKikimr::NHttpProxy::CreateHttpProxy(httpProxyConfig)); as->RegisterLocalService(MakeHttpProxyID(), actorId); @@ -482,4 +751,5 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { ui16 IamTokenServicePort = 0; ui16 DatabaseServicePort = 0; ui16 MonPort = 0; + ui16 KikimrGrpcPort = 0; }; diff --git a/ydb/core/http_proxy/ut/http_proxy_ut.h b/ydb/core/http_proxy/ut/http_proxy_ut.h index 3845f09a9cca..39ea72747ccc 100644 --- a/ydb/core/http_proxy/ut/http_proxy_ut.h +++ b/ydb/core/http_proxy/ut/http_proxy_ut.h @@ -3,11 +3,16 @@ #include "library/cpp/json/writer/json_value.h" #include "library/cpp/testing/unittest/registar.h" +#include +#include + extern TString Name_; extern bool ForceFork_; extern TString FormAuthorizationStr(const TString& region); extern NJson::TJsonValue CreateCreateStreamRequest(); extern NJson::TJsonValue CreateDescribeStreamRequest(); +extern NJson::TJsonValue CreateSqsGetQueueUrlRequest(); +extern NJson::TJsonValue CreateSqsCreateQueueRequest(); extern struct THttpResult httpResult; extern THttpResult SendHttpRequest( @@ -1525,4 
+1530,801 @@ Y_UNIT_TEST_SUITE(TestHttpProxy) { UNIT_ASSERT_VALUES_EQUAL(res.Description, "MissingParameter"); } } + + Y_UNIT_TEST_F(TestCreateQueue, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + } + + Y_UNIT_TEST_F(TestCreateQueueWithSameNameAndSameParams, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + req = CreateSqsCreateQueueRequest(); + res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + } + + Y_UNIT_TEST_F(TestCreateQueueWithSameNameAndDifferentParams, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + NJson::TJsonMap attributes = NJson::TJsonMap({std::pair("MessageRetentionPeriod", "60")}); + req["Attributes"] = attributes; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + 
UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + req = CreateSqsCreateQueueRequest(); + attributes = NJson::TJsonMap({std::pair("MessageRetentionPeriod", "61")}); + req["Attributes"] = attributes; + res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "ValidationError"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithBadQueueName, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + req["QueueName"] = "B@d_queue_name"; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "InvalidParameterValue"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithEmptyName, THttpProxyTestMock) { + NJson::TJsonValue req; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "MissingParameter"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithWrongBody, THttpProxyTestMock) { + NJson::TJsonValue req; + req["wrongField"] = "foobar"; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "InvalidArgumentException"); + } + 
+ Y_UNIT_TEST_F(TestGetQueueUrlOfNotExistingQueue, THttpProxyTestMock) { + auto req = CreateSqsGetQueueUrlRequest(); + req["QueueName"] = "not-existing-queue"; + auto res = SendHttpRequest("/Root", "AmazonSQS.GetQueueUrl", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "AWS.SimpleQueueService.NonExistentQueue"); + TString resultMessage = GetByPath(json, "message"); + UNIT_ASSERT_VALUES_EQUAL(resultMessage, "The specified queue doesn't exist."); + } + + Y_UNIT_TEST_F(TestGetQueueUrlWithIAM, THttpProxyTestMock) { + auto req = CreateSqsGetQueueUrlRequest(); + req["QueueName"] = "not-existing-queue"; + auto res = SendHttpRequest("/Root?folderId=XXX", "AmazonSQS.GetQueueUrl", std::move(req), "X-YaCloud-SubjectToken: Bearer proxy_sa@builtin"); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "AWS.SimpleQueueService.NonExistentQueue"); + TString resultMessage = GetByPath(json, "message"); + UNIT_ASSERT_VALUES_EQUAL(resultMessage, "The specified queue doesn't exist."); + } + + Y_UNIT_TEST_F(TestSendMessage, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + auto body = "MessageBody-0"; + 
sendMessageReq["MessageBody"] = body; + sendMessageReq["MessageDeduplicationId"] = "MessageDeduplicationId-0"; + sendMessageReq["MessageGroupId"] = "MessageGroupId-0"; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(!GetByPath(json, "SequenceNumber").empty()); + UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty()); + UNIT_ASSERT(!GetByPath(json, "MessageId").empty()); + } + + Y_UNIT_TEST_F(TestReceiveMessage, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", createQueueReq, FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + auto body0 = "MessageBody-0"; + sendMessageReq["MessageBody"] = body0; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", sendMessageReq, FormAuthorizationStr("ru-central1")); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty()); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + for (int i = 0; i < 20; ++i) { + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", receiveMessageReq, FormAuthorizationStr("ru-central1")); + if (res.Body != "{}") { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1); + 
UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], body0); + } + + Y_UNIT_TEST_F(TestReceiveMessageWithAttributes, THttpProxyTestMock) { + // Test if we process AttributeNames, MessageSystemAttributeNames, MessageAttributeNames correctly. + + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", createQueueReq, FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + auto sendMessage = [this, resultQueueUrl](const TString& body) { + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + sendMessageReq["MessageBody"] = body; + + auto res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", sendMessageReq, FormAuthorizationStr("ru-central1")); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty()); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + }; + + TString body = "MessageBody-0"; + sendMessage(body); + + auto receiveMessage = [this](NJson::TJsonValue request, const TString& expectedBody) -> NJson::TJsonValue { + request["VisibilityTimeout"] = 0; // Keep the message visible for next ReceiveMessage requests. 
+ THttpResult res; + for (int i = 0; i < 20; ++i) { + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", request, FormAuthorizationStr("ru-central1")); + if (res.Body != "{}") { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], expectedBody); + return json; + }; + + { + // Request SentTimestamp message system attribute using deprecated AttributeNames field. + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + receiveMessageReq["AttributeNames"] = NJson::TJsonArray{"SentTimestamp"}; + json = receiveMessage(receiveMessageReq, body); + UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty()); + } + + { + // Request SentTimestamp message system attribute using MessageSystemAttributeNames field. + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + receiveMessageReq["MessageSystemAttributeNames"] = NJson::TJsonArray{"SentTimestamp"}; + json = receiveMessage(receiveMessageReq, body); + UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty()); + } + + { + // Request All message system attributes using deprecated AttributeNames field. + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + receiveMessageReq["AttributeNames"] = NJson::TJsonArray{"All"}; + json = receiveMessage(receiveMessageReq, body); + UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty()); + } + + { + // Request All message system attributes using MessageSystemAttributeNames field. 
+ NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + receiveMessageReq["MessageSystemAttributeNames"] = NJson::TJsonArray{"All"}; + json = receiveMessage(receiveMessageReq, body); + UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty()); + } + } + + Y_UNIT_TEST_F(TestGetQueueAttributes, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + NJson::TJsonValue attributes; + attributes["DelaySeconds"] = "1"; + createQueueReq["Attributes"] = attributes; + + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + { + NJson::TJsonValue getQueueAttributes; + getQueueAttributes["QueueUrl"] = resultQueueUrl; + NJson::TJsonArray attributeNames = {"DelaySeconds"}; + getQueueAttributes["AttributeNames"] = attributeNames; + + res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue resultJson; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson)); + UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1"); + } + + { + NJson::TJsonValue getQueueAttributes; + getQueueAttributes["QueueUrl"] = resultQueueUrl; + NJson::TJsonArray attributeNames = { + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesDelayed", + "ApproximateNumberOfMessagesNotVisible", + "CreatedTimestamp", + "DelaySeconds", + "MaximumMessageSize", + "MessageRetentionPeriod", + "ReceiveMessageWaitTimeSeconds", + "RedrivePolicy", + "VisibilityTimeout", + "FifoQueue", + "ContentBasedDeduplication", + "QueueArn" + }; + 
getQueueAttributes["AttributeNames"] = attributeNames; + + res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue resultJson; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson)); + UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1"); + } + + { + NJson::TJsonValue getQueueAttributes; + getQueueAttributes["QueueUrl"] = resultQueueUrl; + NJson::TJsonArray attributeNames = {"All"}; + getQueueAttributes["AttributeNames"] = attributeNames; + + res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue resultJson; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson)); + UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1"); + } + } + + Y_UNIT_TEST_F(TestListQueues, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue listQueuesReq; + listQueuesReq["QueueNamePrefix"] = "Ex"; + res = SendHttpRequest("/Root", "AmazonSQS.ListQueues", std::move(listQueuesReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue result; /* the body is indexed by key ("QueueUrls") below, so parse it into a generic value like the other tests do */ + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &result)); + UNIT_ASSERT_VALUES_EQUAL(result["QueueUrls"].GetArray().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(result["QueueUrls"][0], resultQueueUrl); + } + + Y_UNIT_TEST_F(TestDeleteMessage, THttpProxyTestMock) { + auto createQueueReq
= CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + auto body = "MessageBody-0"; + sendMessageReq["MessageBody"] = body; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + for (int i = 0; i < 20; ++i) { + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + if (res.Body != TString("{}")) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], body); + + auto receiptHandle = json["Messages"][0]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle.Empty()); + + NJson::TJsonValue deleteMessageReq; + deleteMessageReq["QueueUrl"] = resultQueueUrl; + deleteMessageReq["ReceiptHandle"] = receiptHandle; + + res = SendHttpRequest("/Root", "AmazonSQS.DeleteMessage", std::move(deleteMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), 
FormAuthorizationStr("ru-central1")); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0); + } + + Y_UNIT_TEST_F(TestPurgeQueue, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + auto body = "MessageBody-0"; + sendMessageReq["MessageBody"] = body; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue purgeQueueReq; + purgeQueueReq["QueueUrl"] = resultQueueUrl; + + res = SendHttpRequest("/Root", "AmazonSQS.PurgeQueue", std::move(purgeQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0); + } + + Y_UNIT_TEST_F(TestDeleteQueue, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, 
&json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + + NJson::TJsonValue deleteQueueReq; + deleteQueueReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.DeleteQueue", std::move(deleteQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + for (int i = 0; i < 61; ++i) { + req = CreateSqsGetQueueUrlRequest(); + res = SendHttpRequest("/Root", "AmazonSQS.GetQueueUrl", std::move(req), FormAuthorizationStr("ru-central1")); + if (res.HttpCode != 200) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(GetByPath(json, "__type"), "AWS.SimpleQueueService.NonExistentQueue"); + } + + Y_UNIT_TEST_F(TestSetQueueAttributes, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + NJson::TJsonValue attributes; + attributes["DelaySeconds"] = "1"; + createQueueReq["Attributes"] = attributes; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + + NJson::TJsonValue setQueueAttributes; + setQueueAttributes["QueueUrl"] = resultQueueUrl; + attributes = {}; + attributes["DelaySeconds"] = "2"; + setQueueAttributes["Attributes"] = attributes; + + res = SendHttpRequest("/Root", "AmazonSQS.SetQueueAttributes", std::move(setQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue getQueueAttributes; + getQueueAttributes["QueueUrl"] = resultQueueUrl; + NJson::TJsonArray attributeNames = {"DelaySeconds"}; + getQueueAttributes["AttributeNames"] = attributeNames; + + res = SendHttpRequest("/Root", 
"AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue resultJson; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson)); + UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "2"); + } + + Y_UNIT_TEST_F(TestSendMessageBatch, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue message0; + message0["Id"] = "Id-0"; + message0["MessageBody"] = "MessageBody-0"; + message0["MessageDeduplicationId"] = "MessageDeduplicationId-0"; + + NJson::TJsonValue delaySeconds; + delaySeconds["StringValue"] = "1"; + delaySeconds["DataType"] = "String"; + + NJson::TJsonValue attributes; + attributes["DelaySeconds"] = delaySeconds; + + message0["MessageAttributes"] = attributes; + + NJson::TJsonValue message1; + message1["Id"] = "Id-1"; + message1["MessageBody"] = "MessageBody-1"; + message1["MessageDeduplicationId"] = "MessageDeduplicationId-1"; + + NJson::TJsonArray entries = {message0, message1}; + + NJson::TJsonValue sendMessageBatchReq; + sendMessageBatchReq["QueueUrl"] = resultQueueUrl; + sendMessageBatchReq["Entries"] = entries; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(json["Successful"].GetArray().size() == 2); + auto succesful0 = json["Successful"][0]; + UNIT_ASSERT(succesful0["Id"] == "Id-0"); + UNIT_ASSERT(!GetByPath(succesful0, 
"MD5OfMessageAttributes").empty()); + UNIT_ASSERT(!GetByPath(succesful0, "MD5OfMessageBody").empty()); + UNIT_ASSERT(!GetByPath(succesful0, "MessageId").empty()); + + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + } + + Y_UNIT_TEST_F(TestDeleteMessageBatch, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue message0; + message0["Id"] = "Id-0"; + message0["MessageBody"] = "MessageBody-0"; + message0["MessageDeduplicationId"] = "MessageDeduplicationId-0"; + + NJson::TJsonValue message1; + message1["Id"] = "Id-1"; + message1["MessageBody"] = "MessageBody-1"; + message1["MessageDeduplicationId"] = "MessageDeduplicationId-1"; + + NJson::TJsonArray entries = {message0, message1}; + + NJson::TJsonValue sendMessageBatchReq; + sendMessageBatchReq["QueueUrl"] = resultQueueUrl; + sendMessageBatchReq["Entries"] = entries; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(json["Successful"].GetArray().size() == 2); + + TVector messages; + for (int i = 0; i < 20; ++i) { + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + if (res.Body != 
TString("{}")) { + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); /* fail on an unparsable body instead of silently reusing the previous json value */ + if (json["Messages"].GetArray().size() == 2) { + messages.push_back(json["Messages"][0]); + messages.push_back(json["Messages"][1]); + break; + } + if (json["Messages"].GetArray().size() == 1) { + messages.push_back(json["Messages"][0]); + if (messages.size() == 2) { + break; + } + } + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), 2); + + auto receiptHandle0 = messages[0]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle0.Empty()); + auto receiptHandle1 = messages[1]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle1.Empty()); + + NJson::TJsonValue deleteMessageBatchReq; + deleteMessageBatchReq["QueueUrl"] = resultQueueUrl; + + NJson::TJsonValue entry0; + entry0["Id"] = "Id-0"; + entry0["ReceiptHandle"] = receiptHandle0; + + NJson::TJsonValue entry1; + entry1["Id"] = "Id-1"; + entry1["ReceiptHandle"] = receiptHandle1; + + NJson::TJsonArray deleteEntries = {entry0, entry1}; + deleteMessageBatchReq["Entries"] = deleteEntries; + + res = SendHttpRequest("/Root", "AmazonSQS.DeleteMessageBatch", std::move(deleteMessageBatchReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][0]["Id"], "Id-0"); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][1]["Id"], "Id-1"); + + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0); + + } + + Y_UNIT_TEST_F(TestListDeadLetterSourceQueues, THttpProxyTestMock) { + auto
createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + + auto createDlqReq = CreateSqsCreateQueueRequest(); /* separate request for the dead-letter queue; createQueueReq was moved-from above and must not be reused */ + createDlqReq["QueueName"] = "DlqName"; + res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createDlqReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + + TString dlqUrl = GetByPath(json, "QueueUrl"); + + NJson::TJsonValue getQueueAttributes; + getQueueAttributes["QueueUrl"] = dlqUrl; + NJson::TJsonArray attributeNames = {"QueueArn"}; + getQueueAttributes["AttributeNames"] = attributeNames; + res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + + TString dlqArn = GetByPath(json["Attributes"], "QueueArn"); + + NJson::TJsonValue setQueueAttributes; + setQueueAttributes["QueueUrl"] = resultQueueUrl; + NJson::TJsonValue attributes = {}; + auto redrivePolicy = TStringBuilder() + << "{\"deadLetterTargetArn\" : \"" << dlqArn << "\", \"maxReceiveCount\" : 100}"; /* redrive policy on the first queue targets the DLQ's ARN */ + attributes["RedrivePolicy"] = redrivePolicy; + setQueueAttributes["Attributes"] = attributes; + + res = SendHttpRequest("/Root", "AmazonSQS.SetQueueAttributes", std::move(setQueueAttributes), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + NJson::TJsonValue listDeadLetterSourceQueues; + listDeadLetterSourceQueues["QueueUrl"] = dlqUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ListDeadLetterSourceQueues", std::move(listDeadLetterSourceQueues),
FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(json["QueueUrls"][0], resultQueueUrl); + } + + Y_UNIT_TEST_F(TestChangeMessageVisibility, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + + NJson::TJsonValue sendMessageReq; + sendMessageReq["QueueUrl"] = resultQueueUrl; + auto body = "MessageBody-0"; + sendMessageReq["MessageBody"] = body; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + for (int i = 0; i < 20; ++i) { + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + if (res.Body != TString("{}")) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + + auto receiptHandle = json["Messages"][0]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle.Empty()); + + NJson::TJsonValue changeMessageVisibility; + changeMessageVisibility["QueueUrl"] = resultQueueUrl; + changeMessageVisibility["ReceiptHandle"] = receiptHandle; + changeMessageVisibility["VisibilityTimeout"] = 1; + + res = SendHttpRequest( + "/Root", + "AmazonSQS.ChangeMessageVisibility", + std::move(changeMessageVisibility), + FormAuthorizationStr("ru-central1") + ); + + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + } + + 
Y_UNIT_TEST_F(TestChangeMessageVisibilityBatch, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue message0; + message0["Id"] = "Id-0"; + message0["MessageBody"] = "MessageBody-0"; + message0["MessageDeduplicationId"] = "MessageDeduplicationId-0"; + + NJson::TJsonValue message1; + message1["Id"] = "Id-1"; + message1["MessageBody"] = "MessageBody-1"; + message1["MessageDeduplicationId"] = "MessageDeduplicationId-1"; + + NJson::TJsonArray entries = {message0, message1}; + + NJson::TJsonValue sendMessageBatchReq; + sendMessageBatchReq["QueueUrl"] = resultQueueUrl; + sendMessageBatchReq["Entries"] = entries; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(json["Successful"].GetArray().size() == 2); + + TVector messages; + for (int i = 0; i < 20; ++i) { + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + if (res.Body != TString("{}")) { + NJson::ReadJsonTree(res.Body, &json); + if (json["Messages"].GetArray().size() == 2) { + messages.push_back(json["Messages"][0]); + messages.push_back(json["Messages"][1]); + break; + } + if (json["Messages"].GetArray().size() == 1) { + messages.push_back(json["Messages"][0]); + if (messages.size() == 2) { + break; + } + } + } + 
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), 2); + + auto receiptHandle0 = messages[0]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle0.Empty()); + auto receiptHandle1 = messages[1]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle1.Empty()); + + + NJson::TJsonValue changeMessageVisibilityBatchReq; + changeMessageVisibilityBatchReq["QueueUrl"] = resultQueueUrl; + + NJson::TJsonValue entry0; + entry0["Id"] = "Id-0"; + entry0["ReceiptHandle"] = receiptHandle0; + entry0["VisibilityTimeout"] = 1; + + NJson::TJsonValue entry1; + entry1["Id"] = "Id-1"; + entry1["ReceiptHandle"] = receiptHandle1; + entry1["VisibilityTimeout"] = 2; + + NJson::TJsonArray changeVisibilityEntries = {entry0, entry1}; + changeMessageVisibilityBatchReq["Entries"] = changeVisibilityEntries; + + res = SendHttpRequest( + "/Root", "AmazonSQS.ChangeMessageVisibilityBatch", + std::move(changeMessageVisibilityBatchReq), + FormAuthorizationStr("ru-central1") + ); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][0]["Id"], "Id-0"); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][1]["Id"], "Id-1"); + } } // Y_UNIT_TEST_SUITE(TestHttpProxy) diff --git a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp index fb986daf9284..dd117d0535cf 100644 --- a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp +++ b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp @@ -1,5 +1,6 @@ #include #include "json_proto_conversion.h" +#include Y_UNIT_TEST_SUITE(JsonProtoConversion) { @@ -164,4 +165,72 @@ Y_UNIT_TEST(NlohmannJsonToProtoArray) { } } + +Y_UNIT_TEST(JsonToProtoMap) { + { + Ydb::Ymq::V1::CreateQueueRequest message; + + NJson::TJsonValue jsonObject; + jsonObject["QueueName"] = "SampleQueueName"; + + NJson::TJsonMap 
attributes; + attributes["DelaySeconds"] = "900"; + attributes["MaximumMessageSize"] = "1024"; + + jsonObject["Attributes"] = attributes; + + NKikimr::NHttpProxy::JsonToProto(jsonObject, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.queue_name(), "SampleQueueName"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("DelaySeconds")->second, "900"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("MaximumMessageSize")->second, "1024"); + } +} + +Y_UNIT_TEST(ProtoMapToJson) { + { + Ydb::Ymq::V1::GetQueueAttributesResult message; + message.mutable_attributes()->insert({google::protobuf::MapPair("DelaySeconds", "900")}); + message.mutable_attributes()->insert({google::protobuf::MapPair("MaximumMessageSize", "1024")}); + + NJson::TJsonValue jsonObject; + NKikimr::NHttpProxy::ProtoToJson(message, jsonObject, false); + + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().find("DelaySeconds")->second.GetString(), "900"); + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().find("MaximumMessageSize")->second.GetString(), "1024"); + } +} + +Y_UNIT_TEST(ProtoMapToJson_ReceiveMessageResult) { + // Test using ReceiveMessageResult that has a repeated field with TRANSFORM_BASE64. + // Before fix it failed on messages with attributes. 
+ { + Ydb::Ymq::V1::ReceiveMessageResult message; + message.add_messages()->mutable_message_attributes()->insert({google::protobuf::MapPair("a", {})}); + + NJson::TJsonValue jsonObject; + NKikimr::NHttpProxy::ProtoToJson(message, jsonObject, false); + } +} + +Y_UNIT_TEST(NlohmannJsonToProtoMap) { + { + nlohmann::json jsonObject; + jsonObject["QueueName"] = "SampleQueueName"; + + nlohmann::json attributes; + attributes["DelaySeconds"] = "900"; + attributes["MaximumMessageSize"] = "1024"; + jsonObject["Attributes"] = attributes; + nlohmann::json record; + + Ydb::Ymq::V1::CreateQueueRequest message; + NKikimr::NHttpProxy::NlohmannJsonToProto(jsonObject, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.queue_name(), "SampleQueueName"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("DelaySeconds")->second, "900"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("MaximumMessageSize")->second, "1024"); + } +} } // Y_UNIT_TEST_SUITE(JsonProtoConversion) diff --git a/ydb/core/http_proxy/ut/ya.make b/ydb/core/http_proxy/ut/ya.make index ac00c02ab912..bf2a6a040af1 100644 --- a/ydb/core/http_proxy/ut/ya.make +++ b/ydb/core/http_proxy/ut/ya.make @@ -25,6 +25,7 @@ PEERDIR( ydb/services/kesus ydb/services/persqueue_cluster_discovery ydb/services/ydb + ydb/services/ymq ) SRCS( diff --git a/ydb/core/http_proxy/ya.make b/ydb/core/http_proxy/ya.make index 2839751963bd..24857f5ccb2b 100644 --- a/ydb/core/http_proxy/ya.make +++ b/ydb/core/http_proxy/ya.make @@ -42,6 +42,7 @@ PEERDIR( ydb/public/sdk/cpp/client/iam_private ydb/services/datastreams ydb/services/persqueue_v1/actors + ydb/services/ymq ydb/public/api/grpc ydb/public/api/protos ) diff --git a/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp index f498a4ee0925..55600858669f 100644 --- a/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp @@ -34,14 +34,14 @@ class 
TAlterConfigsActor : public TAlterTopicActor userToken, TString topicPath, TString databaseName, std::optional retentionMs, std::optional retentionBytes) : TAlterTopicActor( - requester, + requester, userToken, topicPath, databaseName) @@ -54,12 +54,12 @@ class TAlterConfigsActor : public TAlterTopicActorDatabasePath, convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes )); InflyTopics++; @@ -201,7 +201,7 @@ void TKafkaAlterConfigsActor::Reply(const TActorContext& ctx) { responseResource.ErrorCode = INVALID_REQUEST; response->Responses.push_back(responseResource); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); diff --git a/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp index 6e13bd61cc7a..521f8598f9b3 100644 --- a/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp @@ -215,13 +215,13 @@ class TCreatePartitionsActor : public TAlterTopicActor userToken, TString topicPath, TString databaseName, ui32 partitionsNumber) : TAlterTopicActor( - requester, + requester, userToken, topicPath, databaseName) @@ -234,12 +234,12 @@ class TCreatePartitionsActor : public TAlterTopicActorResults.push_back(responseTopic); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); Die(ctx); diff --git a/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp index 3fad0055a1b1..44af7beb5ff7 100644 --- a/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp @@ -16,7 +16,7 @@ class TCreateTopicActor : public NKikimr::NGRpcProxy::V1::TPQGrpcSchemaBase userToken, TString topicPath, TString databaseName, @@ -78,13 
+78,13 @@ class TCreateTopicActor : public NKikimr::NGRpcProxy::V1::TPQGrpcSchemaBase, std::optional>( convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes ); ctx.Register(new TCreateTopicActor( @@ -202,7 +202,7 @@ void TKafkaCreateTopicsActor::Bootstrap(const NActors::TActorContext& ctx) { Context->DatabasePath, topic.NumPartitions, convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes )); InflyTopics++; @@ -243,7 +243,7 @@ void TKafkaCreateTopicsActor::Reply(const TActorContext& ctx) { responseTopic.ErrorMessage = TopicNamesToResponses[topicName]->Message; } - auto addConfigIfRequired = [this, &topicName, &responseTopic](std::optional configValue, TString configName) { + auto addConfigIfRequired = [this, &topicName, &responseTopic](std::optional configValue, TString configName) { if (configValue.has_value()) { TCreateTopicsResponseData::TCreatableTopicResult::TCreatableTopicConfigs config; config.Name = configName; @@ -271,7 +271,7 @@ void TKafkaCreateTopicsActor::Reply(const TActorContext& ctx) { responseTopic.ErrorMessage = "Duplicate topic in request."; response->Topics.push_back(responseTopic); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); diff --git a/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp index 4968aac5f210..648d38d273e2 100644 --- a/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp @@ -35,7 +35,7 @@ void TKafkaFetchActor::SendFetchRequests(const TActorContext& ctx) { TVector partPQRequests; PrepareFetchRequestData(topicIndex, partPQRequests); auto ruPerRequest = topicIndex == 0 && Context->Config.GetMeteringV2Enabled(); - NKikimr::NPQ::TFetchRequestSettings request(Context->DatabasePath, partPQRequests, FetchRequestData->MaxWaitMs, FetchRequestData->MaxBytes, Context->RlContext, 
*Context->UserToken, ruPerRequest); + NKikimr::NPQ::TFetchRequestSettings request(Context->DatabasePath, partPQRequests, FetchRequestData->MaxWaitMs, FetchRequestData->MaxBytes, Context->RlContext, *Context->UserToken, 0, ruPerRequest); auto fetchActor = NKikimr::NPQ::CreatePQFetchRequestActor(request, NKikimr::MakeSchemeCacheID(), ctx.SelfID); auto actorId = ctx.Register(fetchActor); PendingResponses++; diff --git a/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp index 96893aaa59ce..7f494d25e1b3 100644 --- a/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp @@ -28,7 +28,8 @@ struct PartitionOffsets { class TTopicOffsetActor: public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor, - public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl { + public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl, + public NKikimr::NGRpcProxy::V1::TCdcStreamCompatible { using TBase = NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor; @@ -36,7 +37,7 @@ class TTopicOffsetActor: public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor< public: TTopicOffsetActor(std::shared_ptr> consumers, const NKikimr::NGRpcProxy::V1::TLocalRequestBase& request, - const TActorId& requester, + const TActorId& requester, std::shared_ptr> partitions, const TString& originalTopicName, const TString& userSID) diff --git a/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp index 062e74853a18..3f7d50f7dc58 100644 --- a/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKafka { @@ -262,6 +263,7 @@ THolder Convert(const TProduceRequestData:: for (const auto& record : batch->Records) { NKikimrPQClient::TDataChunk proto; + proto.set_codec(NPersQueueCommon::RAW); for(auto& h : record.Headers) { auto res = 
proto.AddMessageMeta(); if (h.Key) { diff --git a/ydb/core/kafka_proxy/actors/kafka_produce_actor.h b/ydb/core/kafka_proxy/actors/kafka_produce_actor.h index f1eea1c150de..1473ba616bf4 100644 --- a/ydb/core/kafka_proxy/actors/kafka_produce_actor.h +++ b/ydb/core/kafka_proxy/actors/kafka_produce_actor.h @@ -19,7 +19,7 @@ using namespace NKikimrClient; // Each request can contain data for writing to several topics, and in each topic to several partitions. // When a request to write to an unknown topic arrives, the actor changes the state to Init until it receives // information about all the topics needed to process the request. -// +// // Requests are processed in parallel, but it is guaranteed that the recording order will be preserved. // The order of responses to requests is also guaranteed. // diff --git a/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp index 8a0fafc5de3f..f011e4ba248d 100644 --- a/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp @@ -134,12 +134,13 @@ bool TKafkaSaslAuthActor::TryParseAuthDataTo(TKafkaSaslAuthActor::TAuthData& aut auto password = tokens[2]; size_t atPos = userAndDatabase.rfind('@'); if (atPos == TString::npos) { - SendResponseAndDie(EKafkaErrors::SASL_AUTHENTICATION_FAILED, "Database not provided.", "", ctx); - return false; + authData.UserName = ""; + authData.Database = userAndDatabase; + } else { + authData.UserName = userAndDatabase.substr(0, atPos); + authData.Database = userAndDatabase.substr(atPos + 1); } - authData.UserName = userAndDatabase.substr(0, atPos); - authData.Database = userAndDatabase.substr(atPos + 1); authData.Password = password; return true; } diff --git a/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h b/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h index 08db21ce59c4..c284a9d36510 100644 --- a/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h +++ 
b/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h @@ -14,7 +14,8 @@ namespace NKafka { class TTopicOffsetsActor : public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor - , public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl { + , public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl + , public NKikimr::NGRpcProxy::V1::TCdcStreamCompatible { using TBase = TPQInternalSchemaActor& ev); void HandleCacheNavigateResponse(NKikimr::TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) override; - + virtual void ApplyResponse(TTabletInfo&, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr&, const TActorContext&) override { Y_ABORT(); } + bool ApplyResponse(NKikimr::TEvPersQueue::TEvGetPartitionsLocationResponse::TPtr&, const TActorContext&) override { Y_ABORT(); } void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) override; - + void Reply(const TActorContext&) override; void RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext&) override; diff --git a/ydb/core/kafka_proxy/kafka_events.h b/ydb/core/kafka_proxy/kafka_events.h index f3bacc32b395..14855d40c1d4 100644 --- a/ydb/core/kafka_proxy/kafka_events.h +++ b/ydb/core/kafka_proxy/kafka_events.h @@ -208,8 +208,8 @@ struct TGetOffsetsRequest : public NKikimr::NGRpcProxy::V1::TLocalRequestBase { TVector PartitionIds; }; -struct TEvTopicOffsetsResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct TEvTopicOffsetsResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { TEvTopicOffsetsResponse() {} @@ -217,8 +217,8 @@ struct TEvTopicOffsetsResponse : public NActors::TEventLocal Partitions; }; -struct TEvCommitedOffsetsResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct 
TEvCommitedOffsetsResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { TEvCommitedOffsetsResponse() {} @@ -228,8 +228,8 @@ struct TEvCommitedOffsetsResponse : public NActors::TEventLocal>> PartitionIdToOffsets; }; -struct TEvTopicModificationResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct TEvTopicModificationResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { enum EStatus { OK, diff --git a/ydb/core/kafka_proxy/ut/ut_protocol.cpp b/ydb/core/kafka_proxy/ut/ut_protocol.cpp index 1a07bf9b505b..dbca66e624a3 100644 --- a/ydb/core/kafka_proxy/ut/ut_protocol.cpp +++ b/ydb/core/kafka_proxy/ut/ut_protocol.cpp @@ -1000,6 +1000,11 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { TString topicName = "/Root/topic-0-test"; TString shortTopicName = "topic-0-test"; TString notExistsTopicName = "/Root/not-exists"; + + TString tableName = "/Root/table-0-test"; + TString feedName = "feed"; + TString feedPath = tableName + "/" + feedName; + ui64 minActivePartitions = 10; TString key = "record-key"; @@ -1207,6 +1212,60 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { } } + { + NYdb::NTable::TTableClient tableClient(*testServer.Driver); + tableClient.RetryOperationSync([&](TSession session) + { + NYdb::NTable::TTableBuilder builder; + builder.AddNonNullableColumn("key", NYdb::EPrimitiveType::Int64).SetPrimaryKeyColumn("key"); + builder.AddNonNullableColumn("value", NYdb::EPrimitiveType::Int64); + + auto createResult = session.CreateTable(tableName, builder.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(createResult.IsTransportError(), false); + Cerr << createResult.GetIssues().ToString() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(createResult.GetStatus(), EStatus::SUCCESS); + + auto alterResult = session.AlterTable(tableName, NYdb::NTable::TAlterTableSettings() + .AppendAddChangefeeds(NYdb::NTable::TChangefeedDescription(feedName, + 
NYdb::NTable::EChangefeedMode::Updates, + NYdb::NTable::EChangefeedFormat::Json)) + ).ExtractValueSync(); + Cerr << alterResult.GetIssues().ToString() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(alterResult.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(alterResult.GetStatus(), EStatus::SUCCESS); + return alterResult; + } + ); + + TValueBuilder rows; + rows.BeginList(); + rows.AddListItem() + .BeginStruct() + .AddMember("key").Int64(1) + .AddMember("value").Int64(2) + .EndStruct(); + rows.EndList(); + + auto upsertResult = tableClient.BulkUpsert(tableName, rows.Build()).GetValueSync(); + UNIT_ASSERT_EQUAL(upsertResult.GetStatus(), EStatus::SUCCESS); + } + + { + // Check CDC + std::vector>> topics {{feedPath, {0}}}; + auto msg = client.Fetch(topics); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].Records.has_value(), true); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].Records->Records.size(), 1); + auto record = msg->Responses[0].Partitions[0].Records->Records[0]; + + auto data = record.Value.value(); + auto dataStr = TString(data.data(), data.size()); + UNIT_ASSERT_VALUES_EQUAL(dataStr, "{\"update\":{\"value\":2},\"key\":[1]}"); + } + } // Y_UNIT_TEST(FetchScenario) Y_UNIT_TEST(BalanceScenario) { @@ -2300,4 +2359,53 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { Sleep(TDuration::Seconds(1)); } + + Y_UNIT_TEST(LoginWithApiKeyWithoutAt) { + TInsecureTestServer testServer; + + TString topicName = "/Root/topic-0-test"; + + NYdb::NTopic::TTopicClient pqClient(*testServer.Driver); + { + auto result = + pqClient + .CreateTopic(topicName, + NYdb::NTopic::TCreateTopicSettings() + .PartitioningSettings(10, 100) + .BeginAddConsumer("consumer-0").EndAddConsumer()) + .ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + } + + auto settings = NTopic::TReadSessionSettings() + .AppendTopics(NTopic::TTopicReadSettings(topicName)) + .ConsumerName("consumer-0"); + auto topicReader = pqClient.CreateReadSession(settings); + + TTestClient client(testServer.Port); + + { + auto msg = client.ApiVersions(); + + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->ApiKeys.size(), 18u); + } + + { + auto msg = client.SaslHandshake(); + + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->Mechanisms.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(*msg->Mechanisms[0], "PLAIN"); + } + + { + auto msg = client.SaslAuthenticate("/Root", "ApiKey-value-valid"); + Cerr << msg->ErrorMessage << "\n"; + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + } + + Sleep(TDuration::Seconds(1)); + } } // Y_UNIT_TEST_SUITE(KafkaProtocol) diff --git a/ydb/core/keyvalue/keyvalue_state.cpp b/ydb/core/keyvalue/keyvalue_state.cpp index 8135c86c2a2b..5ec47557f1fa 100644 --- a/ydb/core/keyvalue/keyvalue_state.cpp +++ b/ydb/core/keyvalue/keyvalue_state.cpp @@ -3092,6 +3092,9 @@ void TKeyValueState::RegisterRequestActor(const TActorContext &ctx, THolderRefCountsIncr.emplace_back(patch.PatchedBlobId, true); + + LOG_INFO_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletId + << " PatchedKey# " << patch.PatchedKey << " BlobId# " << patch.PatchedBlobId); }; for (auto& write : intermediate->Writes) { diff --git a/ydb/core/kqp/common/compilation/events.h b/ydb/core/kqp/common/compilation/events.h index 062148adf752..7b43ccd4e31d 100644 --- a/ydb/core/kqp/common/compilation/events.h +++ b/ydb/core/kqp/common/compilation/events.h @@ -76,7 +76,8 @@ struct TEvRecompileRequest: public TEventLocal& query, bool isQueryActionPrepare, TInstant deadline, 
TKqpDbCountersPtr dbCounters, const TGUCSettings::TPtr& gUCSettings, const TMaybe& applicationName, std::shared_ptr> intrestedInResult, const TIntrusivePtr& userRequestContext, - NLWTrace::TOrbit orbit = {}, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, TMaybe queryAst = Nothing()) + NLWTrace::TOrbit orbit = {}, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, TMaybe queryAst = Nothing(), + bool split = false, NYql::TExprContext* splitCtx = nullptr, NYql::TExprNode::TPtr splitExpr = nullptr) : UserToken(userToken) , Uid(uid) , Query(query) @@ -90,6 +91,9 @@ struct TEvRecompileRequest: public TEventLocal> IntrestedInResult; TMaybe QueryAst; + bool Split = false; + + NYql::TExprContext* SplitCtx = nullptr; + NYql::TExprNode::TPtr SplitExpr = nullptr; }; struct TEvCompileResponse: public TEventLocal { diff --git a/ydb/core/kqp/common/compilation/result.cpp b/ydb/core/kqp/common/compilation/result.cpp index 292cd0d03c4b..02672db8189a 100644 --- a/ydb/core/kqp/common/compilation/result.cpp +++ b/ydb/core/kqp/common/compilation/result.cpp @@ -2,4 +2,11 @@ namespace NKikimr::NKqp { +std::shared_ptr TKqpCompileResult::GetAst() const { + if (QueryAst) { + return QueryAst->Ast; + } + return nullptr; +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/compilation/result.h b/ydb/core/kqp/common/compilation/result.h index e5ab7cfc7d6a..1f74b7f8f045 100644 --- a/ydb/core/kqp/common/compilation/result.h +++ b/ydb/core/kqp/common/compilation/result.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -14,24 +15,26 @@ struct TKqpCompileResult { using TConstPtr = std::shared_ptr; TKqpCompileResult(const TString& uid, const Ydb::StatusIds::StatusCode& status, const NYql::TIssues& issues, - ETableReadType maxReadType, TMaybe query = {}, std::shared_ptr ast = {}, + ETableReadType maxReadType, TMaybe query = {}, TMaybe queryAst = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) : Status(status) , 
Issues(issues) , Query(std::move(query)) , Uid(uid) , MaxReadType(maxReadType) - , Ast(std::move(ast)) + , QueryAst(std::move(queryAst)) , NeedToSplit(needToSplit) , CommandTagName(commandTagName) {} static std::shared_ptr Make(const TString& uid, const Ydb::StatusIds::StatusCode& status, const NYql::TIssues& issues, ETableReadType maxReadType, TMaybe query = {}, - std::shared_ptr ast = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) + TMaybe queryAst = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) { - return std::make_shared(uid, status, issues, maxReadType, std::move(query), std::move(ast), needToSplit, commandTagName); + return std::make_shared(uid, status, issues, maxReadType, std::move(query), std::move(queryAst), needToSplit, commandTagName); } + std::shared_ptr GetAst() const; + Ydb::StatusIds::StatusCode Status; NYql::TIssues Issues; @@ -40,7 +43,7 @@ struct TKqpCompileResult { ETableReadType MaxReadType; bool AllowCache = true; - std::shared_ptr Ast; + TMaybe QueryAst; bool NeedToSplit = false; TMaybe CommandTagName = {}; diff --git a/ydb/core/kqp/common/events/events.h b/ydb/core/kqp/common/events/events.h index 45c833ef88d3..426bdfd22841 100644 --- a/ydb/core/kqp/common/events/events.h +++ b/ydb/core/kqp/common/events/events.h @@ -111,6 +111,18 @@ struct TEvKqp { struct TEvScriptRequest : public TEventLocal { TEvScriptRequest() = default; + const TString& GetDatabase() const { + return Record.GetRequest().GetDatabase(); + } + + const TString& GetDatabaseId() const { + return Record.GetRequest().GetDatabaseId(); + } + + void SetDatabaseId(const TString& databaseId) { + Record.MutableRequest()->SetDatabaseId(databaseId); + } + mutable NKikimrKqp::TEvQueryRequest Record; TDuration ForgetAfter; TDuration ResultsTtl; @@ -164,6 +176,40 @@ struct TEvKqp { return issues; } }; + + struct TEvUpdateDatabaseInfo : public TEventLocal { + TEvUpdateDatabaseInfo(const TString& database, Ydb::StatusIds::StatusCode status, NYql::TIssues 
issues) + : Status(status) + , Database(database) + , Issues(std::move(issues)) + {} + + TEvUpdateDatabaseInfo(const TString& database, const TString& databaseId, bool serverless) + : Status(Ydb::StatusIds::SUCCESS) + , Database(database) + , DatabaseId(databaseId) + , Serverless(serverless) + , Issues({}) + {} + + Ydb::StatusIds::StatusCode Status; + TString Database; + TString DatabaseId; + bool Serverless = false; + NYql::TIssues Issues; + }; + + struct TEvDelayedRequestError : public TEventLocal { + TEvDelayedRequestError(THolder requestEvent, Ydb::StatusIds::StatusCode status, NYql::TIssues issues) + : RequestEvent(std::move(requestEvent)) + , Status(status) + , Issues(std::move(issues)) + {} + + THolder RequestEvent; + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; + }; }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/events/query.h b/ydb/core/kqp/common/events/query.h index 93f3ebb5b0dd..b180f665492d 100644 --- a/ydb/core/kqp/common/events/query.h +++ b/ydb/core/kqp/common/events/query.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -69,7 +70,9 @@ struct TEvQueryRequest: public NActors::TEventLocalSetUsePublicResponseDataFormat(true); + } bool IsSerializable() const override { return true; @@ -342,6 +345,22 @@ struct TEvQueryRequest: public NActors::TEventLocal GetPoolConfig() const { + return PoolConfig; + } + + const TString& GetDatabaseId() const { + return DatabaseId ? 
DatabaseId : Record.GetRequest().GetDatabaseId(); + } + + void SetDatabaseId(const TString& databaseId) { + DatabaseId = databaseId; + } + mutable NKikimrKqp::TEvQueryRequest Record; private: @@ -354,6 +373,7 @@ struct TEvQueryRequest: public NActors::TEventLocal Token_; TActorId RequestActorId; TString Database; + TString DatabaseId; TString SessionId; TString YqlText; TString QueryId; @@ -370,6 +390,7 @@ struct TEvQueryRequest: public NActors::TEventLocal UserRequestContext; TDuration ProgressStatsPeriod; + std::optional PoolConfig; }; struct TEvDataQueryStreamPart: public TEventPB { - TEvForgetScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) +template +struct TEventWithDatabaseId : public NActors::TEventLocal { + TEventWithDatabaseId(const TString& database) : Database(database) - , OperationId(id) {} + const TString& GetDatabase() const { + return Database; + } + + const TString& GetDatabaseId() const { + return DatabaseId; + } + + void SetDatabaseId(const TString& databaseId) { + DatabaseId = databaseId; + } + const TString Database; + TString DatabaseId; +}; + +struct TEvForgetScriptExecutionOperation : public TEventWithDatabaseId { + TEvForgetScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) + : TEventWithDatabaseId(database) + , OperationId(id) + {} + const NOperationId::TOperationId OperationId; }; @@ -43,14 +64,12 @@ struct TEvForgetScriptExecutionOperationResponse : public NActors::TEventLocal { - explicit TEvGetScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) - : Database(database) +struct TEvGetScriptExecutionOperation : public TEventWithDatabaseId { + TEvGetScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) + : TEventWithDatabaseId(database) , OperationId(id) - { - } + {} - TString Database; NOperationId::TOperationId OperationId; }; @@ -97,14 +116,13 @@ struct TEvGetScriptExecutionOperationResponse : 
public NActors::TEventLocal Metadata; }; -struct TEvListScriptExecutionOperations : public NActors::TEventLocal { +struct TEvListScriptExecutionOperations : public TEventWithDatabaseId { TEvListScriptExecutionOperations(const TString& database, const ui64 pageSize, const TString& pageToken) - : Database(database) + : TEventWithDatabaseId(database) , PageSize(pageSize) , PageToken(pageToken) {} - TString Database; ui64 PageSize; TString PageToken; }; @@ -151,14 +169,12 @@ struct TEvCheckAliveRequest : public NActors::TEventPB { }; -struct TEvCancelScriptExecutionOperation : public NActors::TEventLocal { - explicit TEvCancelScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) - : Database(database) +struct TEvCancelScriptExecutionOperation : public TEventWithDatabaseId { + TEvCancelScriptExecutionOperation(const TString& database, const NOperationId::TOperationId& id) + : TEventWithDatabaseId(database) , OperationId(id) - { - } + {} - TString Database; NOperationId::TOperationId OperationId; }; diff --git a/ydb/core/kqp/common/events/workload_service.h b/ydb/core/kqp/common/events/workload_service.h index bdde6089d1ef..385e4c7eded8 100644 --- a/ydb/core/kqp/common/events/workload_service.h +++ b/ydb/core/kqp/common/events/workload_service.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -12,15 +13,25 @@ namespace NKikimr::NKqp::NWorkload { +struct TEvSubscribeOnPoolChanges : public NActors::TEventLocal { + TEvSubscribeOnPoolChanges(const TString& databaseId, const TString& poolId) + : DatabaseId(databaseId) + , PoolId(poolId) + {} + + const TString DatabaseId; + const TString PoolId; +}; + struct TEvPlaceRequestIntoPool : public NActors::TEventLocal { - TEvPlaceRequestIntoPool(const TString& database, const TString& sessionId, const TString& poolId, TIntrusiveConstPtr userToken) - : Database(database) + TEvPlaceRequestIntoPool(const TString& databaseId, const TString& sessionId, const TString& poolId, 
TIntrusiveConstPtr userToken) + : DatabaseId(databaseId) , SessionId(sessionId) , PoolId(poolId) , UserToken(userToken) {} - const TString Database; + const TString DatabaseId; const TString SessionId; TString PoolId; // Can be changed to default pool id TIntrusiveConstPtr UserToken; @@ -41,15 +52,19 @@ struct TEvContinueRequest : public NActors::TEventLocal { - TEvCleanupRequest(const TString& database, const TString& sessionId, const TString& poolId) - : Database(database) + TEvCleanupRequest(const TString& databaseId, const TString& sessionId, const TString& poolId, TDuration duration, TDuration cpuConsumed) + : DatabaseId(databaseId) , SessionId(sessionId) , PoolId(poolId) + , Duration(duration) + , CpuConsumed(cpuConsumed) {} - const TString Database; + const TString DatabaseId; const TString SessionId; const TString PoolId; + const TDuration Duration; + const TDuration CpuConsumed; }; struct TEvCleanupResponse : public NActors::TEventLocal { @@ -62,4 +77,36 @@ struct TEvCleanupResponse : public NActors::TEventLocal { + TEvUpdatePoolInfo(const TString& databaseId, const TString& poolId, const std::optional& config, const std::optional& securityObject) + : DatabaseId(databaseId) + , PoolId(poolId) + , Config(config) + , SecurityObject(securityObject) + {} + + const TString DatabaseId; + const TString PoolId; + const std::optional Config; + const std::optional SecurityObject; +}; + +struct TEvFetchDatabaseResponse : public NActors::TEventLocal { + TEvFetchDatabaseResponse(Ydb::StatusIds::StatusCode status, const TString& database, const TString& databaseId, bool serverless, TPathId pathId, NYql::TIssues issues) + : Status(status) + , Database(database) + , DatabaseId(databaseId) + , Serverless(serverless) + , PathId(pathId) + , Issues(std::move(issues)) + {} + + const Ydb::StatusIds::StatusCode Status; + const TString Database; + const TString DatabaseId; + const bool Serverless; + const TPathId PathId; + const NYql::TIssues Issues; +}; + } // 
NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/common/events/ya.make b/ydb/core/kqp/common/events/ya.make index 76d8e0fe6274..4ac5ea520cd1 100644 --- a/ydb/core/kqp/common/events/ya.make +++ b/ydb/core/kqp/common/events/ya.make @@ -16,6 +16,7 @@ PEERDIR( ydb/core/kqp/common/shutdown ydb/core/kqp/common/compilation ydb/core/resource_pools + ydb/core/scheme ydb/library/yql/dq/actors ydb/public/api/protos diff --git a/ydb/core/kqp/common/kqp.h b/ydb/core/kqp/common/kqp.h index 14785394faa1..e0bccb2e5027 100644 --- a/ydb/core/kqp/common/kqp.h +++ b/ydb/core/kqp/common/kqp.h @@ -33,18 +33,9 @@ namespace NKikimr::NKqp { -void ConvertKqpQueryResultToDbResult(const NKikimrMiniKQL::TResult& from, Ydb::ResultSet* to); - TString ScriptExecutionRunnerActorIdString(const NActors::TActorId& actorId); bool ScriptExecutionRunnerActorIdFromString(const TString& executionId, TActorId& actorId); -template -inline void ConvertKqpQueryResultsToDbResult(const TFrom& from, TTo* to) { - const auto& results = from.GetResults(); - for (const auto& result : results) { - ConvertKqpQueryResultToDbResult(result, to->add_result_sets()); - } -} class TKqpRequestInfo { public: @@ -80,7 +71,7 @@ class IQueryReplayBackend : public TNonCopyable { /// Accepts query text virtual void Collect(const TString& queryData) = 0; - virtual bool IsNull() { return false; } + virtual bool IsNull() { return false; } virtual ~IQueryReplayBackend() {}; diff --git a/ydb/core/kqp/common/kqp_event_impl.cpp b/ydb/core/kqp/common/kqp_event_impl.cpp index cadd44a1c89e..d63b6f821fe2 100644 --- a/ydb/core/kqp/common/kqp_event_impl.cpp +++ b/ydb/core/kqp/common/kqp_event_impl.cpp @@ -90,6 +90,12 @@ void TEvKqp::TEvQueryRequest::PrepareRemote() const { Record.MutableRequest()->SetPoolId(PoolId); } + Record.MutableRequest()->SetUsePublicResponseDataFormat(true); + + if (!DatabaseId.empty()) { + Record.MutableRequest()->SetDatabaseId(DatabaseId); + } + Record.MutableRequest()->SetSessionId(SessionId); 
Record.MutableRequest()->SetAction(QueryAction); Record.MutableRequest()->SetType(QueryType); diff --git a/ydb/core/kqp/common/kqp_timeouts.cpp b/ydb/core/kqp/common/kqp_timeouts.cpp index 4389a3e3241d..1a59fbef4d13 100644 --- a/ydb/core/kqp/common/kqp_timeouts.cpp +++ b/ydb/core/kqp/common/kqp_timeouts.cpp @@ -19,9 +19,10 @@ ui64 GetDefaultQueryTimeoutMs(NKikimrKqp::EQueryType queryType, case NKikimrKqp::QUERY_TYPE_SQL_DML: case NKikimrKqp::QUERY_TYPE_PREPARED_DML: case NKikimrKqp::QUERY_TYPE_AST_DML: + return queryLimits.GetDataQueryTimeoutMs(); case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY: case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY: - return queryLimits.GetDataQueryTimeoutMs(); + return queryServiceConfig.GetQueryTimeoutDefaultSeconds() * 1000; case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_SCRIPT: return queryServiceConfig.GetScriptOperationTimeoutDefaultSeconds() diff --git a/ydb/core/kqp/common/kqp_tx.cpp b/ydb/core/kqp/common/kqp_tx.cpp index e3209f08dec9..0e1b96aa1511 100644 --- a/ydb/core/kqp/common/kqp_tx.cpp +++ b/ydb/core/kqp/common/kqp_tx.cpp @@ -7,24 +7,33 @@ namespace NKqp { using namespace NYql; -TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe& invalidatedLock) { +NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TKikimrPathId& pathId) { TStringBuilder message; message << "Transaction locks invalidated."; - TMaybe tableName; - if (invalidatedLock) { - TKikimrPathId id(invalidatedLock->GetSchemeShard(), invalidatedLock->GetPathId()); - auto table = txCtx.TableByIdMap.FindPtr(id); - if (table) { - tableName = *table; + if (pathId.OwnerId() != 0) { + auto table = txCtx.TableByIdMap.FindPtr(pathId); + if (!table) { + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Unknown table."); } + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Table: " << *table); + } else { + // Olap tables don't return SchemeShard in locks, 
thus we use tableId here. + for (const auto& [pathId, table] : txCtx.TableByIdMap) { + if (pathId.TableId() == pathId.TableId()) { + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Table: " << table); + } + } + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Unknown table."); } +} - if (tableName) { - message << " Table: " << *tableName; - } - - return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); +TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TKqpTxLock& invalidatedLock) { + return GetLocksInvalidatedIssue( + txCtx, + TKikimrPathId( + invalidatedLock.GetSchemeShard(), + invalidatedLock.GetPathId())); } std::pair> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, @@ -127,7 +136,8 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig { Y_UNUSED(config); - if (*txCtx.EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE) + if (*txCtx.EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE && + *txCtx.EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_SNAPSHOT_RO) return false; if (txCtx.GetSnapshot().IsValid()) @@ -166,6 +176,13 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig for (const auto &input : stage.GetInputs()) { hasStreamLookup |= input.GetTypeCase() == NKqpProto::TKqpPhyConnection::kStreamLookup; } + + for (const auto &tableOp : stage.GetTableOps()) { + if (tableOp.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadOlapRange) { + // always need snapshot for OLAP reads + return true; + } + } } } @@ -188,6 +205,11 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig return false; } + // We need snapshot for stream lookup, besause it's used for dependent reads + if (hasStreamLookup) { + return true; + } + // We need snapshot when there are multiple table read phases, most // likely 
it involves multiple tables and we would have to use a // distributed commit otherwise. Taking snapshot helps as avoid TLI diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index 289c7b6650b2..af3ff88c35dc 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -212,8 +212,7 @@ class TKqpTransactionContext : public NYql::TKikimrTransactionContextBase { break; case Ydb::Table::TransactionSettings::kSnapshotReadOnly: - // TODO: (KIKIMR-3374) Use separate isolation mode to avoid optimistic locks. - EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE; + EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_SNAPSHOT_RO; Readonly = true; break; @@ -434,6 +433,7 @@ class TTransactionsCache { } }; +NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const NYql::TKikimrPathId& pathId); std::pair> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx); diff --git a/ydb/core/kqp/common/kqp_user_request_context.cpp b/ydb/core/kqp/common/kqp_user_request_context.cpp index 92cda71e90c8..63e11c27fc10 100644 --- a/ydb/core/kqp/common/kqp_user_request_context.cpp +++ b/ydb/core/kqp/common/kqp_user_request_context.cpp @@ -3,12 +3,13 @@ namespace NKikimr::NKqp { void TUserRequestContext::Out(IOutputStream& o) const { - o << "{" << " TraceId: " << TraceId << ", Database: " << Database << ", SessionId: " << SessionId << ", CurrentExecutionId: " << CurrentExecutionId << ", CustomerSuppliedId: " << CustomerSuppliedId << ", PoolId: " << PoolId << "}"; + o << "{" << " TraceId: " << TraceId << ", Database: " << Database << ", DatabaseId: " << DatabaseId << ", SessionId: " << SessionId << ", CurrentExecutionId: " << CurrentExecutionId << ", CustomerSuppliedId: " << CustomerSuppliedId << ", PoolId: " << PoolId << "}"; } void SerializeCtxToMap(const TUserRequestContext& ctx, google::protobuf::Map& resultMap) { resultMap["TraceId"] = ctx.TraceId; 
resultMap["Database"] = ctx.Database; + resultMap["DatabaseId"] = ctx.DatabaseId; resultMap["SessionId"] = ctx.SessionId; resultMap["CurrentExecutionId"] = ctx.CurrentExecutionId; resultMap["CustomerSuppliedId"] = ctx.CustomerSuppliedId; diff --git a/ydb/core/kqp/common/kqp_user_request_context.h b/ydb/core/kqp/common/kqp_user_request_context.h index 1d5a966bd0fb..9a17252fccf7 100644 --- a/ydb/core/kqp/common/kqp_user_request_context.h +++ b/ydb/core/kqp/common/kqp_user_request_context.h @@ -11,11 +11,12 @@ namespace NKikimr::NKqp { struct TUserRequestContext : public TAtomicRefCount { TString TraceId; TString Database; + TString DatabaseId; TString SessionId; TString CurrentExecutionId; TString CustomerSuppliedId; TString PoolId; - NResourcePool::TPoolSettings PoolConfig; + std::optional PoolConfig; TUserRequestContext() = default; diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index f1a52fc7a897..dfd4bdbc4963 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -48,6 +48,7 @@ constexpr TStringBuf KqpTableSinkName = "KqpTableSinkName"; static constexpr std::string_view TKqpStreamLookupStrategyName = "LookupRows"sv; static constexpr std::string_view TKqpStreamLookupJoinStrategyName = "LookupJoinRows"sv; +static constexpr std::string_view TKqpStreamLookupSemiJoinStrategyName = "LookupSemiJoinRows"sv; struct TKqpReadTableSettings { static constexpr TStringBuf SkipNullKeysSettingName = "SkipNullKeys"; diff --git a/ydb/core/kqp/common/simple/kqp_event_ids.h b/ydb/core/kqp/common/simple/kqp_event_ids.h index de1be6b95043..571944731486 100644 --- a/ydb/core/kqp/common/simple/kqp_event_ids.h +++ b/ydb/core/kqp/common/simple/kqp_event_ids.h @@ -44,7 +44,9 @@ struct TKqpEvents { EvListSessionsRequest, EvListSessionsResponse, EvListProxyNodesRequest, - EvListProxyNodesResponse + EvListProxyNodesResponse, + EvUpdateDatabaseInfo, + EvDelayedRequestError }; static_assert (EvCompileInvalidateRequest + 1 == 
EvAbortExecution); @@ -174,6 +176,9 @@ struct TKqpWorkloadServiceEvents { EvContinueRequest, EvCleanupRequest, EvCleanupResponse, + EvUpdatePoolInfo, + EvSubscribeOnPoolChanges, + EvFetchDatabaseResponse, }; }; diff --git a/ydb/core/kqp/common/simple/query_id.cpp b/ydb/core/kqp/common/simple/query_id.cpp index 849c384e969a..241abd3cf82c 100644 --- a/ydb/core/kqp/common/simple/query_id.cpp +++ b/ydb/core/kqp/common/simple/query_id.cpp @@ -4,16 +4,18 @@ #include #include +#include #include namespace NKikimr::NKqp { -TKqpQueryId::TKqpQueryId(const TString& cluster, const TString& database, const TString& text, +TKqpQueryId::TKqpQueryId(const TString& cluster, const TString& database, const TString& databaseId, const TString& text, const TKqpQuerySettings& settings, std::shared_ptr> queryParameterTypes, const TGUCSettings& gUCSettings) : Cluster(cluster) , Database(database) + , DatabaseId(databaseId) , Text(text) , Settings(settings) , QueryParameterTypes(queryParameterTypes) @@ -41,6 +43,7 @@ bool TKqpQueryId::IsSql() const { bool TKqpQueryId::operator==(const TKqpQueryId& other) const { if (!(Cluster == other.Cluster && Database == other.Database && + DatabaseId == other.DatabaseId && UserSid == other.UserSid && Text == other.Text && Settings == other.Settings && @@ -74,4 +77,26 @@ bool TKqpQueryId::operator==(const TKqpQueryId& other) const { return true; } +TString TKqpQueryId::SerializeToString() const { + TStringBuilder result = TStringBuilder() << "{" + << "Cluster: " << Cluster << ", " + << "Database: " << Database << ", " + << "DatabaseId: " << DatabaseId << ", " + << "UserSid: " << UserSid << ", " + << "Text: " << EscapeC(Text) << ", " + << "Settings: " << Settings.SerializeToString() << ", "; + if (QueryParameterTypes) { + result << "QueryParameterTypes: ["; + for (const auto& param : *QueryParameterTypes) { + result << "name: " << param.first << ", type: " << param.second.ShortDebugString(); + } + result << "], "; + } else { + result << 
"QueryParameterTypes: , "; + } + + result << "GUCSettings: " << GUCSettings.SerializeToString() << "}"; + return result; +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/simple/query_id.h b/ydb/core/kqp/common/simple/query_id.h index 3470333b5072..120d524d9808 100644 --- a/ydb/core/kqp/common/simple/query_id.h +++ b/ydb/core/kqp/common/simple/query_id.h @@ -13,6 +13,7 @@ namespace NKikimr::NKqp { struct TKqpQueryId { TString Cluster; TString Database; + TString DatabaseId; TString UserSid; TString Text; TKqpQuerySettings Settings; @@ -21,7 +22,7 @@ struct TKqpQueryId { TGUCSettings GUCSettings; public: - TKqpQueryId(const TString& cluster, const TString& database, const TString& text, + TKqpQueryId(const TString& cluster, const TString& database, const TString& databaseId, const TString& text, const TKqpQuerySettings& settings, std::shared_ptr> queryParameterTypes, const TGUCSettings& gUCSettings); @@ -44,6 +45,8 @@ struct TKqpQueryId { GUCSettings.GetHash()); return THash()(tuple); } + + TString SerializeToString() const; }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/simple/settings.h b/ydb/core/kqp/common/simple/settings.h index f6227f45602f..70b7c4aaa285 100644 --- a/ydb/core/kqp/common/simple/settings.h +++ b/ydb/core/kqp/common/simple/settings.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include #include @@ -39,6 +41,14 @@ struct TKqpQuerySettings { auto tuple = std::make_tuple(DocumentApiRestricted, IsInternalCall, QueryType, Syntax); return THash()(tuple); } + + TString SerializeToString() const { + TStringBuilder result = TStringBuilder() << "{" + << "DocumentApiRestricted: " << DocumentApiRestricted << ", " + << "IsInternalCall: " << IsInternalCall << ", " + << "QueryType: " << QueryType << "}"; + return result; + } }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 776d2c534583..2a5c02ad776c 100644 --- 
a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -146,7 +146,8 @@ class TKqpCompileActor : public TActorBootstrapped { .SetIsEnableExternalDataSources(AppData(ctx)->FeatureFlags.GetEnableExternalDataSources()) .SetIsEnablePgConstsToParams(Config->EnablePgConstsToParams) .SetApplicationName(ApplicationName) - .SetQueryParameters(QueryId.QueryParameterTypes); + .SetQueryParameters(QueryId.QueryParameterTypes) + .SetIsEnablePgSyntax(AppData(ctx)->FeatureFlags.GetEnablePgSyntax()); return ParseStatements(QueryId.Text, QueryId.Settings.Syntax, QueryId.IsSql(), settingsBuilder, PerStatementResult); } @@ -172,11 +173,8 @@ class TKqpCompileActor : public TActorBootstrapped { } void StartSplitting(const TActorContext &ctx) { - YQL_ENSURE(PerStatementResult); - const auto prepareSettings = PrepareCompilationSettings(ctx); - - auto result = KqpHost->SplitQuery(QueryId.Text, prepareSettings); + auto result = KqpHost->SplitQuery(QueryRef, prepareSettings); Become(&TKqpCompileActor::CompileState); ReplySplitResult(ctx, std::move(result)); @@ -268,20 +266,19 @@ class TKqpCompileActor : public TActorBootstrapped { std::shared_ptr loader = std::make_shared( QueryId.Cluster, TlsActivationContext->ActorSystem(), Config, true, TempTablesState); - Gateway = CreateKikimrIcGateway(QueryId.Cluster, QueryId.Settings.QueryType, QueryId.Database, std::move(loader), + Gateway = CreateKikimrIcGateway(QueryId.Cluster, QueryId.Settings.QueryType, QueryId.Database, QueryId.DatabaseId, std::move(loader), ctx.ExecutorThread.ActorSystem, ctx.SelfID.NodeId(), counters, QueryServiceConfig); Gateway->SetToken(QueryId.Cluster, UserToken); Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, QueryId.Cluster, QueryId.Database, Config, ModuleResolverState->ModuleResolver, - FederatedQuerySetup, UserToken, GUCSettings, ApplicationName, AppData(ctx)->FunctionRegistry, - false, false, std::move(TempTablesState), 
nullptr, SplitCtx); + FederatedQuerySetup, UserToken, GUCSettings, QueryServiceConfig, ApplicationName, AppData(ctx)->FunctionRegistry, + false, false, std::move(TempTablesState), nullptr, SplitCtx, UserRequestContext); IKqpHost::TPrepareSettings prepareSettings; prepareSettings.DocumentApiRestricted = QueryId.Settings.DocumentApiRestricted; prepareSettings.IsInternalCall = QueryId.Settings.IsInternalCall; - prepareSettings.PerStatementResult = PerStatementResult; switch (QueryId.Settings.Syntax) { case Ydb::Query::Syntax::SYNTAX_YQL_V1: @@ -382,9 +379,9 @@ class TKqpCompileActor : public TActorBootstrapped { void ReplyError(Ydb::StatusIds::StatusCode status, const TIssues& issues) { if (!KqpCompileResult) { - KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(QueryId)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(QueryId), std::move(QueryAst)); } else { - KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(KqpCompileResult->Query)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(KqpCompileResult->Query), std::move(KqpCompileResult->QueryAst)); } Reply(); @@ -453,16 +450,12 @@ class TKqpCompileActor : public TActorBootstrapped { } void FillCompileResult(std::unique_ptr preparingQuery, NKikimrKqp::EQueryType queryType, - bool allowCache) { + bool allowCache, bool success) { auto preparedQueryHolder = std::make_shared( - preparingQuery.release(), AppData()->FunctionRegistry); + preparingQuery.release(), AppData()->FunctionRegistry, !success); preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); KqpCompileResult->PreparedQuery = preparedQueryHolder; KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()) && allowCache; - - if (QueryAst) { - KqpCompileResult->Ast = QueryAst->Ast; - } } void 
Handle(TEvKqp::TEvContinueProcess::TPtr &ev, const TActorContext &ctx) { @@ -481,7 +474,7 @@ class TKqpCompileActor : public TActorBootstrapped { if (kqpResult.NeedToSplit) { KqpCompileResult = TKqpCompileResult::Make( - Uid, status, kqpResult.Issues(), ETableReadType::Other, std::move(QueryId), {}, true); + Uid, status, kqpResult.Issues(), ETableReadType::Other, std::move(QueryId), std::move(QueryAst), true); Reply(); return; } @@ -499,12 +492,12 @@ class TKqpCompileActor : public TActorBootstrapped { auto queryType = QueryId.Settings.QueryType; - KqpCompileResult = TKqpCompileResult::Make(Uid, status, kqpResult.Issues(), maxReadType, std::move(QueryId)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, kqpResult.Issues(), maxReadType, std::move(QueryId), std::move(QueryAst)); KqpCompileResult->CommandTagName = kqpResult.CommandTagName; if (status == Ydb::StatusIds::SUCCESS) { YQL_ENSURE(kqpResult.PreparingQuery); - FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache); + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, true); auto now = TInstant::Now(); auto duration = now - StartTime; @@ -515,7 +508,7 @@ class TKqpCompileActor : public TActorBootstrapped { << ", duration: " << duration); } else { if (kqpResult.PreparingQuery) { - FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache); + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, false); } LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_ACTOR, "Compilation failed" @@ -611,6 +604,9 @@ void ApplyServiceConfig(TKikimrConfiguration& kqpConfig, const TTableServiceConf kqpConfig.IdxLookupJoinsPrefixPointLimit = serviceConfig.GetIdxLookupJoinPointsLimit(); kqpConfig.OldLookupJoinBehaviour = serviceConfig.GetOldLookupJoinBehaviour(); kqpConfig.EnableSpillingGenericQuery = serviceConfig.GetEnableQueryServiceSpilling(); + kqpConfig.DefaultCostBasedOptimizationLevel = 
serviceConfig.GetDefaultCostBasedOptimizationLevel(); + kqpConfig.EnableConstantFolding = serviceConfig.GetEnableConstantFolding(); + kqpConfig.SetDefaultEnabledSpillingNodes(serviceConfig.GetEnableSpillingNodes()); if (const auto limit = serviceConfig.GetResourceManager().GetMkqlHeavyProgramMemoryLimit()) { kqpConfig._KqpYqlCombinerMemoryLimit = std::max(1_GB, limit - (limit >> 2U)); diff --git a/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp index 6cdde2f174d4..ed8159b8591a 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp @@ -62,7 +62,7 @@ class TKqpCompileComputationPatternService : public TActorBootstrappedPattern->Compile({}, nullptr); - patternCache->NotifyPatternCompiled(patternToCompile.SerializedProgram, patternToCompile.Entry); + patternCache->NotifyPatternCompiled(patternToCompile.SerializedProgram); patternToCompile.Entry = nullptr; Counters->CompiledComputationPatterns->Inc(); diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp index ff52fe2277f9..bf921172acd2 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp @@ -41,21 +41,25 @@ class TKqpQueryCache { YQL_ENSURE(compileResult->PreparedQuery); auto queryIt = QueryIndex.emplace(query, compileResult->Uid); + if (!queryIt.second) { + EraseByUid(compileResult->Uid); + QueryIndex.erase(query); + } Y_ENSURE(queryIt.second); } void InsertAst(const TKqpCompileResult::TConstPtr& compileResult) { Y_ENSURE(compileResult->Query); - Y_ENSURE(compileResult->Ast); + Y_ENSURE(compileResult->GetAst()); - AstIndex.emplace(GetQueryIdWithAst(*compileResult->Query, *compileResult->Ast), compileResult->Uid); + AstIndex.emplace(GetQueryIdWithAst(*compileResult->Query, 
*compileResult->GetAst()), compileResult->Uid); } bool Insert(const TKqpCompileResult::TConstPtr& compileResult, bool isEnableAstCache, bool isPerStatementExecution) { if (!isPerStatementExecution) { InsertQuery(compileResult); } - if (isEnableAstCache && compileResult->Ast) { + if (isEnableAstCache && compileResult->GetAst()) { InsertAst(compileResult); } @@ -72,8 +76,8 @@ class TKqpQueryCache { auto queryId = *removedItem->Value.CompileResult->Query; QueryIndex.erase(queryId); - if (removedItem->Value.CompileResult->Ast) { - AstIndex.erase(GetQueryIdWithAst(queryId, *removedItem->Value.CompileResult->Ast)); + if (removedItem->Value.CompileResult->GetAst()) { + AstIndex.erase(GetQueryIdWithAst(queryId, *removedItem->Value.CompileResult->GetAst())); } auto indexIt = Index.find(*removedItem); if (indexIt != Index.end()) { @@ -149,7 +153,7 @@ class TKqpQueryCache { } } } - return TKqpQueryId{query.Cluster, query.Database, ast.Root->ToString(), query.Settings, astPgParams, query.GUCSettings}; + return TKqpQueryId{query.Cluster, query.Database, query.DatabaseId, ast.Root->ToString(), query.Settings, astPgParams, query.GUCSettings}; } TKqpCompileResult::TConstPtr FindByQuery(const TKqpQueryId& query, bool promote) { @@ -186,8 +190,8 @@ class TKqpQueryCache { Y_ABORT_UNLESS(item->Value.CompileResult->Query); auto queryId = *item->Value.CompileResult->Query; QueryIndex.erase(queryId); - if (item->Value.CompileResult->Ast) { - AstIndex.erase(GetQueryIdWithAst(queryId, *item->Value.CompileResult->Ast)); + if (item->Value.CompileResult->GetAst()) { + AstIndex.erase(GetQueryIdWithAst(queryId, *item->Value.CompileResult->GetAst())); } Index.erase(it); @@ -323,6 +327,8 @@ struct TKqpCompileRequest { NYql::TExprContext* SplitCtx; NYql::TExprNode::TPtr SplitExpr; + bool FindInCache = true; + bool IsIntrestedInResult() const { return IntrestedInResult->load(); } @@ -528,9 +534,15 @@ class TKqpCompileService : public TActorBootstrapped { bool enableCreateTableAs = 
TableServiceConfig.GetEnableCreateTableAs(); auto blockChannelsMode = TableServiceConfig.GetBlockChannelsMode(); + bool enableImplicitQueryParameterTypes = TableServiceConfig.GetEnableImplicitQueryParameterTypes(); + auto mkqlHeavyLimit = TableServiceConfig.GetResourceManager().GetMkqlHeavyProgramMemoryLimit(); bool enableQueryServiceSpilling = TableServiceConfig.GetEnableQueryServiceSpilling(); + ui64 defaultCostBasedOptimizationLevel = TableServiceConfig.GetDefaultCostBasedOptimizationLevel(); + bool enableConstantFolding = TableServiceConfig.GetEnableConstantFolding(); + + TString enableSpillingNodes = TableServiceConfig.GetEnableSpillingNodes(); TableServiceConfig.Swap(event.MutableConfig()->MutableTableServiceConfig()); LOG_INFO(*TlsActivationContext, NKikimrServices::KQP_COMPILE_SERVICE, "Updated config"); @@ -558,7 +570,11 @@ class TKqpCompileService : public TActorBootstrapped { TableServiceConfig.GetExtractPredicateRangesLimit() != rangesLimit || TableServiceConfig.GetResourceManager().GetMkqlHeavyProgramMemoryLimit() != mkqlHeavyLimit || TableServiceConfig.GetIdxLookupJoinPointsLimit() != idxLookupPointsLimit || - TableServiceConfig.GetEnableQueryServiceSpilling() != enableQueryServiceSpilling) { + TableServiceConfig.GetEnableSpillingNodes() != enableSpillingNodes || + TableServiceConfig.GetEnableQueryServiceSpilling() != enableQueryServiceSpilling || + TableServiceConfig.GetEnableImplicitQueryParameterTypes() != enableImplicitQueryParameterTypes || + TableServiceConfig.GetDefaultCostBasedOptimizationLevel() != defaultCostBasedOptimizationLevel || + TableServiceConfig.GetEnableConstantFolding() != enableConstantFolding) { QueryCache.Clear(); @@ -611,6 +627,7 @@ class TKqpCompileService : public TActorBootstrapped { << ", queryUid: " << (request.Uid ? *request.Uid : "") << ", queryText: \"" << (request.Query ? 
EscapeC(request.Query->Text) : "") << "\"" << ", keepInCache: " << request.KeepInCache + << ", split: " << request.Split << *request.UserRequestContext); *Counters->CompileQueryCacheSize = QueryCache.Size(); @@ -668,6 +685,8 @@ class TKqpCompileService : public TActorBootstrapped { Y_ENSURE(query.UserSid == userSid); } + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Try to find query by queryId, queryId: " << query.SerializeToString()); + auto compileResult = QueryCache.FindByQuery(query, request.KeepInCache); if (HasTempTablesNameClashes(compileResult, request.TempTablesState)) { compileResult = nullptr; @@ -697,7 +716,7 @@ class TKqpCompileService : public TActorBootstrapped { request.Deadline, ev->Get()->Split ? ECompileActorAction::SPLIT - : TableServiceConfig.GetEnableAstCache() + : (TableServiceConfig.GetEnableAstCache() && !request.QueryAst) ? ECompileActorAction::PARSE : ECompileActorAction::COMPILE); TKqpCompileRequest compileRequest(ev->Sender, CreateGuidAsString(), std::move(*request.Query), @@ -754,19 +773,37 @@ class TKqpCompileService : public TActorBootstrapped { } if (compileResult || request.Query) { - QueryCache.EraseByUid(request.Uid); - Counters->ReportCompileRequestCompile(dbCounters); NWilson::TSpan compileServiceSpan(TWilsonKqp::CompileService, ev->Get() ? std::move(ev->TraceId) : NWilson::TTraceId(), "CompileService"); - TKqpCompileSettings compileSettings(true, request.IsQueryActionPrepare, false, request.Deadline, TableServiceConfig.GetEnableAstCache() ? ECompileActorAction::PARSE : ECompileActorAction::COMPILE); - TKqpCompileRequest compileRequest(ev->Sender, request.Uid, request.Query ? *request.Query : *compileResult->Query, + TKqpCompileSettings compileSettings( + true, + request.IsQueryActionPrepare, + false, + request.Deadline, + ev->Get()->Split + ? ECompileActorAction::SPLIT + : (TableServiceConfig.GetEnableAstCache() && !request.QueryAst) + ? 
ECompileActorAction::PARSE + : ECompileActorAction::COMPILE); + auto query = request.Query ? *request.Query : *compileResult->Query; + if (compileResult) { + query.UserSid = compileResult->Query->UserSid; + if (query != *compileResult->Query) { + LOG_WARN_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "queryId in recompile request and queryId in cache are different" + << ", queryId in request: " << query.SerializeToString() + << ", queryId in cache: " << compileResult->Query->SerializeToString() + ); + } + } + TKqpCompileRequest compileRequest(ev->Sender, request.Uid, compileResult ? *compileResult->Query : *request.Query, compileSettings, request.UserToken, dbCounters, request.GUCSettings, request.ApplicationName, ev->Cookie, std::move(ev->Get()->IntrestedInResult), ev->Get()->UserRequestContext, ev->Get() ? std::move(ev->Get()->Orbit) : NLWTrace::TOrbit(), std::move(compileServiceSpan), std::move(ev->Get()->TempTablesState)); + compileRequest.FindInCache = false; if (TableServiceConfig.GetEnableAstCache() && request.QueryAst) { return CompileByAst(*request.QueryAst, compileRequest, ctx); @@ -824,6 +861,7 @@ class TKqpCompileService : public TActorBootstrapped { if (compileResult->NeedToSplit) { Reply(compileRequest.Sender, compileResult, compileStats, ctx, compileRequest.Cookie, std::move(compileRequest.Orbit), std::move(compileRequest.CompileServiceSpan), (CollectDiagnostics ? 
ev->Get()->ReplayMessageUserView : std::nullopt)); + ProcessQueue(ctx); return; } @@ -835,7 +873,7 @@ class TKqpCompileService : public TActorBootstrapped { try { if (compileResult->Status == Ydb::StatusIds::SUCCESS) { if (!hasTempTablesNameClashes) { - UpdateQueryCache(compileResult, keepInCache, compileRequest.CompileSettings.IsQueryActionPrepare, isPerStatementExecution); + UpdateQueryCache(ctx, compileResult, keepInCache, compileRequest.CompileSettings.IsQueryActionPrepare, isPerStatementExecution); } if (ev->Get()->ReplayMessage && !QueryReplayBackend->IsNull()) { @@ -917,15 +955,21 @@ class TKqpCompileService : public TActorBootstrapped { return compileResult->PreparedQuery->HasTempTables(tempTablesState, withSessionId); } - void UpdateQueryCache(TKqpCompileResult::TConstPtr compileResult, bool keepInCache, bool isQueryActionPrepare, bool isPerStatementExecution) { + void UpdateQueryCache(const TActorContext& ctx, TKqpCompileResult::TConstPtr compileResult, bool keepInCache, bool isQueryActionPrepare, bool isPerStatementExecution) { if (QueryCache.FindByUid(compileResult->Uid, false)) { QueryCache.Replace(compileResult); } else if (keepInCache) { + if (compileResult->Query) { + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Insert query into compile cache, queryId: " << compileResult->Query->SerializeToString()); + if (QueryCache.FindByQuery(*compileResult->Query, keepInCache)) { + LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Trying to insert query into compile cache when it is already there"); + } + } if (QueryCache.Insert(compileResult, TableServiceConfig.GetEnableAstCache(), isPerStatementExecution)) { Counters->CompileQueryCacheEvicted->Inc(); } if (compileResult->Query && isQueryActionPrepare) { - if (InsertPreparingQuery(compileResult, true, isPerStatementExecution)) { + if (InsertPreparingQuery(ctx, compileResult, true, isPerStatementExecution)) { Counters->CompileQueryCacheEvicted->Inc(); }; } @@ -936,9 +980,11 @@ class 
TKqpCompileService : public TActorBootstrapped { YQL_ENSURE(queryAst.Ast); YQL_ENSURE(queryAst.Ast->IsOk()); YQL_ENSURE(queryAst.Ast->Root); + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Try to find query by ast, queryId: " << compileRequest.Query.SerializeToString() + << ", ast: " << queryAst.Ast->Root->ToString()); auto compileResult = QueryCache.FindByAst(compileRequest.Query, *queryAst.Ast, compileRequest.CompileSettings.KeepInCache); - if (HasTempTablesNameClashes(compileResult, compileRequest.TempTablesState)) { + if (!compileRequest.FindInCache || HasTempTablesNameClashes(compileResult, compileRequest.TempTablesState)) { compileResult = nullptr; } @@ -949,7 +995,7 @@ class TKqpCompileService : public TActorBootstrapped { << ", sender: " << compileRequest.Sender << ", queryUid: " << compileResult->Uid); - compileResult->Ast->PgAutoParamValues = std::move(queryAst.Ast->PgAutoParamValues); + compileResult->GetAst()->PgAutoParamValues = std::move(queryAst.Ast->PgAutoParamValues); ReplyFromCache(compileRequest.Sender, compileResult, ctx, compileRequest.Cookie, std::move(compileRequest.Orbit), std::move(compileRequest.CompileServiceSpan)); return; @@ -961,7 +1007,6 @@ class TKqpCompileService : public TActorBootstrapped { compileRequest.Orbit, compileRequest.Query.UserSid); - compileRequest.CompileSettings.Action = ECompileActorAction::COMPILE; compileRequest.QueryAst = std::move(queryAst); if (!RequestsQueue.Enqueue(std::move(compileRequest))) { @@ -994,6 +1039,7 @@ class TKqpCompileService : public TActorBootstrapped { return; } + compileRequest.CompileSettings.Action = ECompileActorAction::COMPILE; CompileByAst(astStatements.front(), compileRequest, ctx); } @@ -1004,7 +1050,7 @@ class TKqpCompileService : public TActorBootstrapped { } private: - bool InsertPreparingQuery(const TKqpCompileResult::TConstPtr& compileResult, bool keepInCache, bool isPerStatementExecution) { + bool InsertPreparingQuery(const TActorContext& ctx, const 
TKqpCompileResult::TConstPtr& compileResult, bool keepInCache, bool isPerStatementExecution) { YQL_ENSURE(compileResult->Query); auto query = *compileResult->Query; @@ -1023,12 +1069,13 @@ class TKqpCompileService : public TActorBootstrapped { if (QueryCache.FindByQuery(query, keepInCache)) { return false; } - if (compileResult->Ast && QueryCache.FindByAst(query, *compileResult->Ast, keepInCache)) { + if (compileResult->GetAst() && QueryCache.FindByAst(query, *compileResult->GetAst(), keepInCache)) { return false; } - auto newCompileResult = TKqpCompileResult::Make(CreateGuidAsString(), compileResult->Status, compileResult->Issues, compileResult->MaxReadType, std::move(query), compileResult->Ast); + auto newCompileResult = TKqpCompileResult::Make(CreateGuidAsString(), compileResult->Status, compileResult->Issues, compileResult->MaxReadType, std::move(query), compileResult->QueryAst); newCompileResult->AllowCache = compileResult->AllowCache; newCompileResult->PreparedQuery = compileResult->PreparedQuery; + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Insert preparing query with params, queryId: " << query.SerializeToString()); return QueryCache.Insert(newCompileResult, TableServiceConfig.GetEnableAstCache(), isPerStatementExecution); } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp index 74b0f6f14f79..3376baff7be6 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include @@ -78,7 +78,7 @@ NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( if (federatedQuerySetup) { auto s3HttpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); - s3ActorsFactory->RegisterS3ReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway, s3HttpRetryPolicy); + 
s3ActorsFactory->RegisterS3ReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway, s3HttpRetryPolicy, federatedQuerySetup->S3ReadActorFactoryConfig, nullptr, federatedQuerySetup->S3GatewayConfig.GetAllowLocalFiles()); s3ActorsFactory->RegisterS3WriteActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway, s3HttpRetryPolicy); if (federatedQuerySetup->ConnectorClient) { @@ -131,18 +131,18 @@ namespace NKikimr::NKqp { using namespace NYql::NDq; using namespace NYql::NDqProto; -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, - TIntrusivePtr arena) { - return new NScanPrivate::TKqpScanComputeActor(executerId, txId, task, std::move(asyncIoFactory), + TIntrusivePtr arena, TComputeActorSchedulingOptions schedulingOptions) { + return new NScanPrivate::TKqpScanComputeActor(std::move(schedulingOptions), executerId, txId, lockTxId, lockNodeId, task, std::move(asyncIoFactory), settings, memoryLimits, std::move(traceId), std::move(arena)); } IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { - return new NScanPrivate::TKqpScanFetcherActor(snapshot, settings, std::move(computeActors), txId, meta, shardsScanningPolicy, counters, std::move(traceId)); + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { + 
return new NScanPrivate::TKqpScanFetcherActor(snapshot, settings, std::move(computeActors), txId, lockTxId, lockNodeId, meta, shardsScanningPolicy, counters, std::move(traceId)); } } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_compute_actor.h index 0e191e113780..e41145665805 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -48,16 +49,15 @@ IActor* CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NYql::NDqPr const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, - const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings); + const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, TComputeActorSchedulingOptions); -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, - TIntrusivePtr arena); - + TIntrusivePtr arena, TComputeActorSchedulingOptions); IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); NYql::NDq::IDqAsyncIoFactory::TPtr 
CreateKqpAsyncIoFactory( TIntrusivePtr counters, diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp index 9122f611e39c..ca44ab103223 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp @@ -6,137 +6,179 @@ namespace NKikimr::NKqp::NComputeActor { + struct TMemoryQuotaManager : public NYql::NDq::TGuaranteeQuotaManager { TMemoryQuotaManager(std::shared_ptr resourceManager , NRm::EKqpMemoryPool memoryPool , std::shared_ptr state - , ui64 txId - , ui64 taskId - , ui64 limit - , ui64 reasonableSpillingTreshold) + , TIntrusivePtr tx + , TIntrusivePtr task + , ui64 limit) : NYql::NDq::TGuaranteeQuotaManager(limit, limit) , ResourceManager(std::move(resourceManager)) , MemoryPool(memoryPool) , State(std::move(state)) - , TxId(txId) - , TaskId(taskId) - , ReasonableSpillingTreshold(reasonableSpillingTreshold) + , Tx(std::move(tx)) + , Task(std::move(task)) { } ~TMemoryQuotaManager() override { - State->OnTaskTerminate(TxId, TaskId, Success); - ResourceManager->FreeResources(TxId, TaskId); + if (State) { + State->OnTaskTerminate(Tx->TxId, Task->TaskId, Success); + } + + ResourceManager->FreeResources(Tx, Task); } bool AllocateExtraQuota(ui64 extraSize) override { - auto result = ResourceManager->AllocateResources(TxId, TaskId, + auto result = ResourceManager->AllocateResources(Tx, Task, NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = extraSize}); if (!result) { AFL_WARN(NKikimrServices::KQP_COMPUTE) ("problem", "cannot_allocate_memory") - ("tx_id", TxId) - ("task_id", TaskId) + ("tx_id", Tx->TxId) + ("task_id", Task->TaskId) ("memory", extraSize); return false; } - TotalQueryAllocationsSize = result.TotalAllocatedQueryMemory; - return true; } void FreeExtraQuota(ui64 extraSize) override { - ResourceManager->FreeResources(TxId, TaskId, - NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = 
extraSize} - ); + NRm::TKqpResourcesRequest request = NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = extraSize}; + ResourceManager->FreeResources(Tx, Task, Task->FitRequest(request)); } bool IsReasonableToUseSpilling() const override { - return TotalQueryAllocationsSize >= ReasonableSpillingTreshold; + return Task->IsReasonableToStartSpilling(); + } + + TString MemoryConsumptionDetails() const override { + return Tx->ToString(); } void TerminateHandler(bool success, const NYql::TIssues& issues) { AFL_DEBUG(NKikimrServices::KQP_COMPUTE) ("problem", "finish_compute_actor") - ("tx_id", TxId)("task_id", TaskId)("success", success)("message", issues.ToOneLineString()); + ("tx_id", Tx->TxId)("task_id", Task->TaskId)("success", success)("message", issues.ToOneLineString()); Success = success; } std::shared_ptr ResourceManager; NRm::EKqpMemoryPool MemoryPool; std::shared_ptr State; - ui64 TxId; - ui64 TaskId; + TIntrusivePtr Tx; + TIntrusivePtr Task; bool Success = true; - ui64 TotalQueryAllocationsSize = 0; ui64 ReasonableSpillingTreshold = 0; }; class TKqpCaFactory : public IKqpNodeComputeActorFactory { - NKikimrConfig::TTableServiceConfig::TResourceManager Config; std::shared_ptr ResourceManager_; NYql::NDq::IDqAsyncIoFactory::TPtr AsyncIoFactory; const std::optional FederatedQuerySetup; + std::atomic MkqlLightProgramMemoryLimit = 0; + std::atomic MkqlHeavyProgramMemoryLimit = 0; + std::atomic MinChannelBufferSize = 0; + std::atomic MinMemAllocSize = 8_MB; + std::atomic MinMemFreeSize = 32_MB; + public: TKqpCaFactory(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, std::shared_ptr resourceManager, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional federatedQuerySetup) - : Config(config) - , ResourceManager_(resourceManager) + : ResourceManager_(resourceManager) , AsyncIoFactory(asyncIoFactory) , FederatedQuerySetup(federatedQuerySetup) - {} + { + ApplyConfig(config); + } - TActorId CreateKqpComputeActor(const 
TActorId& executerId, ui64 txId, NYql::NDqProto::TDqTask* dqTask, - const NYql::NDq::TComputeRuntimeSettings& settings, - NWilson::TTraceId traceId, TIntrusivePtr arena, const TString& serializedGUCSettings, - TComputeStagesWithScan& computesByStage, ui64 outputChunkMaxSize, std::shared_ptr state, - NRm::EKqpMemoryPool memoryPool, ui32 numberOfTasks) + void ApplyConfig(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) override { + MkqlLightProgramMemoryLimit.store(config.GetMkqlLightProgramMemoryLimit()); + MkqlHeavyProgramMemoryLimit.store(config.GetMkqlHeavyProgramMemoryLimit()); + MinChannelBufferSize.store(config.GetMinChannelBufferSize()); + MinMemAllocSize.store(config.GetMinMemAllocSize()); + MinMemFreeSize.store(config.GetMinMemFreeSize()); + } + + TActorStartResult CreateKqpComputeActor(TCreateArgs&& args) override { NYql::NDq::TComputeMemoryLimits memoryLimits; memoryLimits.ChannelBufferSize = 0; - memoryLimits.MkqlLightProgramMemoryLimit = Config.GetMkqlLightProgramMemoryLimit(); - memoryLimits.MkqlHeavyProgramMemoryLimit = Config.GetMkqlHeavyProgramMemoryLimit(); + memoryLimits.MkqlLightProgramMemoryLimit = MkqlLightProgramMemoryLimit.load(); + memoryLimits.MkqlHeavyProgramMemoryLimit = MkqlHeavyProgramMemoryLimit.load(); + memoryLimits.MinMemAllocSize = MinMemAllocSize.load(); + memoryLimits.MinMemFreeSize = MinMemFreeSize.load(); - auto estimation = EstimateTaskResources(*dqTask, Config, numberOfTasks); + auto estimation = ResourceManager_->EstimateTaskResources(*args.Task, args.NumberOfTasks); + NRm::TKqpResourcesRequest resourcesRequest; + resourcesRequest.MemoryPool = args.MemoryPool; + resourcesRequest.ExecutionUnits = 1; + resourcesRequest.Memory = memoryLimits.MkqlLightProgramMemoryLimit; + + TIntrusivePtr task = MakeIntrusive(args.Task->GetId(), args.TxInfo->CreatedAt); + + auto rmResult = ResourceManager_->AllocateResources( + args.TxInfo, task, resourcesRequest); + + if (!rmResult) { + return 
NRm::TKqpRMAllocateResult{rmResult}; + } { ui32 inputChannelsCount = 0; - for (auto&& i : dqTask->GetInputs()) { + for (auto&& i : args.Task->GetInputs()) { inputChannelsCount += i.ChannelsSize(); } - memoryLimits.ChannelBufferSize = std::max(estimation.ChannelBufferMemoryLimit / std::max(1, inputChannelsCount), Config.GetMinChannelBufferSize()); - memoryLimits.OutputChunkMaxSize = outputChunkMaxSize; + memoryLimits.ChannelBufferSize = std::max(estimation.ChannelBufferMemoryLimit / std::max(1, inputChannelsCount), MinChannelBufferSize.load()); + memoryLimits.OutputChunkMaxSize = args.OutputChunkMaxSize; AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "channel_info") ("ch_size", estimation.ChannelBufferMemoryLimit) ("ch_count", estimation.ChannelBuffersCount) ("ch_limit", memoryLimits.ChannelBufferSize) - ("inputs", dqTask->InputsSize()) + ("inputs", args.Task->InputsSize()) ("input_channels_count", inputChannelsCount); } - auto& taskOpts = dqTask->GetProgram().GetSettings(); + auto& taskOpts = args.Task->GetProgram().GetSettings(); auto limit = taskOpts.GetHasMapJoin() || taskOpts.GetHasStateAggregation() ? 
memoryLimits.MkqlHeavyProgramMemoryLimit : memoryLimits.MkqlLightProgramMemoryLimit; memoryLimits.MemoryQuotaManager = std::make_shared( ResourceManager_, - memoryPool, - std::move(state), - txId, - dqTask->GetId(), - limit, - Config.GetReasonableSpillingTreshold()); - - auto runtimeSettings = settings; + args.MemoryPool, + std::move(args.State), + std::move(args.TxInfo), + std::move(task), + limit); + + auto runtimeSettings = args.RuntimeSettings; + runtimeSettings.ExtraMemoryAllocationPool = args.MemoryPool; + runtimeSettings.UseSpilling = args.WithSpilling; + runtimeSettings.StatsMode = args.StatsMode; + + if (runtimeSettings.UseSpilling) { + args.Task->SetEnableSpilling(runtimeSettings.UseSpilling); + } + + if (args.Deadline) { + runtimeSettings.Timeout = args.Deadline - TAppData::TimeProvider->Now(); + } + + if (args.RlPath) { + runtimeSettings.RlPath = args.RlPath; + } + NYql::NDq::IMemoryQuotaManager::TWeakPtr memoryQuotaManager = memoryLimits.MemoryQuotaManager; runtimeSettings.TerminateHandler = [memoryQuotaManager] (bool success, const NYql::TIssues& issues) { @@ -157,29 +199,34 @@ class TKqpCaFactory : public IKqpNodeComputeActorFactory { }; ETableKind tableKind = ETableKind::Unknown; - if (dqTask->HasMetaId()) { - YQL_ENSURE(computesByStage.GetMetaById(*dqTask, meta) || dqTask->GetMeta().UnpackTo(&meta), "cannot take meta on MetaId exists in tasks"); + if (args.Task->HasMetaId()) { + YQL_ENSURE(args.ComputesByStages); + YQL_ENSURE(args.ComputesByStages->GetMetaById(*args.Task, meta) || args.Task->GetMeta().UnpackTo(&meta), "cannot take meta on MetaId exists in tasks"); tableKind = tableKindExtract(meta); - } else if (dqTask->GetMeta().UnpackTo(&meta)) { + } else if (args.Task->GetMeta().UnpackTo(&meta)) { tableKind = tableKindExtract(meta); } if (tableKind == ETableKind::Datashard || tableKind == ETableKind::Olap) { - auto& info = computesByStage.UpsertTaskWithScan(*dqTask, meta, !AppData()->FeatureFlags.GetEnableSeparationComputeActorsFromRead()); - 
IActor* computeActor = CreateKqpScanComputeActor(executerId, txId, dqTask, + YQL_ENSURE(args.ComputesByStages); + auto& info = args.ComputesByStages->UpsertTaskWithScan(*args.Task, meta, !AppData()->FeatureFlags.GetEnableSeparationComputeActorsFromRead()); + IActor* computeActor = CreateKqpScanComputeActor(args.ExecuterId, args.TxId, args.LockTxId, args.LockNodeId, args.Task, AsyncIoFactory, runtimeSettings, memoryLimits, - std::move(traceId), std::move(arena)); + std::move(args.TraceId), std::move(args.Arena), + std::move(args.SchedulingOptions)); TActorId result = TlsActivationContext->Register(computeActor); info.MutableActorIds().emplace_back(result); return result; } else { std::shared_ptr GUCSettings; - if (!serializedGUCSettings.empty()) { - GUCSettings = std::make_shared(serializedGUCSettings); + if (!args.SerializedGUCSettings.empty()) { + GUCSettings = std::make_shared(args.SerializedGUCSettings); } - IActor* computeActor = ::NKikimr::NKqp::CreateKqpComputeActor(executerId, txId, dqTask, AsyncIoFactory, - runtimeSettings, memoryLimits, std::move(traceId), std::move(arena), FederatedQuerySetup, GUCSettings); - return TlsActivationContext->Register(computeActor); + IActor* computeActor = ::NKikimr::NKqp::CreateKqpComputeActor(args.ExecuterId, args.TxId, args.Task, AsyncIoFactory, + runtimeSettings, memoryLimits, std::move(args.TraceId), std::move(args.Arena), FederatedQuerySetup, GUCSettings, + std::move(args.SchedulingOptions)); + return args.ShareMailbox ? 
TlsActivationContext->AsActorContext().RegisterWithSameMailbox(computeActor) : + TlsActivationContext->AsActorContext().Register(computeActor); } } }; @@ -192,4 +239,4 @@ std::shared_ptr MakeKqpCaFactory(const NKikimrConfi return std::make_shared(config, resourceManager, asyncIoFactory, federatedQuerySetup); } -} \ No newline at end of file +} diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h index 13f0fa11a0da..9dcb44a35966 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h @@ -6,6 +6,8 @@ #include #include +#include + #include namespace NKikimr::NKqp { @@ -103,11 +105,35 @@ struct IKqpNodeComputeActorFactory { virtual ~IKqpNodeComputeActorFactory() = default; public: - virtual NActors::TActorId CreateKqpComputeActor(const NActors::TActorId& executerId, ui64 txId, NYql::NDqProto::TDqTask* task, - const NYql::NDq::TComputeRuntimeSettings& settings, - NWilson::TTraceId traceId, TIntrusivePtr arena, const TString& serializedGUCSettings, - TComputeStagesWithScan& computeStages, ui64 outputChunkMaxSize, std::shared_ptr state, - NKikimr::NKqp::NRm::EKqpMemoryPool memoryPool, ui32 numberOfTasks) = 0; + struct TCreateArgs { + const NActors::TActorId& ExecuterId; + const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; + NYql::NDqProto::TDqTask* Task; + TIntrusivePtr TxInfo; + const NYql::NDq::TComputeRuntimeSettings& RuntimeSettings; + NWilson::TTraceId TraceId; + TIntrusivePtr Arena; + const TString& SerializedGUCSettings; + const ui32 NumberOfTasks; + const ui64 OutputChunkMaxSize; + const NKikimr::NKqp::NRm::EKqpMemoryPool MemoryPool; + const bool WithSpilling; + const NYql::NDqProto::EDqStatsMode StatsMode; + const TInstant& Deadline; + const bool ShareMailbox; + const TMaybe& RlPath; + + TComputeStagesWithScan* ComputesByStages = nullptr; + std::shared_ptr State = nullptr; + TComputeActorSchedulingOptions 
SchedulingOptions = {}; + }; + + typedef std::variant TActorStartResult; + virtual TActorStartResult CreateKqpComputeActor(TCreateArgs&& args) = 0; + + virtual void ApplyConfig(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) = 0; }; std::shared_ptr MakeKqpCaFactory(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, @@ -115,4 +141,4 @@ std::shared_ptr MakeKqpCaFactory(const NKikimrConfi NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional federatedQuerySetup); -} // namespace NKikimr::NKqp::NComputeActor \ No newline at end of file +} // namespace NKikimr::NKqp::NComputeActor diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_impl.h b/ydb/core/kqp/compute_actor/kqp_compute_actor_impl.h index 2633037dcb17..bdfbb2eeb7db 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_impl.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_impl.h @@ -14,8 +14,8 @@ using namespace NYql::NDq; class TKqpTaskRunnerExecutionContext : public TDqTaskRunnerExecutionContext { public: - TKqpTaskRunnerExecutionContext(ui64 txId, bool withSpilling, IDqChannelStorage::TWakeUpCallback&& wakeUp) - : TDqTaskRunnerExecutionContext(txId, std::move(wakeUp)) + TKqpTaskRunnerExecutionContext(ui64 txId, bool withSpilling, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback) + : TDqTaskRunnerExecutionContext(txId, std::move(wakeUpCallback), std::move(errorCallback)) , WithSpilling_(withSpilling) { } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_events.h b/ydb/core/kqp/compute_actor/kqp_compute_events.h index 6092c4a1a378..a9dd127a64b0 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_events.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_events.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -11,164 +11,181 @@ namespace NKikimr::NKqp { -struct TEvKqpCompute { - struct TEvRemoteScanData : public TEventPB {}; +struct TLocksInfo { + TVector Locks; + TVector BrokenLocks; +}; - 
class IShardScanStats { - public: - virtual ~IShardScanStats() = default; - virtual THashMap GetMetrics() const = 0; - }; +namespace NInternalImplementation { +struct TEvRemoteScanData: public TEventPB { +}; - /* - * Scan communications. - * - * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute - * actors are communicating with shard scans using this message, so big amount of unfiltered data - * is expected. However, it is possible that after query planning datashard would migrate to other - * node. To support scans in this case we provide serialization routines. For now such remote scan - * is considered as rare event and not worth of some fast serialization, so we just use protobuf. - * - * TEvScanDataAck follows the same pattern mostly for symmetry reasons. - */ - struct TEvScanData : public NActors::TEventLocal { - TEvScanData(const ui32 scanId, const ui32 generation = 0) - : ScanId(scanId) - , Generation(generation) - , Finished(false) {} - - std::optional AvailablePacks; - ui32 ScanId; - ui32 Generation; - TVector Rows; - std::shared_ptr ArrowBatch; - std::vector> SplittedBatches; - - TOwnedCellVec LastKey; - TDuration CpuTime; - TDuration WaitTime; - ui32 PageFaults = 0; // number of page faults occurred when filling in this message - bool RequestedBytesLimitReached = false; - bool Finished = false; - bool PageFault = false; // page fault was the reason for sending this message - mutable THolder Remote; - std::shared_ptr StatsOnFinished; - - template - const T& GetStatsAs() const { - Y_ABORT_UNLESS(!!StatsOnFinished); - return VerifyDynamicCast(*StatsOnFinished); - } +class IShardScanStats { +public: + virtual ~IShardScanStats() = default; + virtual THashMap GetMetrics() const = 0; +}; - template - bool CheckStatsIs() const { - auto p = dynamic_cast(StatsOnFinished.get()); - return p; +/* + * Scan communications. 
+ * + * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute + * actors are communicating with shard scans using this message, so big amount of unfiltered data + * is expected. However, it is possible that after query planning datashard would migrate to other + * node. To support scans in this case we provide serialization routines. For now such remote scan + * is considered as rare event and not worth of some fast serialization, so we just use protobuf. + * + * TEvScanDataAck follows the same pattern mostly for symmetry reasons. + */ +struct TEvScanData: public NActors::TEventLocal { + TEvScanData(const ui32 scanId, const ui32 generation = 0) + : ScanId(scanId) + , Generation(generation) + , Finished(false) { + } + + std::optional AvailablePacks; + ui32 ScanId; + ui32 Generation; + TVector Rows; + std::shared_ptr ArrowBatch; + std::vector> SplittedBatches; + + TOwnedCellVec LastKey; + TDuration CpuTime; + TDuration WaitTime; + ui32 PageFaults = 0; // number of page faults occurred when filling in this message + bool RequestedBytesLimitReached = false; + bool Finished = false; + bool PageFault = false; // page fault was the reason for sending this message + mutable THolder Remote; + std::shared_ptr StatsOnFinished; + TLocksInfo LocksInfo; + + template + const T& GetStatsAs() const { + Y_ABORT_UNLESS(!!StatsOnFinished); + return VerifyDynamicCast(*StatsOnFinished); + } + + template + bool CheckStatsIs() const { + auto p = dynamic_cast(StatsOnFinished.get()); + return p; + } + + ui32 GetRowsCount() const { + if (ArrowBatch) { + return ArrowBatch->num_rows(); + } else { + return Rows.size(); } + } - ui32 GetRowsCount() const { - if (ArrowBatch) { - return ArrowBatch->num_rows(); - } else { - return Rows.size(); - } - } + bool IsEmpty() const { + return GetRowsCount() == 0; + } - bool IsEmpty() const { - return GetRowsCount() == 0; - } + bool IsSerializable() const override { + return true; + } - bool IsSerializable() const 
override { - return true; - } + ui32 CalculateSerializedSize() const override { + InitRemote(); + return Remote->CalculateSerializedSizeCached(); + } - ui32 CalculateSerializedSize() const override { - InitRemote(); - return Remote->CalculateSerializedSizeCached(); - } + bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { + InitRemote(); + return Remote->SerializeToArcadiaStream(chunker); + } - bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { - InitRemote(); - return Remote->SerializeToArcadiaStream(chunker); + NKikimrDataEvents::EDataFormat GetDataFormat() const { + if (ArrowBatch != nullptr || SplittedBatches.size()) { + return NKikimrDataEvents::FORMAT_ARROW; } - - NKikimrDataEvents::EDataFormat GetDataFormat() const { - if (ArrowBatch != nullptr || SplittedBatches.size()) { - return NKikimrDataEvents::FORMAT_ARROW; - } - return NKikimrDataEvents::FORMAT_CELLVEC; + return NKikimrDataEvents::FORMAT_CELLVEC; + } + + + static NActors::IEventBase* Load(TEventSerializedData* data) { + auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); + auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); + + ev->CpuTime = TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); + ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); + ev->PageFault = pbEv->Record.GetPageFault(); + ev->PageFaults = pbEv->Record.GetPageFaults(); + ev->Finished = pbEv->Record.GetFinished(); + ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); + ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); + if (pbEv->Record.HasAvailablePacks()) { + ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); } + auto rows = pbEv->Record.GetRows(); + ev->Rows.reserve(rows.size()); + for (const auto& row : rows) { + ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); + } - static NActors::IEventBase* Load(TEventSerializedData* data) { - 
auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); - auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); - - ev->CpuTime = TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); - ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); - ev->PageFault = pbEv->Record.GetPageFault(); - ev->PageFaults = pbEv->Record.GetPageFaults(); - ev->Finished = pbEv->Record.GetFinished(); - ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); - ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); - if (pbEv->Record.HasAvailablePacks()) { - ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); - } - - auto rows = pbEv->Record.GetRows(); - ev->Rows.reserve(rows.size()); - for (const auto& row: rows) { - ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); + if (pbEv->Record.HasArrowBatch()) { + auto batch = pbEv->Record.GetArrowBatch(); + auto schema = NArrow::DeserializeSchema(batch.GetSchema()); + ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ NArrow::DeserializeBatch(batch.GetBatch(), schema) })); + } + return ev.Release(); + } + +private: + void InitRemote() const { + if (!Remote) { + Remote = MakeHolder(); + + Remote->Record.SetScanId(ScanId); + Remote->Record.SetGeneration(Generation); + Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); + Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetFinished(Finished); + Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetPageFault(PageFault); + Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); + if (AvailablePacks) { + Remote->Record.SetAvailablePacks(*AvailablePacks); } - if (pbEv->Record.HasArrowBatch()) { - auto batch = pbEv->Record.GetArrowBatch(); - auto schema = 
NArrow::DeserializeSchema(batch.GetSchema()); - ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({NArrow::DeserializeBatch(batch.GetBatch(), schema)})); + switch (GetDataFormat()) { + case NKikimrDataEvents::FORMAT_UNSPECIFIED: + case NKikimrDataEvents::FORMAT_CELLVEC: { + Remote->Record.MutableRows()->Reserve(Rows.size()); + for (const auto& row : Rows) { + Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); + } + break; + } + case NKikimrDataEvents::FORMAT_ARROW: { + Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); + auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); + protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); + protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); + break; + } } - return ev.Release(); } + } +}; - private: - void InitRemote() const { - if (!Remote) { - Remote = MakeHolder(); +} - Remote->Record.SetScanId(ScanId); - Remote->Record.SetGeneration(Generation); - Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); - Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetFinished(Finished); - Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetPageFault(PageFault); - Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); - if (AvailablePacks) { - Remote->Record.SetAvailablePacks(*AvailablePacks); - } +struct TEvKqpCompute { + using TEvRemoteScanData = NInternalImplementation::TEvRemoteScanData; - switch (GetDataFormat()) { - case NKikimrDataEvents::FORMAT_UNSPECIFIED: - case NKikimrDataEvents::FORMAT_CELLVEC: { - Remote->Record.MutableRows()->Reserve(Rows.size()); - for (const auto& row: Rows) { - Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); - } - break; - } - case NKikimrDataEvents::FORMAT_ARROW: { - Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); - auto* 
protoArrowBatch = Remote->Record.MutableArrowBatch(); - protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); - protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); - break; - } - } - } - } - }; + using IShardScanStats = NInternalImplementation::IShardScanStats; + + using TEvScanData = NInternalImplementation::TEvScanData; struct TEvRemoteScanDataAck: public NActors::TEventPB { @@ -247,5 +264,4 @@ struct TEvKqpCompute { TKqpComputeEvents::EvKillScanTablet> {}; }; - } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp index 1c00eca6c792..ab43bc1e5ff1 100644 --- a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp @@ -14,8 +14,8 @@ TKqpComputeActor::TKqpComputeActor(const TActorId& executerId, ui64 txId, NDqPro IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, - const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings) - : TBase(executerId, txId, task, std::move(asyncIoFactory), AppData()->FunctionRegistry, settings, memoryLimits, /* ownMemoryQuota = */ true, /* passExceptions = */ true, /*taskCounters = */ nullptr, std::move(traceId), std::move(arena), GUCSettings) + const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, TComputeActorSchedulingOptions schedulingOptions) + : TBase(std::move(schedulingOptions), executerId, txId, task, std::move(asyncIoFactory), AppData()->FunctionRegistry, settings, memoryLimits, /* ownMemoryQuota = */ true, /* passExceptions = */ true, /*taskCounters = */ nullptr, std::move(traceId), std::move(arena), GUCSettings) , ComputeCtx(settings.StatsMode) , FederatedQuerySetup(federatedQuerySetup) { @@ -72,9 +72,10 @@ void 
TKqpComputeActor::DoBootstrap() { auto taskRunner = MakeDqTaskRunner(TBase::GetAllocatorPtr(), execCtx, settings, logger); SetTaskRunner(taskRunner); - auto wakeup = [this]{ ContinueExecute(); }; + auto wakeupCallback = [this]{ ContinueExecute(); }; + auto errorCallback = [this](const TString& error){ SendError(error); }; try { - PrepareTaskRunner(TKqpTaskRunnerExecutionContext(std::get(TxId), RuntimeSettings.UseSpilling, std::move(wakeup))); + PrepareTaskRunner(TKqpTaskRunnerExecutionContext(std::get(TxId), RuntimeSettings.UseSpilling, std::move(wakeupCallback), std::move(errorCallback))); } catch (const NMiniKQL::TKqpEnsureFail& e) { InternalError((TIssuesIds::EIssueCode) e.GetCode(), e.GetMessage()); return; @@ -121,9 +122,12 @@ void TKqpComputeActor::DoBootstrap() { ContinueExecute(); Become(&TKqpComputeActor::StateFunc); + + TBase::DoBoostrap(); } STFUNC(TKqpComputeActor::StateFunc) { + CA_LOG_D("CA StateFunc " << ev->GetTypeRewrite()); try { switch (ev->GetTypeRewrite()) { hFunc(TEvKqpCompute::TEvScanInitActor, HandleExecute); @@ -133,10 +137,7 @@ STFUNC(TKqpComputeActor::StateFunc) { BaseStateFuncBody(ev); } } catch (const TMemoryLimitExceededException& e) { - InternalError(TIssuesIds::KIKIMR_PRECONDITION_FAILED, TStringBuilder() - << "Mkql memory limit exceeded, limit: " << GetMkqlMemoryLimit() - << ", host: " << HostName() - << ", canAllocateExtraMemory: " << CanAllocateExtraMemory); + TBase::OnMemoryLimitExceptionHandler(); } catch (const NMiniKQL::TKqpEnsureFail& e) { InternalError((TIssuesIds::EIssueCode) e.GetCode(), e.GetMessage()); } catch (const yexception& e) { @@ -281,10 +282,10 @@ IActor* CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NDqProto::T const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, const std::optional& federatedQuerySetup, - const TGUCSettings::TPtr& GUCSettings) + const TGUCSettings::TPtr& GUCSettings, TComputeActorSchedulingOptions 
cpuOptions) { return new TKqpComputeActor(executerId, txId, task, std::move(asyncIoFactory), - settings, memoryLimits, std::move(traceId), std::move(arena), federatedQuerySetup, GUCSettings); + settings, memoryLimits, std::move(traceId), std::move(arena), federatedQuerySetup, GUCSettings, std::move(cpuOptions)); } } // namespace NKqp diff --git a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.h index 590a9bcab774..613f5e2786c6 100644 --- a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.h @@ -8,17 +8,16 @@ #include #include #include +#include #include #include -#include - namespace NKikimr { namespace NKqp { -class TKqpComputeActor : public TDqSyncComputeActorBase { - using TBase = TDqSyncComputeActorBase; +class TKqpComputeActor : public TSchedulableComputeActorBase { + using TBase = TSchedulableComputeActorBase; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -29,7 +28,8 @@ class TKqpComputeActor : public TDqSyncComputeActorBase { IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, - const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings); + const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, + TComputeActorSchedulingOptions); void DoBootstrap(); @@ -68,7 +68,8 @@ IActor* CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NDqProto::T IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, - const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings); + const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, + TComputeActorSchedulingOptions); } // namespace NKqp } // namespace 
NKikimr diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp index b144be2b09b0..07c3ab88ed02 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp @@ -23,13 +23,15 @@ static constexpr TDuration RL_MAX_BATCH_DELAY = TDuration::Seconds(50); } // anonymous namespace -TKqpScanComputeActor::TKqpScanComputeActor(const TActorId& executerId, ui64 txId, NDqProto::TDqTask* task, - IDqAsyncIoFactory::TPtr asyncIoFactory, +TKqpScanComputeActor::TKqpScanComputeActor(TComputeActorSchedulingOptions cpuOptions, const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, + NDqProto::TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena) - : TBase(executerId, txId, task, std::move(asyncIoFactory), AppData()->FunctionRegistry, settings, + : TBase(std::move(cpuOptions), executerId, txId, task, std::move(asyncIoFactory), AppData()->FunctionRegistry, settings, memoryLimits, /* ownMemoryQuota = */ true, /* passExceptions = */ true, /*taskCounters = */ nullptr, std::move(traceId), std::move(arena)) , ComputeCtx(settings.StatsMode) + , LockTxId(lockTxId) + , LockNodeId(lockNodeId) { InitializeTask(); YQL_ENSURE(GetTask().GetMeta().UnpackTo(&Meta), "Invalid task meta: " << GetTask().GetMeta().DebugString()); @@ -103,6 +105,19 @@ void TKqpScanComputeActor::FillExtraStats(NDqProto::TDqComputeActorStats* dst, b } } +TMaybe TKqpScanComputeActor::ExtraData() { + NKikimrTxDataShard::TEvKqpInputActorResultInfo resultInfo; + for (const auto& lock : Locks) { + resultInfo.AddLocks()->CopyFrom(lock); + } + for (const auto& lock : BrokenLocks) { + resultInfo.AddLocks()->CopyFrom(lock); + } + google::protobuf::Any result; + result.PackFrom(resultInfo); + return result; +} + void 
TKqpScanComputeActor::HandleEvWakeup(EEvWakeupTag tag) { AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "HandleEvWakeup")("self_id", SelfId()); switch (tag) { @@ -130,10 +145,18 @@ void TKqpScanComputeActor::Handle(TEvScanExchange::TEvTerminateFromFetcher::TPtr void TKqpScanComputeActor::Handle(TEvScanExchange::TEvSendData::TPtr& ev) { ALS_DEBUG(NKikimrServices::KQP_COMPUTE) << "TEvSendData: " << ev->Sender << "/" << SelfId(); auto& msg = *ev->Get(); + + for (const auto& lock : msg.GetLocksInfo().Locks) { + Locks.insert(lock); + } + for (const auto& lock : msg.GetLocksInfo().BrokenLocks) { + BrokenLocks.insert(lock); + } + auto guard = TaskRunner->BindAllocator(); if (!!msg.GetArrowBatch()) { ScanData->AddData(NMiniKQL::TBatchDataAccessor(msg.GetArrowBatch(), std::move(msg.MutableDataIndexes())), msg.GetTabletId(), TaskRunner->GetHolderFactory()); - } else { + } else if (!msg.GetRows().empty()) { ScanData->AddData(std::move(msg.MutableRows()), msg.GetTabletId(), TaskRunner->GetHolderFactory()); } if (IsQuotingEnabled()) { @@ -220,7 +243,8 @@ void TKqpScanComputeActor::DoBootstrap() { TBase::SetTaskRunner(taskRunner); auto wakeup = [this] { ContinueExecute(); }; - TBase::PrepareTaskRunner(TKqpTaskRunnerExecutionContext(std::get(TxId), RuntimeSettings.UseSpilling, std::move(wakeup))); + auto errorCallback = [this](const TString& error){ SendError(error); }; + TBase::PrepareTaskRunner(TKqpTaskRunnerExecutionContext(std::get(TxId), RuntimeSettings.UseSpilling, std::move(wakeup), std::move(errorCallback))); ComputeCtx.AddTableScan(0, Meta, GetStatsMode()); ScanData = &ComputeCtx.GetTableScan(0); @@ -228,6 +252,8 @@ void TKqpScanComputeActor::DoBootstrap() { ScanData->TaskId = GetTask().GetId(); ScanData->TableReader = CreateKqpTableReader(*ScanData); Become(&TKqpScanComputeActor::StateFunc); + + TBase::DoBoostrap(); } } diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h index 
69b0bb613c1a..b89d6aee71a4 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h @@ -2,15 +2,15 @@ #include "kqp_scan_events.h" #include -#include +#include #include #include namespace NKikimr::NKqp::NScanPrivate { -class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase { +class TKqpScanComputeActor: public TSchedulableComputeActorBase { private: - using TBase = NYql::NDq::TDqSyncComputeActorBase; + using TBase = TSchedulableComputeActorBase; NMiniKQL::TKqpScanComputeContext ComputeCtx; NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta Meta; using TBase::TaskRunner; @@ -22,15 +22,49 @@ class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase Fetchers; NMiniKQL::TKqpScanComputeContext::TScanData* ScanData = nullptr; + const TMaybe LockTxId; + const ui32 LockNodeId; + + struct TLockHash { + bool operator()(const NKikimrDataEvents::TLock& lock) { + return MultiHash( + lock.GetLockId(), + lock.GetDataShard(), + lock.GetSchemeShard(), + lock.GetPathId(), + lock.GetGeneration(), + lock.GetCounter(), + lock.GetHasWrites()); + } + }; + + struct TLockEqual { + bool operator()(const NKikimrDataEvents::TLock& lhs, const NKikimrDataEvents::TLock& rhs) { + return lhs.GetLockId() == rhs.GetLockId() + && lhs.GetDataShard() == rhs.GetDataShard() + && lhs.GetSchemeShard() == rhs.GetSchemeShard() + && lhs.GetPathId() == rhs.GetPathId() + && lhs.GetGeneration() == rhs.GetGeneration() + && lhs.GetCounter() == rhs.GetCounter() + && lhs.GetHasWrites() == rhs.GetHasWrites(); + } + }; + + using TLocksHashSet = THashSet; + + TLocksHashSet Locks; + TLocksHashSet BrokenLocks; + ui64 CalcMkqlMemoryLimit() override { return TBase::CalcMkqlMemoryLimit() + ComputeCtx.GetTableScans().size() * MemoryLimits.ChannelBufferSize; } + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_SCAN_COMPUTE_ACTOR; } - TKqpScanComputeActor(const 
TActorId& executerId, ui64 txId, + TKqpScanComputeActor(TComputeActorSchedulingOptions, const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena); @@ -46,10 +80,7 @@ class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase ExtraData() override; + void HandleEvWakeup(EEvWakeupTag tag); void Handle(TEvScanExchange::TEvTerminateFromFetcher::TPtr& ev); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp index 0bc9a487e2ce..7eab5ef78196 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp @@ -20,23 +20,22 @@ std::vector> TShardScannerInfo::OnReceiveData( } else { Finished = true; } - if (data.IsEmpty()) { - AFL_ENSURE(data.Finished); - return {}; - } AFL_ENSURE(ActorId); AFL_ENSURE(!DataChunksInFlightCount)("data_chunks_in_flightCount", DataChunksInFlightCount); std::vector> result; - if (data.SplittedBatches.size() > 1) { + if (data.IsEmpty()) { + AFL_ENSURE(data.Finished); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(TabletId, data.LocksInfo))); + } else if (data.SplittedBatches.size() > 1) { ui32 idx = 0; AFL_ENSURE(data.ArrowBatch); for (auto&& i : data.SplittedBatches) { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, std::move(i)), idx++)); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, std::move(i), data.LocksInfo), idx++)); } } else if (data.ArrowBatch) { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId))); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, 
data.LocksInfo))); } else { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(std::move(data.Rows), TabletId))); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(std::move(data.Rows), TabletId, data.LocksInfo))); } AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "receive_data")("actor_id", ActorId)("count_chunks", result.size()); DataChunksInFlightCount = result.size(); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h index 1e4d9ac58937..2d684d2f6b09 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h @@ -23,6 +23,7 @@ class TComputeTaskData; class TShardScannerInfo { private: std::optional ActorId; + const ui64 ScanId; const ui64 TabletId; const ui64 Generation; i64 DataChunksInFlightCount = 0; @@ -51,15 +52,16 @@ class TShardScannerInfo { } } public: - TShardScannerInfo(TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) - : TabletId(state.TabletId) + TShardScannerInfo(const ui64 scanId, TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , TabletId(state.TabletId) , Generation(++state.Generation) { const bool subscribed = std::exchange(state.SubscribedOnTablet, true); const auto& keyColumnTypes = externalObjectsProvider.GetKeyColumnTypes(); auto ranges = state.GetScanRanges(keyColumnTypes); - auto ev = externalObjectsProvider.BuildEvKqpScan(0, Generation, ranges); + auto ev = externalObjectsProvider.BuildEvKqpScan(ScanId, Generation, ranges); AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "start_scanner")("tablet_id", TabletId)("generation", Generation) ("info", state.ToString(keyColumnTypes))("range", DebugPrintRanges(keyColumnTypes, ranges, *AppData()->TypeRegistry)) @@ -250,6 +252,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { THashMap ShardsByActorId; bool IsActiveFlag = true; 
THashMap> ShardScanners; + const ui64 ScanId; const IExternalObjectsProvider& ExternalObjectsProvider; public: @@ -313,7 +316,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { AFL_ENSURE(state.TabletId); AFL_ENSURE(!state.ActorId)("actor_id", state.ActorId); state.State = NComputeActor::EShardState::Starting; - auto newScanner = std::make_shared(state, ExternalObjectsProvider); + auto newScanner = std::make_shared(ScanId, state, ExternalObjectsProvider); AFL_ENSURE(ShardScanners.emplace(state.TabletId, newScanner).second); } @@ -356,8 +359,9 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { return nullptr; } - TInFlightShards(const IExternalObjectsProvider& externalObjectsProvider) - : ExternalObjectsProvider(externalObjectsProvider) + TInFlightShards(const ui64 scanId, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , ExternalObjectsProvider(externalObjectsProvider) { } bool IsActive() const { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_events.h b/ydb/core/kqp/compute_actor/kqp_scan_events.h index af455be7b7e7..08ac0e535a88 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_events.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_events.h @@ -43,33 +43,42 @@ struct TEvScanExchange { YDB_ACCESSOR_DEF(TVector, Rows); YDB_READONLY(ui64, TabletId, 0); YDB_ACCESSOR_DEF(std::vector, DataIndexes); + YDB_READONLY_DEF(TLocksInfo, LocksInfo); public: ui32 GetRowsCount() const { return ArrowBatch ? 
ArrowBatch->num_rows() : Rows.size(); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, const TLocksInfo& locksInfo) : ArrowBatch(arrowBatch) , TabletId(tabletId) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(ArrowBatch); Y_ABORT_UNLESS(ArrowBatch->num_rows()); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes, const TLocksInfo& locksInfo) : ArrowBatch(arrowBatch) , TabletId(tabletId) , DataIndexes(std::move(dataIndexes)) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(ArrowBatch); Y_ABORT_UNLESS(ArrowBatch->num_rows()); } - TEvSendData(TVector&& rows, const ui64 tabletId) + TEvSendData(TVector&& rows, const ui64 tabletId, const TLocksInfo& locksInfo) : Rows(std::move(rows)) - , TabletId(tabletId) { + , TabletId(tabletId) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(Rows.size()); } + + TEvSendData(const ui64 tabletId, const TLocksInfo& locksInfo) + : TabletId(tabletId) + , LocksInfo(locksInfo) { + } }; class TEvAckData: public NActors::TEventLocal { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index 4d4bfed29c89..73f4c86398b9 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -23,18 +23,20 @@ static constexpr ui64 MAX_SHARD_RESOLVES = 3; TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snapshot, - const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, + const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, 
NWilson::TTraceId traceId) : Meta(meta) , ScanDataMeta(Meta) , RuntimeSettings(settings) , TxId(txId) + , LockTxId(lockTxId) + , LockNodeId(lockNodeId) , ComputeActorIds(std::move(computeActors)) , Snapshot(snapshot) , ShardsScanningPolicy(shardsScanningPolicy) , Counters(counters) - , InFlightShards(*this) + , InFlightShards(ScanId, *this) , InFlightComputes(ComputeActorIds) { Y_UNUSED(traceId); @@ -86,7 +88,11 @@ void TKqpScanFetcherActor::Bootstrap() { void TKqpScanFetcherActor::HandleExecute(TEvScanExchange::TEvAckData::TPtr& ev) { Y_ABORT_UNLESS(ev->Get()->GetFreeSpace()); - ALS_DEBUG(NKikimrServices::KQP_COMPUTE) << "EvAckData (" << SelfId() << "): " << ev->Sender; + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "AckDataFromCompute")("self_id", SelfId())("scan_id", ScanId) + ("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()); InFlightComputes.OnComputeAck(ev->Sender, ev->Get()->GetFreeSpace()); CheckFinish(); } @@ -119,6 +125,25 @@ void TKqpScanFetcherActor::HandleExecute(TEvKqpCompute::TEvScanData::TPtr& ev) { } AFL_ENSURE(state->State == EShardState::Running)("state", state->State)("actor_id", state->ActorId)("ev_sender", ev->Sender); + AFL_DEBUG(NKikimrServices::KQP_COMPUTE) + ("Recv TEvScanData from ShardID=", ev->Sender) + ("ScanId", ev->Get()->ScanId) + ("Finished", ev->Get()->Finished) + ("Lock", [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.Locks) { + builder << lock.ShortDebugString(); + } + return builder; + }()) + ("BrokenLocks", [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.BrokenLocks) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + TInstant startTime = TActivationContext::Now(); if (ev->Get()->Finished) { state->State = EShardState::PostRunning; @@ -411,6 +436,10 @@ 
std::unique_ptr TKqpScanFetcherActor::BuildEv ev->Record.SetStatsMode(RuntimeSettings.StatsMode); ev->Record.SetScanId(scanId); ev->Record.SetTxId(std::get(TxId)); + if (LockTxId) { + ev->Record.SetLockTxId(*LockTxId); + } + ev->Record.SetLockNodeId(LockNodeId); ev->Record.SetTablePath(ScanDataMeta.TablePath); ev->Record.SetSchemaVersion(ScanDataMeta.TableId.SchemaVersion); @@ -458,12 +487,17 @@ void TKqpScanFetcherActor::ProcessPendingScanDataItem(TEvKqpCompute::TEvScanData state->LastKey = std::move(msg.LastKey); const ui64 rowsCount = msg.GetRowsCount(); - CA_LOG_D("action=got EvScanData;rows=" << rowsCount << ";finished=" << msg.Finished << ";exceeded=" << msg.RequestedBytesLimitReached - << ";from=" << ev->Sender << ";shards remain=" << PendingShards.size() - << ";in flight scans=" << InFlightShards.GetScansCount() - << ";in flight shards=" << InFlightShards.GetShardsCount() - << ";delayed_for=" << latency.SecondsFloat() << " seconds by ratelimiter" - << ";tablet_id=" << state->TabletId); + AFL_ENSURE(!LockTxId || !msg.LocksInfo.Locks.empty() || !msg.LocksInfo.BrokenLocks.empty()); + AFL_ENSURE(LockTxId || (msg.LocksInfo.Locks.empty() && msg.LocksInfo.BrokenLocks.empty())); + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("action","got EvScanData")("rows", rowsCount)("finished", msg.Finished)("exceeded", msg.RequestedBytesLimitReached) + ("scan", ScanId)("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()) + ("delayed_for_seconds_by_ratelimiter", latency.SecondsFloat()) + ("tablet_id", state->TabletId) + ("locks", msg.LocksInfo.Locks.size()) + ("broken locks", msg.LocksInfo.BrokenLocks.size()); auto shardScanner = InFlightShards.GetShardScannerVerified(state->TabletId); auto tasksForCompute = shardScanner->OnReceiveData(msg, shardScanner); AFL_ENSURE(tasksForCompute.size() == 1 || 
tasksForCompute.size() == 0 || tasksForCompute.size() == ComputeActorIds.size())("size", tasksForCompute.size())("compute_size", ComputeActorIds.size()); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h index 962aee326470..0bd2bfc1d58b 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h @@ -50,13 +50,15 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped LockTxId; + const ui32 LockNodeId; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_SCAN_FETCH_ACTOR; } TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snapshot, const NYql::NDq::TComputeRuntimeSettings& settings, - std::vector&& computeActors, const ui64 txId, + std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); @@ -168,6 +170,9 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped PendingShards; std::deque PendingResolveShards; + static inline TAtomicCounter ScanIdCounter = 0; + const ui64 ScanId = ScanIdCounter.Inc(); + TInFlightShards InFlightShards; TInFlightComputes InFlightComputes; ui32 TotalRetries = 0; diff --git a/ydb/core/kqp/compute_actor/ya.make b/ydb/core/kqp/compute_actor/ya.make index 50d029c6bacf..7b45cfa31530 100644 --- a/ydb/core/kqp/compute_actor/ya.make +++ b/ydb/core/kqp/compute_actor/ya.make @@ -22,7 +22,8 @@ PEERDIR( ydb/core/kqp/runtime ydb/core/tx/datashard ydb/core/tx/scheme_cache - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos + ydb/library/formats/arrow/common ydb/library/yql/dq/actors/compute ydb/library/yql/providers/generic/actors ydb/library/yql/providers/s3/actors_factory diff --git 
a/ydb/core/kqp/counters/kqp_counters.cpp b/ydb/core/kqp/counters/kqp_counters.cpp index a8b0b8a1a2c5..be0d9fa8d1a0 100644 --- a/ydb/core/kqp/counters/kqp_counters.cpp +++ b/ydb/core/kqp/counters/kqp_counters.cpp @@ -8,7 +8,6 @@ #include #include - #include #include @@ -776,7 +775,10 @@ TKqpCounters::TKqpCounters(const ::NMonitoring::TDynamicCounterPtr& counters, co RmExternalMemory = KqpGroup->GetCounter("RM/ExternalMemory", false); RmNotEnoughMemory = KqpGroup->GetCounter("RM/NotEnoughMemory", true); RmNotEnoughComputeActors = KqpGroup->GetCounter("RM/NotEnoughComputeActors", true); + RmOnStartAllocs = KqpGroup->GetCounter("Rm/OnStartAllocs", true); RmExtraMemAllocs = KqpGroup->GetCounter("RM/ExtraMemAllocs", true); + RmExtraMemFree = KqpGroup->GetCounter("RM/ExtraMemFree", true); + RmOnCompleteFree = KqpGroup->GetCounter("RM/OnCompleteFree", true); RmInternalError = KqpGroup->GetCounter("RM/InternalError", true); RmSnapshotLatency = KqpGroup->GetHistogram( "RM/SnapshotLatency", NMonitoring::ExponentialHistogram(20, 2, 1)); @@ -826,6 +828,15 @@ TKqpCounters::TKqpCounters(const ::NMonitoring::TDynamicCounterPtr& counters, co "PhyTx/ScanTxTotalTimeMs", NMonitoring::ExponentialHistogram(20, 2, 1)); FullScansExecuted = KqpGroup->GetCounter("FullScans", true); + + SchedulerThrottled = KqpGroup->GetCounter("NodeScheduler/ThrottledUs", true); + SchedulerGroupsCount = KqpGroup->GetCounter("NodeScheduler/GroupsCount", false); + SchedulerValuesCount = KqpGroup->GetCounter("NodeScheduler/ValuesCount", false); + SchedulerCapacity = KqpGroup->GetCounter("NodeScheduler/Capacity"); + ComputeActorExecutions = KqpGroup->GetHistogram("NodeScheduler/BatchUs", NMonitoring::ExponentialHistogram(20, 2, 1)); + ComputeActorDelays = KqpGroup->GetHistogram("NodeScheduler/Delays", NMonitoring::ExponentialHistogram(20, 2, 1)); + ThrottledActorsSpuriousActivations = KqpGroup->GetCounter("NodeScheduler/SpuriousActivations", true); + SchedulerDelays = 
KqpGroup->GetHistogram("NodeScheduler/Delay", NMonitoring::ExponentialHistogram(20, 2, 1)); } ::NMonitoring::TDynamicCounterPtr TKqpCounters::GetKqpCounters() const { diff --git a/ydb/core/kqp/counters/kqp_counters.h b/ydb/core/kqp/counters/kqp_counters.h index 4a3328cbb0d1..0c4042a14ef1 100644 --- a/ydb/core/kqp/counters/kqp_counters.h +++ b/ydb/core/kqp/counters/kqp_counters.h @@ -350,7 +350,7 @@ class TKqpCounters : public TKqpCountersBase, public NYql::NDq::TSpillingCounter ::NMonitoring::TDynamicCounterPtr WorkloadManagerGroup; ::NMonitoring::TDynamicCounters::TCounterPtr FullScansExecuted; - + // Lease updates counters ::NMonitoring::THistogramPtr LeaseUpdateLatency; ::NMonitoring::THistogramPtr RunActorLeaseUpdateBacklog; @@ -377,6 +377,9 @@ class TKqpCounters : public TKqpCountersBase, public NYql::NDq::TSpillingCounter ::NMonitoring::TDynamicCounters::TCounterPtr RmNotEnoughMemory; ::NMonitoring::TDynamicCounters::TCounterPtr RmNotEnoughComputeActors; ::NMonitoring::TDynamicCounters::TCounterPtr RmExtraMemAllocs; + ::NMonitoring::TDynamicCounters::TCounterPtr RmOnStartAllocs; + ::NMonitoring::TDynamicCounters::TCounterPtr RmExtraMemFree; + ::NMonitoring::TDynamicCounters::TCounterPtr RmOnCompleteFree; ::NMonitoring::TDynamicCounters::TCounterPtr RmInternalError; NMonitoring::THistogramPtr RmSnapshotLatency; NMonitoring::THistogramPtr NodeServiceStartEventDelivery; @@ -406,6 +409,16 @@ class TKqpCounters : public TKqpCountersBase, public NYql::NDq::TSpillingCounter ::NMonitoring::TDynamicCounters::TCounterPtr DataShardIteratorMessages; ::NMonitoring::TDynamicCounters::TCounterPtr IteratorDeliveryProblems; + // Scheduler signals + ::NMonitoring::TDynamicCounters::TCounterPtr SchedulerThrottled; + ::NMonitoring::TDynamicCounters::TCounterPtr SchedulerCapacity; + NMonitoring::THistogramPtr ComputeActorExecutions; + NMonitoring::THistogramPtr ComputeActorDelays; + ::NMonitoring::TDynamicCounters::TCounterPtr ThrottledActorsSpuriousActivations; + 
NMonitoring::THistogramPtr SchedulerDelays; + NMonitoring::TDynamicCounters::TCounterPtr SchedulerGroupsCount; + NMonitoring::TDynamicCounters::TCounterPtr SchedulerValuesCount; + // Sequences counters ::NMonitoring::TDynamicCounters::TCounterPtr SequencerActorsCount; ::NMonitoring::TDynamicCounters::TCounterPtr SequencerErrors; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 9bc57b456d02..c22246335295 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -206,51 +206,16 @@ class TKqpDataExecuter : public TKqpExecuterBaseCollectStatsByLongTasks && HasOlapTable; - } - - void FillResponseStats(Ydb::StatusIds::StatusCode status) { - auto& response = *ResponseEv->Record.MutableResponse(); - - response.SetStatus(status); - - if (Stats) { - ReportEventElapsedTime(); - - Stats->FinishTs = TInstant::Now(); - Stats->Finish(); - - if (LogStatsByLongTasks() || CollectFullStats(Request.StatsMode)) { - for (ui32 txId = 0; txId < Request.Transactions.size(); ++txId) { - const auto& tx = Request.Transactions[txId].Body; - auto planWithStats = AddExecStatsToTxPlan(tx->GetPlan(), response.GetResult().GetStats()); - response.MutableResult()->MutableStats()->AddTxPlansWithStats(planWithStats); - } - } - - if (LogStatsByLongTasks()) { - const auto& txPlansWithStats = response.GetResult().GetStats().GetTxPlansWithStats(); - if (!txPlansWithStats.empty()) { - LOG_N("Full stats: " << txPlansWithStats); - } - } - - Stats.reset(); - } - } - void Finalize() { - if (LocksBroken) { - TString message = "Transaction locks invalidated."; + YQL_ENSURE(!AlreadyReplied); - return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, - YqlIssue({}, TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message)); + if (LocksBroken) { + return ReplyErrorAndDie( + Ydb::StatusIds::ABORTED, + YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, "Transaction locks invalidated. 
Unknown table.")); } - auto& response = *ResponseEv->Record.MutableResponse(); - - FillResponseStats(Ydb::StatusIds::SUCCESS); + ResponseEv->Record.MutableResponse()->SetStatus(Ydb::StatusIds::SUCCESS); Counters->TxProxyMon->ReportStatusOK->Inc(); auto addLocks = [this](const auto& data) { @@ -279,6 +244,9 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); @@ -287,7 +255,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseLockHandle = std::move(LockHandle); } - BuildLocks(*response.MutableResult()->MutableLocks(), Locks); + BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); } auto resultSize = ResponseEv->GetByteSize(); @@ -313,9 +281,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseResultsSize()); - Send(Target, ResponseEv.release()); + AlreadyReplied = true; PassAway(); } @@ -355,6 +321,8 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetIssues(), issues); - LOG_D("Got evWrite result, shard: " << shardId << ", status: " - << NKikimrDataEvents::TEvWriteResult::EStatus_Name(res->Record.GetStatus()) - << ", error: " << issues.ToString()); + LOG_D("Recv EvWriteResult (prepare) from ShardID=" << shardId + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie + << ", error=" << issues.ToString()); + if (Stats) { Stats->AddDatashardPrepareStats(std::move(*res->Record.MutableTxStats())); } @@ -516,6 +494,18 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.DebugString()); + YQL_ENSURE(shardState->State == TShardState::EState::Preparing); + Counters->TxProxyMon->TxResultAborted->Inc(); + LocksBroken = true; + + YQL_ENSURE(!res->Record.GetTxLocks().empty()); + ResponseEv->BrokenLockPathId = 
NYql::TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + } default: { return ShardError(res->Record); @@ -562,7 +552,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseGet()->Record.GetState() == NDqProto::COMPUTE_STATE_FAILURE) { CancelProposal(0); } - HandleComputeStats(ev); + HandleComputeState(ev); } void HandlePrepare(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { @@ -891,6 +881,7 @@ class TKqpDataExecuter : public TKqpExecuterBase(); ev->Record.SetCoordinatorID(TxCoordinator); @@ -1041,7 +1033,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetIssues(), issues); - LOG_D("Got evWrite result, shard: " << shardId << ", status: " - << NKikimrDataEvents::TEvWriteResult::EStatus_Name(res->Record.GetStatus()) - << ", error: " << issues.ToString()); + + LOG_D("Recv EvWriteResult (execute) from ShardID=" << shardId + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie + << ", error=" << issues.ToString()); if (Stats) { Stats->AddDatashardStats(std::move(*res->Record.MutableTxStats())); @@ -1183,6 +1185,18 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.DebugString()); + YQL_ENSURE(shardState->State == TShardState::EState::Executing); + shardState->State = TShardState::EState::Finished; + Counters->TxProxyMon->TxResultAborted->Inc(); + LocksBroken = true; + YQL_ENSURE(!res->Record.GetTxLocks().empty()); + ResponseEv->BrokenLockPathId = NYql::TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + } default: { return ShardError(res->Record); @@ -1235,29 
+1249,15 @@ class TKqpDataExecuter : public TKqpExecuterBaseState = TShardState::EState::Finished; Counters->TxProxyMon->TxResultAborted->Inc(); // TODO: dedicated counter? - LocksBroken = true; - TMaybe tableName; if (!res->Record.GetTxLocks().empty()) { - auto& lock = res->Record.GetTxLocks(0); - auto tableId = TTableId(lock.GetSchemeShard(), lock.GetPathId()); - auto it = FindIf(TasksGraph.GetStagesInfo(), [tableId](const auto& x){ return x.second.Meta.TableId.HasSamePath(tableId); }); - if (it != TasksGraph.GetStagesInfo().end()) { - tableName = it->second.Meta.TableConstInfo->Path; - } + ResponseEv->BrokenLockPathId = TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); } - // Reply as soon as we know which table had locks invalidated - if (tableName) { - auto message = TStringBuilder() - << "Transaction locks invalidated. Table: " << *tableName; - - return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, - YqlIssue({}, TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message)); - } - - // Receive more replies from other shards CheckExecutionComplete(); return; } @@ -1702,18 +1702,15 @@ class TKqpDataExecuter : public TKqpExecuterBase(); evWriteTransaction->Record = evWrite; - evWriteTransaction->Record.SetTxMode(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWriteTransaction->Record.SetTxMode(ImmediateTx ? 
NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE : NKikimrDataEvents::TEvWrite::MODE_PREPARE); evWriteTransaction->Record.SetTxId(TxId); - evWriteTransaction->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - auto locksCount = evWriteTransaction->Record.GetLocks().LocksSize(); shardState.DatashardState->ShardReadLocks = locksCount > 0; @@ -1724,6 +1721,29 @@ class TKqpDataExecuter : public TKqpExecuterBaseStagesSize(); ++stageIdx) { auto& stage = tx.Body->GetStages(stageIdx); auto& stageInfo = TasksGraph.GetStageInfo(TStageId(txIdx, stageIdx)); @@ -1888,8 +1909,10 @@ class TKqpDataExecuter : public TKqpExecuterBasesecond.TaskCount : 0); + } break; default: YQL_ENSURE(false, "unknown source type"); @@ -2167,7 +2190,7 @@ class TKqpDataExecuter : public TKqpExecuterBase; using TEvWriteTxs = THashMap; - using TTopicTabletTxs = THashMap; + using TTopicTabletTxs = NTopic::TTopicOperationTransactions; void ContinueExecute() { if (Stats) { @@ -2424,10 +2447,10 @@ class TKqpDataExecuter : public TKqpExecuterBaseOrbit, TxId, ComputeTasks.size(), DatashardTxs.size() + EvWriteTxs.size()); @@ -2469,6 +2490,8 @@ class TKqpDataExecuter : public TKqpExecuterBasePlanExecution(); @@ -2587,13 +2612,12 @@ class TKqpDataExecuter : public TKqpExecuterBase(); + auto ev = std::make_unique(); - if (writeId.Defined()) { + if (t.hasWrite && writeId.Defined()) { auto* w = transaction.MutableWriteId(); w->SetNodeId(SelfId().NodeId()); w->SetKeyId(*writeId); @@ -2629,6 +2653,23 @@ class TKqpDataExecuter : public TKqpExecuterBaseGetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) { + LOG_I("Shutdown immediately - nothing to wait"); + PassAway(); + } else { + this->Become(&TThis::WaitShutdownState); + LOG_I("Waiting for shutdown of " << Planner->GetPendingComputeTasks().size() << " tasks and " + << Planner->GetPendingComputeActors().size() << " compute actors"); + // TODO(ilezhankin): the CA awaiting timeout should be configurable. 
+ TActivationContext::Schedule(TDuration::Seconds(10), new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPoison)); + } + } else { + PassAway(); + } + } + void PassAway() override { auto totalTime = TInstant::Now() - StartTime; Counters->Counters->DataTxTotalTimeHistogram->Collect(totalTime.MilliSeconds()); @@ -2646,6 +2687,54 @@ class TKqpDataExecuter : public TKqpExecuterBaseGetTypeRewrite()) { + hFunc(TEvDqCompute::TEvState, HandleShutdown); + hFunc(TEvInterconnect::TEvNodeDisconnected, HandleShutdown); + hFunc(TEvents::TEvPoison, HandleShutdown); + default: + LOG_E("Unexpected event: " << ev->GetTypeName()); // ignore all other events + } + } + + void HandleShutdown(TEvDqCompute::TEvState::TPtr& ev) { + HandleComputeStats(ev); + + if (Planner->GetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) { + PassAway(); + } + } + + void HandleShutdown(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { + const auto nodeId = ev->Get()->NodeId; + LOG_N("Node has disconnected while shutdown: " << nodeId); + + YQL_ENSURE(Planner); + + for (const auto& task : TasksGraph.GetTasks()) { + if (task.Meta.NodeId == nodeId && !task.Meta.Completed) { + if (task.ComputeActorId) { + Planner->CompletedCA(task.Id, task.ComputeActorId); + } else { + Planner->TaskNotStarted(task.Id); + } + } + } + + if (Planner->GetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) { + PassAway(); + } + } + + void HandleShutdown(TEvents::TEvPoison::TPtr& ev) { + // Self-poison means timeout - don't wait anymore. 
+ LOG_I("Timed out on waiting for Compute Actors to finish - forcing shutdown"); + + if (ev->Sender == SelfId()) { + PassAway(); + } + } + private: void ReplyTxStateUnknown(ui64 shardId) { auto message = TStringBuilder() << "Tx state unknown for shard " << shardId << ", txid " << TxId; diff --git a/ydb/core/kqp/executer_actor/kqp_executer.h b/ydb/core/kqp/executer_actor/kqp_executer.h index a871450170d7..203f6666214a 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer.h +++ b/ydb/core/kqp/executer_actor/kqp_executer.h @@ -26,6 +26,7 @@ struct TEvKqpExecuter { NLWTrace::TOrbit Orbit; IKqpGateway::TKqpSnapshot Snapshot; + std::optional BrokenLockPathId; ui64 ResultRowsCount = 0; ui64 ResultRowsBytes = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp index 40adee90bd9c..1b735ef19fca 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp @@ -39,7 +39,7 @@ void TEvKqpExecuter::TEvTxResponse::TakeResult(ui32 idx, NDq::TDqSerializedBatch ResultRowsBytes += rows.Size(); auto guard = AllocState->TypeEnv.BindAllocator(); auto& result = TxResults[idx]; - if (rows.RowCount() || !result.IsStream) { + if (rows.RowCount()) { NDq::TDqDataSerializer dataSerializer( AllocState->TypeEnv, AllocState->HolderFactory, static_cast(rows.Proto.GetTransportVersion())); diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 4bfb1ec0ad86..950e6d4d5c1b 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -81,6 +81,10 @@ struct TShardRangesWithShardId { const TShardKeyRanges* Ranges; }; +struct TStageScheduleInfo { + double StageCost = 0.0; + ui32 TaskCount = 0; +}; TActorId ReportToRl(ui64 ru, const TString& database, const TString& userToken, const NKikimrKqp::TRlPath& path); @@ -177,10 +181,10 @@ class TKqpExecuterBase : public 
TActorBootstrapped { } void ReportEventElapsedTime() { - if (Stats) { - ui64 elapsedMicros = TlsActivationContext->GetCurrentEventTicksAsSeconds() * 1'000'000; - Stats->ExecuterCpuTime += TDuration::MicroSeconds(elapsedMicros); - } + YQL_ENSURE(Stats); + + ui64 elapsedMicros = TlsActivationContext->GetCurrentEventTicksAsSeconds() * 1'000'000; + Stats->ExecuterCpuTime += TDuration::MicroSeconds(elapsedMicros); } protected: @@ -330,11 +334,10 @@ class TKqpExecuterBase : public TActorBootstrapped { } YQL_ENSURE(channel.DstTask == 0); + YQL_ENSURE(Stats); - if (Stats) { - Stats->ResultBytes += batch.Size(); - Stats->ResultRows += batch.RowCount(); - } + Stats->ResultBytes += batch.Size(); + Stats->ResultRows += batch.RowCount(); LOG_T("Got result, channelId: " << channel.Id << ", shardId: " << task.Meta.ShardId << ", inputIndex: " << channel.DstInputIndex << ", from: " << ev->Sender @@ -380,7 +383,7 @@ class TKqpExecuterBase : public TActorBootstrapped { this->Send(channelComputeActorId, ackEv.Release(), /* TODO: undelivery */ 0, /* cookie */ channelId); } - void HandleComputeStats(NYql::NDq::TEvDqCompute::TEvState::TPtr& ev) { + bool HandleComputeStats(NYql::NDq::TEvDqCompute::TEvState::TPtr& ev) { TActorId computeActor = ev->Sender; auto& state = ev->Get()->Record; ui64 taskId = state.GetTaskId(); @@ -391,7 +394,9 @@ class TKqpExecuterBase : public TActorBootstrapped { << ", state: " << NYql::NDqProto::EComputeState_Name((NYql::NDqProto::EComputeState) state.GetState()) << ", stats: " << state.GetStats()); - if (Stats && state.HasStats() && Request.ProgressStatsPeriod) { + YQL_ENSURE(Stats); + + if (state.HasStats() && Request.ProgressStatsPeriod) { Stats->UpdateTaskStats(taskId, state.GetStats()); auto now = TInstant::Now(); if (LastProgressStats + Request.ProgressStatsPeriod <= now) { @@ -408,84 +413,62 @@ class TKqpExecuterBase : public TActorBootstrapped { } } + YQL_ENSURE(Planner); + bool ack = Planner->AcknowledgeCA(taskId, computeActor, &state); + + switch 
(state.GetState()) { + case NYql::NDqProto::COMPUTE_STATE_FAILURE: + case NYql::NDqProto::COMPUTE_STATE_FINISHED: + // Don't finalize stats twice. + if (Planner->CompletedCA(taskId, computeActor)) { + ExtraData[computeActor].Swap(state.MutableExtraData()); + + Stats->AddComputeActorStats( + computeActor.NodeId(), + std::move(*state.MutableStats()), + TDuration::MilliSeconds(AggregationSettings.GetCollectLongTasksStatsTimeoutMs()) + ); + + LastTaskId = taskId; + LastComputeActorId = computeActor.ToString(); + } + default: + ; // ignore all other states. + } + + return ack; + } + + void HandleComputeState(NYql::NDq::TEvDqCompute::TEvState::TPtr& ev) { + TActorId computeActor = ev->Sender; + auto& state = ev->Get()->Record; + ui64 taskId = state.GetTaskId(); + + bool populateChannels = HandleComputeStats(ev); + switch (state.GetState()) { case NYql::NDqProto::COMPUTE_STATE_UNKNOWN: { YQL_ENSURE(false, "unexpected state from " << computeActor << ", task: " << taskId); return; } - case NYql::NDqProto::COMPUTE_STATE_FAILURE: { - ReplyErrorAndDie(NYql::NDq::DqStatusToYdbStatus(state.GetStatusCode()), state.MutableIssues()); - return; - } - case NYql::NDqProto::COMPUTE_STATE_EXECUTING: { - // initial TEvState event from Compute Actor - // there can be race with RM answer - if (Planner) { - if (Planner->GetPendingComputeTasks().erase(taskId)) { - auto it = Planner->GetPendingComputeActors().emplace(computeActor, TProgressStat()); - YQL_ENSURE(it.second); - - if (state.HasStats()) { - it.first->second.Set(state.GetStats()); - } - - auto& task = TasksGraph.GetTask(taskId); - task.ComputeActorId = computeActor; - - THashMap> updates; - CollectTaskChannelsUpdates(task, updates); - PropagateChannelsUpdates(updates); - } else { - auto it = Planner->GetPendingComputeActors().find(computeActor); - if (it != Planner->GetPendingComputeActors().end()) { - if (state.HasStats()) { - it->second.Set(state.GetStats()); - } - } - } + if (populateChannels) { + auto& task = 
TasksGraph.GetTask(taskId); + THashMap> updates; + CollectTaskChannelsUpdates(task, updates); + PropagateChannelsUpdates(updates); } break; } - case NYql::NDqProto::COMPUTE_STATE_FINISHED: { - if (Stats) { - Stats->AddComputeActorStats( - computeActor.NodeId(), - std::move(*state.MutableStats()), - TDuration::MilliSeconds(AggregationSettings.GetCollectLongTasksStatsTimeoutMs()) - ); - } - ExtraData[computeActor].Swap(state.MutableExtraData()); - - LastTaskId = taskId; - LastComputeActorId = computeActor.ToString(); - - if (Planner) { - auto it = Planner->GetPendingComputeActors().find(computeActor); - if (it == Planner->GetPendingComputeActors().end()) { - LOG_W("Got execution state for compute actor: " << computeActor - << ", task: " << taskId - << ", state: " << NYql::NDqProto::EComputeState_Name((NYql::NDqProto::EComputeState) state.GetState()) - << ", too early (waiting reply from RM)"); - - if (Planner && Planner->GetPendingComputeTasks().erase(taskId)) { - LOG_E("Got execution state for compute actor: " << computeActor - << ", for unknown task: " << state.GetTaskId() - << ", state: " << NYql::NDqProto::EComputeState_Name((NYql::NDqProto::EComputeState) state.GetState())); - return; - } - } else { - if (state.HasStats()) { - it->second.Set(state.GetStats()); - } - LastStats.emplace_back(std::move(it->second)); - Planner->GetPendingComputeActors().erase(it); - YQL_ENSURE(Planner->GetPendingComputeTasks().find(taskId) == Planner->GetPendingComputeTasks().end()); - } - } - } + default: + ; // ignore all other states. 
+ } + + if (state.GetState() == NYql::NDqProto::COMPUTE_STATE_FAILURE) { + ReplyErrorAndDie(NYql::NDq::DqStatusToYdbStatus(state.GetStatusCode()), state.MutableIssues()); + return; } static_cast(this)->CheckExecutionComplete(); @@ -512,6 +495,7 @@ class TKqpExecuterBase : public TActorBootstrapped { } TasksGraph.GetMeta().SetLockTxId(lockTxId); + TasksGraph.GetMeta().SetLockNodeId(SelfId().NodeId()); LWTRACK(KqpBaseExecuterHandleReady, ResponseEv->Orbit, TxId); if (IsDebugLogEnabled()) { @@ -532,9 +516,9 @@ class TKqpExecuterBase : public TActorBootstrapped { auto now = TAppData::TimeProvider->Now(); StartResolveTime = now; - if (Stats) { - Stats->StartTs = now; - } + YQL_ENSURE(Stats); + + Stats->StartTs = now; } TMaybe FindReadRangesSource(const NKqpProto::TKqpPhyStage& stage) { @@ -683,20 +667,14 @@ class TKqpExecuterBase : public TActorBootstrapped { auto taskId = startedTask.GetTaskId(); auto& task = TasksGraph.GetTask(taskId); - task.ComputeActorId = ActorIdFromProto(startedTask.GetActorId()); - - LOG_D("Executing task: " << taskId << " on compute actor: " << task.ComputeActorId); - - if (Planner) { - if (Planner->GetPendingComputeTasks().erase(taskId) == 0) { - LOG_D("Executing task: " << taskId << ", compute actor: " << task.ComputeActorId << ", already finished"); - } else { - auto result = Planner->GetPendingComputeActors().emplace(std::make_pair(task.ComputeActorId, TProgressStat())); - YQL_ENSURE(result.second); - - CollectTaskChannelsUpdates(task, channelsUpdates); - } + TActorId computeActorId = ActorIdFromProto(startedTask.GetActorId()); + LOG_D("Executing task: " << taskId << " on compute actor: " << computeActorId); + YQL_ENSURE(Planner); + bool channelUpdates = Planner->AcknowledgeCA(taskId, computeActorId, nullptr); + if (channelUpdates) { + CollectTaskChannelsUpdates(task, channelsUpdates); } + } PropagateChannelsUpdates(channelsUpdates); @@ -711,7 +689,7 @@ class TKqpExecuterBase : public TActorBootstrapped { if (statusCode == 
Ydb::StatusIds::INTERNAL_ERROR) { InternalError(issues); } else if (statusCode == Ydb::StatusIds::TIMEOUT) { - AbortExecutionAndDie(ev->Sender, NYql::NDqProto::StatusIds::TIMEOUT, "Request timeout exceeded"); + TimeoutError(ev->Sender); } else { RuntimeError(NYql::NDq::DqStatusToYdbStatus(msg.GetStatusCode()), issues); } @@ -789,16 +767,9 @@ class TKqpExecuterBase : public TActorBootstrapped { LastResourceUsageUpdate = now; TProgressStat::TEntry consumption; - if (Planner) { - for (const auto& p : Planner->GetPendingComputeActors()) { - const auto& t = p.second.GetLastUsage(); - consumption += t; - } - } - for (const auto& p : LastStats) { - const auto& t = p.GetLastUsage(); - consumption += t; + if (Planner) { + consumption += Planner->CalculateConsumptionUpdate(); } auto ru = NRuCalc::CalcRequestUnit(consumption); @@ -811,13 +782,7 @@ class TKqpExecuterBase : public TActorBootstrapped { return; if (Planner) { - for (auto& p : Planner->GetPendingComputeActors()) { - p.second.Update(); - } - } - - for (auto& p : LastStats) { - p.Update(); + Planner->ShiftConsumption(); } if (Request.RlPath) { @@ -846,6 +811,40 @@ class TKqpExecuterBase : public TActorBootstrapped { } } + std::map ScheduleByCost(const IKqpGateway::TPhysicalTxData& tx, const TVector& resourceSnapshot) { + std::map result; + if (!resourceSnapshot.empty()) // can't schedule w/o node count + { + // collect costs and schedule stages with external sources only + double totalCost = 0.0; + for (ui32 stageIdx = 0; stageIdx < tx.Body->StagesSize(); ++stageIdx) { + auto& stage = tx.Body->GetStages(stageIdx); + if (stage.SourcesSize() > 0 && stage.GetSources(0).GetTypeCase() == NKqpProto::TKqpSource::kExternalSource) { + if (stage.GetStageCost() > 0.0 && stage.GetTaskCount() == 0) { + totalCost += stage.GetStageCost(); + result.emplace(stageIdx, TStageScheduleInfo{.StageCost = stage.GetStageCost()}); + } + } + } + // assign task counts + if (!result.empty()) { + // allow use 2/3 of threads in single stage + 
ui32 maxStageTaskCount = (TStagePredictor::GetUsableThreads() * 2 + 2) / 3; + // total limit per mode is x2 + ui32 maxTotalTaskCount = maxStageTaskCount * 2; + for (auto& [_, stageInfo] : result) { + // schedule tasks evenly between nodes + stageInfo.TaskCount = + std::max( + std::min(static_cast(maxTotalTaskCount * stageInfo.StageCost / totalCost), maxStageTaskCount) + , 1 + ) * resourceSnapshot.size(); + } + } + } + return result; + } + void BuildSysViewScanTasks(TStageInfo& stageInfo) { Y_DEBUG_ABORT_UNLESS(stageInfo.Meta.IsSysView()); @@ -951,7 +950,7 @@ class TKqpExecuterBase : public TActorBootstrapped { } } - void BuildReadTasksFromSource(TStageInfo& stageInfo, const TVector& resourceSnapshot) { + void BuildReadTasksFromSource(TStageInfo& stageInfo, const TVector& resourceSnapshot, ui32 scheduledTaskCount) { const auto& stage = stageInfo.Meta.GetStage(stageInfo.Id); YQL_ENSURE(stage.GetSources(0).HasExternalSource()); @@ -962,7 +961,16 @@ class TKqpExecuterBase : public TActorBootstrapped { ui32 taskCount = externalSource.GetPartitionedTaskParams().size(); - if (!resourceSnapshot.empty()) { + auto taskCountHint = stage.GetTaskCount(); + if (taskCountHint == 0) { + taskCountHint = scheduledTaskCount; + } + + if (taskCountHint) { + if (taskCount > taskCountHint) { + taskCount = taskCountHint; + } + } else if (!resourceSnapshot.empty()) { ui32 maxTaskcount = resourceSnapshot.size() * 2; if (taskCount > maxTaskcount) { taskCount = maxTaskcount; @@ -1031,8 +1039,7 @@ class TKqpExecuterBase : public TActorBootstrapped { std::sort(std::begin(shardsRanges), std::end(shardsRanges), [&](const TShardRangesWithShardId& lhs, const TShardRangesWithShardId& rhs) { // Special case for infinity if (lhs.Ranges->GetRightBorder().first->GetCells().empty() || rhs.Ranges->GetRightBorder().first->GetCells().empty()) { - YQL_ENSURE(!lhs.Ranges->GetRightBorder().first->GetCells().empty() || !rhs.Ranges->GetRightBorder().first->GetCells().empty()); - return 
rhs.Ranges->GetRightBorder().first->GetCells().empty(); + return !lhs.Ranges->GetRightBorder().first->GetCells().empty(); } return CompareTypedCellVectors( lhs.Ranges->GetRightBorder().first->GetCells().data(), @@ -1187,8 +1194,9 @@ class TKqpExecuterBase : public TActorBootstrapped { : Nothing(); YQL_ENSURE(!shardsResolved || nodeId); + YQL_ENSURE(Stats); - if (shardId && Stats) { + if (shardId) { Stats->AffectedShards.insert(*shardId); } @@ -1256,11 +1264,13 @@ class TKqpExecuterBase : public TActorBootstrapped { if (partitions.size() > 0 && source.GetSequentialInFlightShards() > 0 && partitions.size() > source.GetSequentialInFlightShards()) { auto [startShard, shardInfo] = MakeVirtualTablePartition(source, stageInfo, HolderFactory(), TypeEnv()); - if (Stats) { - for (auto& [shardId, _] : partitions) { - Stats->AffectedShards.insert(shardId); - } + + YQL_ENSURE(Stats); + + for (auto& [shardId, _] : partitions) { + Stats->AffectedShards.insert(shardId); } + if (shardInfo.KeyReadRanges) { addPartiton(startShard, {}, shardInfo, source.GetSequentialInFlightShards()); fillRangesForTasks(); @@ -1300,13 +1310,9 @@ class TKqpExecuterBase : public TActorBootstrapped { const auto& input = stage.GetInputs(inputIndex); // Current assumptions: - // 1. `Broadcast` can not be the 1st stage input unless it's a single input - // 2. All stage's inputs, except 1st one, must be a `Broadcast` or `UnionAll` - if (inputIndex == 0) { - if (stage.InputsSize() > 1) { - YQL_ENSURE(input.GetTypeCase() != NKqpProto::TKqpPhyConnection::kBroadcast); - } - } else { + // 1. All stage's inputs, except 1st one, must be a `Broadcast` or `UnionAll` + // 2. Stages where 1st input is `Broadcast` are not partitioned. 
+ if (inputIndex > 0) { switch (input.GetTypeCase()) { case NKqpProto::TKqpPhyConnection::kBroadcast: case NKqpProto::TKqpPhyConnection::kHashShuffle: @@ -1342,7 +1348,11 @@ class TKqpExecuterBase : public TActorBootstrapped { } if (isShuffle) { - partitionsCount = std::max(partitionsCount, GetMaxTasksAggregation(stageInfo, inputTasks, nodesCount)); + if (stage.GetTaskCount()) { + partitionsCount = stage.GetTaskCount(); + } else { + partitionsCount = std::max(partitionsCount, GetMaxTasksAggregation(stageInfo, inputTasks, nodesCount)); + } } for (ui32 i = 0; i < partitionsCount; ++i) { @@ -1527,6 +1537,8 @@ class TKqpExecuterBase : public TActorBootstrapped { THashMap assignedShardsCount; auto& stage = stageInfo.Meta.GetStage(stageInfo.Id); + YQL_ENSURE(Stats); + const auto& tableInfo = stageInfo.Meta.TableConstInfo; const auto& keyTypes = tableInfo->KeyColumnTypes; ui32 metaId = 0; @@ -1555,7 +1567,7 @@ class TKqpExecuterBase : public TActorBootstrapped { nodeShards[nodeId].emplace_back(TShardInfoWithId(i.first, std::move(i.second))); } - if (Stats && CollectProfileStats(Request.StatsMode)) { + if (CollectProfileStats(Request.StatsMode)) { for (auto&& i : nodeShards) { Stats->AddNodeShardsCount(stageInfo.Id.StageId, i.first, i.second.size()); } @@ -1662,14 +1674,14 @@ class TKqpExecuterBase : public TActorBootstrapped { protected: void TerminateComputeActors(Ydb::StatusIds::StatusCode code, const NYql::TIssues& issues) { for (const auto& task : this->TasksGraph.GetTasks()) { - if (task.ComputeActorId) { + if (task.ComputeActorId && !task.Meta.Completed) { LOG_I("aborting compute actor execution, message: " << issues.ToOneLineString() << ", compute actor: " << task.ComputeActorId << ", task: " << task.Id); auto ev = MakeHolder(NYql::NDq::YdbStatusToDqStatus(code), issues); this->Send(task.ComputeActorId, ev.Release()); } else { - LOG_I("task: " << task.Id << ", does not have Compute ActorId yet"); + LOG_I("task: " << task.Id << ", does not have the CA id yet or is 
already complete"); } } } @@ -1687,7 +1699,6 @@ class TKqpExecuterBase : public TActorBootstrapped { void InternalError(const NYql::TIssues& issues) { LOG_E(issues.ToOneLineString()); - TerminateComputeActors(Ydb::StatusIds::INTERNAL_ERROR, issues); auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::UNEXPECTED, "Internal error while executing transaction."); for (const NYql::TIssue& i : issues) { issue.AddSubIssue(MakeIntrusive(i)); @@ -1701,7 +1712,6 @@ class TKqpExecuterBase : public TActorBootstrapped { void ReplyUnavailable(const TString& message) { LOG_E("UNAVAILABLE: " << message); - TerminateComputeActors(Ydb::StatusIds::UNAVAILABLE, message); auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE); issue.AddSubIssue(new NYql::TIssue(message)); ReplyErrorAndDie(Ydb::StatusIds::UNAVAILABLE, issue); @@ -1709,7 +1719,6 @@ class TKqpExecuterBase : public TActorBootstrapped { void RuntimeError(Ydb::StatusIds::StatusCode code, const NYql::TIssues& issues) { LOG_E(Ydb::StatusIds_StatusCode_Name(code) << ": " << issues.ToOneLineString()); - TerminateComputeActors(code, issues); ReplyErrorAndDie(code, issues); } @@ -1725,54 +1734,54 @@ class TKqpExecuterBase : public TActorBootstrapped { ReplyErrorAndDie(status, &issues); } - void AbortExecutionAndDie(TActorId abortSender, NYql::NDqProto::StatusIds::StatusCode status, const TString& message) { + void TimeoutError(TActorId abortSender) { if (AlreadyReplied) { + LOG_E("Timeout when we already replied - not good" << Endl << TBackTrace().PrintToString() << Endl); return; } + const auto status = NYql::NDqProto::StatusIds::TIMEOUT; + const TString message = "Request timeout exceeded"; + + TerminateComputeActors(Ydb::StatusIds::TIMEOUT, message); + + AlreadyReplied = true; + LOG_E("Abort execution: " << NYql::NDqProto::StatusIds_StatusCode_Name(status) << "," << message); if (ExecuterSpan) { ExecuterSpan.EndError(TStringBuilder() << NYql::NDqProto::StatusIds_StatusCode_Name(status)); } - 
static_cast(this)->FillResponseStats(Ydb::StatusIds::TIMEOUT); + ResponseEv->Record.MutableResponse()->SetStatus(Ydb::StatusIds::TIMEOUT); // TEvAbortExecution can come from either ComputeActor or SessionActor (== Target). if (abortSender != Target) { - auto abortEv = MakeHolder(status, "Request timeout exceeded"); + auto abortEv = MakeHolder(status, message); this->Send(Target, abortEv.Release()); } - AlreadyReplied = true; LOG_E("Sending timeout response to: " << Target); - this->Send(Target, ResponseEv.release()); - Request.Transactions.crop(0); - TerminateComputeActors(Ydb::StatusIds::TIMEOUT, message); - this->PassAway(); + this->Shutdown(); } virtual void ReplyErrorAndDie(Ydb::StatusIds::StatusCode status, google::protobuf::RepeatedPtrField* issues) { if (AlreadyReplied) { + LOG_E("Error when we already replied - not good" << Endl << TBackTrace().PrintToString() << Endl); return; } - if (Planner) { - for (auto computeActor : Planner->GetPendingComputeActors()) { - LOG_D("terminate compute actor " << computeActor.first); - - auto ev = MakeHolder(NYql::NDq::YdbStatusToDqStatus(status), "Terminate execution"); - this->Send(computeActor.first, ev.Release()); - } - } + TerminateComputeActors(status, "Terminate execution"); AlreadyReplied = true; auto& response = *ResponseEv->Record.MutableResponse(); response.SetStatus(status); - response.MutableIssues()->Swap(issues); + if (issues) { + response.MutableIssues()->Swap(issues); + } LOG_T("ReplyErrorAndDie. 
Response: " << response.DebugString() << ", to ActorId: " << Target); @@ -1790,9 +1799,7 @@ class TKqpExecuterBase : public TActorBootstrapped { ExecuterSpan.EndError(response.DebugString()); ExecuterStateSpan.EndError(response.DebugString()); - Request.Transactions.crop(0); - this->Send(Target, ResponseEv.release()); - this->PassAway(); + this->Shutdown(); } protected: @@ -1860,7 +1867,46 @@ class TKqpExecuterBase : public TActorBootstrapped { } protected: + // Introduced separate method from `PassAway()` - to not get confused with expectations from other actors, + // that `PassAway()` should kill actor immediately. + virtual void Shutdown() { + PassAway(); + } + void PassAway() override { + YQL_ENSURE(AlreadyReplied && ResponseEv); + + // Fill response stats + { + auto& response = *ResponseEv->Record.MutableResponse(); + + YQL_ENSURE(Stats); + + ReportEventElapsedTime(); + + Stats->FinishTs = TInstant::Now(); + Stats->Finish(); + + if (Stats->CollectStatsByLongTasks || CollectFullStats(Request.StatsMode)) { + response.MutableResult()->MutableStats()->ClearTxPlansWithStats(); + for (ui32 txId = 0; txId < Request.Transactions.size(); ++txId) { + const auto& tx = Request.Transactions[txId].Body; + auto planWithStats = AddExecStatsToTxPlan(tx->GetPlan(), response.GetResult().GetStats()); + response.MutableResult()->MutableStats()->AddTxPlansWithStats(planWithStats); + } + } + + if (Stats->CollectStatsByLongTasks) { + const auto& txPlansWithStats = response.GetResult().GetStats().GetTxPlansWithStats(); + if (!txPlansWithStats.empty()) { + LOG_N("Full stats: " << response.GetResult().GetStats()); + } + } + } + + Request.Transactions.crop(0); + this->Send(Target, ResponseEv.release()); + for (auto channelPair: ResultChannelProxies) { LOG_D("terminate result channel " << channelPair.first << " proxy at " << channelPair.second->SelfId()); @@ -1881,12 +1927,11 @@ class TKqpExecuterBase : public TActorBootstrapped { if (KqpTableResolverId) { this->Send(KqpTableResolverId, 
new TEvents::TEvPoison); - this->Send(this->SelfId(), new TEvents::TEvPoison); - LOG_T("Terminate, become ZombieState"); - this->Become(&TKqpExecuterBase::ZombieState); - } else { - IActor::PassAway(); } + + this->Send(this->SelfId(), new TEvents::TEvPoison); + LOG_T("Terminate, become ZombieState"); + this->Become(&TKqpExecuterBase::ZombieState); } STATEFN(ZombieState) { @@ -1949,8 +1994,6 @@ class TKqpExecuterBase : public TActorBootstrapped { TActorId KqpShardsResolverId; THashMap ExtraData; - TVector LastStats; - TInstant StartResolveTime; TInstant LastResourceUsageUpdate; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_stats.cpp b/ydb/core/kqp/executer_actor/kqp_executer_stats.cpp index 33fbb0a00156..177fd8567ff0 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_stats.cpp +++ b/ydb/core/kqp/executer_actor/kqp_executer_stats.cpp @@ -6,8 +6,99 @@ namespace NKikimr::NKqp { using namespace NYql; using namespace NYql::NDq; +void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsAggr& stats); + +ui64 NonZeroMin(ui64 a, ui64 b) { + return (b == 0) ? a : ((a == 0 || a > b) ? b : a); +} + +void TTimeSeriesStats::ExportAggStats(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) { + NKikimr::NKqp::ExportAggStats(Values, stats); + ExportHistory(baseTimeMs, stats); +} + +void TTimeSeriesStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) { + Pack(); + if (!History.empty()) { + for (auto& h : History) { + auto& item = *stats.AddHistory(); + item.SetTimeMs((h.first <= baseTimeMs) ? 
0 : (h.first - baseTimeMs)); + item.SetValue(h.second); + } + } +} + +void TTimeSeriesStats::Resize(ui32 taskCount) { + Values.resize(taskCount); +} + +void TTimeSeriesStats::SetNonZero(ui32 taskIndex, ui64 value) { + if (value) { + Sum += value; + Sum -= Values[taskIndex]; + Values[taskIndex] = value; + } + if (HistorySampleCount) { + auto nowMs = Now().MilliSeconds(); + + if (!History.empty() && History.back().first == nowMs) { + History.back().second = Sum; + return; + } + + if (History.size() > 1 && History.back().second == Sum && History[History.size() - 2].second == Sum) { + History.back().first = nowMs; + return; + } + + History.emplace_back(nowMs, Sum); + if (History.size() >= HistorySampleCount * 2) { + Pack(); + } + } +} + +void TTimeSeriesStats::Pack() { + if (HistorySampleCount == 0) { + History.clear(); + return; + } + if (History.size() > HistorySampleCount) { + + if (HistorySampleCount == 1) { + History.front() = History.back(); + return; + } + if (HistorySampleCount == 2) { + History[1] = History.back(); + History.resize(2); + return; + } + + std::vector> history; + ui32 count = History.size(); + ui32 delta = count - HistorySampleCount; + ui64 minTime = History.front().first; + ui64 maxTime = History.back().first; + ui64 deltaTime = (maxTime - minTime) / (HistorySampleCount - 1); + bool first = true; + ui64 nextTime = minTime; + for (auto& h : History) { + if (!first && delta && ((h.first < nextTime) || (delta + 1 == count))) { + delta--; + } else { + history.push_back(h); + nextTime += deltaTime; + first = false; + } + count--; + } + History.swap(history); + } +} + void TAsyncStats::Resize(ui32 taskCount) { - Bytes.resize(taskCount); + Bytes.Resize(taskCount); DecompressedBytes.resize(taskCount); Rows.resize(taskCount); Chunks.resize(taskCount); @@ -16,11 +107,25 @@ void TAsyncStats::Resize(ui32 taskCount) { PauseMessageMs.resize(taskCount); ResumeMessageMs.resize(taskCount); LastMessageMs.resize(taskCount); - WaitTimeUs.resize(taskCount); + 
WaitTimeUs.Resize(taskCount); WaitPeriods.resize(taskCount); ActiveTimeUs.resize(taskCount); } +void TAsyncStats::SetHistorySampleCount(ui32 historySampleCount) { + Bytes.HistorySampleCount = historySampleCount; + WaitTimeUs.HistorySampleCount = historySampleCount; +} + +void TAsyncStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncStatsAggr& stats) { + if (stats.HasBytes()) { + Bytes.ExportHistory(baseTimeMs, *stats.MutableBytes()); + } + if (stats.HasWaitTimeUs()) { + WaitTimeUs.ExportHistory(baseTimeMs, *stats.MutableWaitTimeUs()); + } +} + void TAsyncBufferStats::Resize(ui32 taskCount) { Ingress.Resize(taskCount); Push.Resize(taskCount); @@ -28,6 +133,28 @@ void TAsyncBufferStats::Resize(ui32 taskCount) { Egress.Resize(taskCount); } +void TAsyncBufferStats::SetHistorySampleCount(ui32 historySampleCount) { + Ingress.SetHistorySampleCount(historySampleCount); + Push.SetHistorySampleCount(historySampleCount); + Pop.SetHistorySampleCount(historySampleCount); + Egress.SetHistorySampleCount(historySampleCount); +} + +void TAsyncBufferStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) { + if (stats.HasIngress()) { + Ingress.ExportHistory(baseTimeMs, *stats.MutableIngress()); + } + if (stats.HasPush()) { + Push.ExportHistory(baseTimeMs, *stats.MutablePush()); + } + if (stats.HasPop()) { + Pop.ExportHistory(baseTimeMs, *stats.MutablePop()); + } + if (stats.HasEgress()) { + Egress.ExportHistory(baseTimeMs, *stats.MutableEgress()); + } +} + void TTableStats::Resize(ui32 taskCount) { ReadRows.resize(taskCount); ReadBytes.resize(taskCount); @@ -39,7 +166,7 @@ void TTableStats::Resize(ui32 taskCount) { } void TStageExecutionStats::Resize(ui32 taskCount) { - CpuTimeUs.resize(taskCount); + CpuTimeUs.Resize(taskCount); SourceCpuTimeUs.resize(taskCount); InputRows.resize(taskCount); @@ -60,12 +187,72 @@ void TStageExecutionStats::Resize(ui32 taskCount) { WaitInputTimeUs.resize(taskCount); WaitOutputTimeUs.resize(taskCount); + 
SpillingComputeBytes.Resize(taskCount); + SpillingChannelBytes.Resize(taskCount); + SpillingComputeTimeUs.Resize(taskCount); + SpillingChannelTimeUs.Resize(taskCount); + for (auto& p : Ingress) p.second.Resize(taskCount); - for (auto& p : Egress) p.second.Resize(taskCount); - for (auto& p : Input) p.second.Resize(taskCount); - for (auto& p : Output) p.second.Resize(taskCount); + for (auto& p : Input) p.second.Resize(taskCount); + for (auto& p : Output) p.second.Resize(taskCount); + for (auto& p : Egress) p.second.Resize(taskCount); - MaxMemoryUsage.resize(taskCount); + MaxMemoryUsage.Resize(taskCount); +} + +void TStageExecutionStats::SetHistorySampleCount(ui32 historySampleCount) { + HistorySampleCount = historySampleCount; + CpuTimeUs.HistorySampleCount = historySampleCount; + MaxMemoryUsage.HistorySampleCount = historySampleCount; + SpillingComputeBytes.HistorySampleCount = historySampleCount; + SpillingChannelBytes.HistorySampleCount = historySampleCount; + SpillingComputeTimeUs.HistorySampleCount = historySampleCount; + SpillingChannelTimeUs.HistorySampleCount = historySampleCount; +} + +void TStageExecutionStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStageStats& stageStats) { + if (stageStats.HasCpuTimeUs()) { + CpuTimeUs.ExportHistory(baseTimeMs, *stageStats.MutableCpuTimeUs()); + } + for (auto& p : *stageStats.MutableIngress()) { + auto it = Ingress.find(p.first); + if (it != Ingress.end()) { + it->second.ExportHistory(baseTimeMs, p.second); + } + } + for (auto& p : *stageStats.MutableInput()) { + auto it = Input.find(p.first); + if (it != Input.end()) { + it->second.ExportHistory(baseTimeMs, p.second); + } + } + for (auto& p : *stageStats.MutableOutput()) { + auto it = Output.find(p.first); + if (it != Output.end()) { + it->second.ExportHistory(baseTimeMs, p.second); + } + } + for (auto& p : *stageStats.MutableEgress()) { + auto it = Egress.find(p.first); + if (it != Egress.end()) { + it->second.ExportHistory(baseTimeMs, p.second); + } + } + if 
(stageStats.HasMaxMemoryUsage()) { + MaxMemoryUsage.ExportHistory(baseTimeMs, *stageStats.MutableMaxMemoryUsage()); + } + if (stageStats.HasSpillingComputeBytes()) { + SpillingComputeBytes.ExportHistory(baseTimeMs, *stageStats.MutableSpillingComputeBytes()); + } + if (stageStats.HasSpillingChannelBytes()) { + SpillingChannelBytes.ExportHistory(baseTimeMs, *stageStats.MutableSpillingChannelBytes()); + } + if (stageStats.HasSpillingComputeTimeUs()) { + SpillingComputeTimeUs.ExportHistory(baseTimeMs, *stageStats.MutableSpillingComputeTimeUs()); + } + if (stageStats.HasSpillingChannelTimeUs()) { + SpillingChannelTimeUs.ExportHistory(baseTimeMs, *stageStats.MutableSpillingChannelTimeUs()); + } } void SetNonZero(ui64& target, ui64 source) { @@ -74,8 +261,10 @@ void SetNonZero(ui64& target, ui64 source) { } } -void TStageExecutionStats::UpdateAsyncStats(i32 index, TAsyncStats& aggrAsyncStats, const NYql::NDqProto::TDqAsyncBufferStats& asyncStats) { - SetNonZero(aggrAsyncStats.Bytes[index], asyncStats.GetBytes()); +ui64 TStageExecutionStats::UpdateAsyncStats(i32 index, TAsyncStats& aggrAsyncStats, const NYql::NDqProto::TDqAsyncBufferStats& asyncStats) { + ui64 baseTimeMs = 0; + + aggrAsyncStats.Bytes.SetNonZero(index, asyncStats.GetBytes()); SetNonZero(aggrAsyncStats.DecompressedBytes[index], asyncStats.GetDecompressedBytes()); SetNonZero(aggrAsyncStats.Rows[index], asyncStats.GetRows()); SetNonZero(aggrAsyncStats.Chunks[index], asyncStats.GetChunks()); @@ -83,20 +272,33 @@ void TStageExecutionStats::UpdateAsyncStats(i32 index, TAsyncStats& aggrAsyncSta auto firstMessageMs = asyncStats.GetFirstMessageMs(); SetNonZero(aggrAsyncStats.FirstMessageMs[index], firstMessageMs); - SetNonZero(aggrAsyncStats.PauseMessageMs[index], asyncStats.GetPauseMessageMs()); - SetNonZero(aggrAsyncStats.ResumeMessageMs[index], asyncStats.GetResumeMessageMs()); + baseTimeMs = NonZeroMin(baseTimeMs, firstMessageMs); + + auto pauseMessageMs = asyncStats.GetPauseMessageMs(); + 
SetNonZero(aggrAsyncStats.PauseMessageMs[index], pauseMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, pauseMessageMs); + + auto resumeMessageMs = asyncStats.GetResumeMessageMs(); + SetNonZero(aggrAsyncStats.ResumeMessageMs[index], resumeMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, resumeMessageMs); + auto lastMessageMs = asyncStats.GetLastMessageMs(); SetNonZero(aggrAsyncStats.LastMessageMs[index], lastMessageMs); - SetNonZero(aggrAsyncStats.WaitTimeUs[index], asyncStats.GetWaitTimeUs()); + baseTimeMs = NonZeroMin(baseTimeMs, lastMessageMs); + + aggrAsyncStats.WaitTimeUs.SetNonZero(index, asyncStats.GetWaitTimeUs()); SetNonZero(aggrAsyncStats.WaitPeriods[index], asyncStats.GetWaitPeriods()); if (firstMessageMs && lastMessageMs > firstMessageMs) { aggrAsyncStats.ActiveTimeUs[index] = lastMessageMs - firstMessageMs; } + + return baseTimeMs; } -void TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskStats, ui64 maxMemoryUsage, ui64 durationUs) { +ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskStats, ui64 maxMemoryUsage, ui64 durationUs) { auto taskId = taskStats.GetTaskId(); auto it = Task2Index.find(taskId); + ui64 baseTimeMs = 0; ui32 taskCount = Task2Index.size(); @@ -109,7 +311,7 @@ void TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS index = it->second; } - SetNonZero(CpuTimeUs[index], taskStats.GetCpuTimeUs()); + CpuTimeUs.SetNonZero(index, taskStats.GetCpuTimeUs()); SetNonZero(SourceCpuTimeUs[index], taskStats.GetSourceCpuTimeUs()); SetNonZero(InputRows[index], taskStats.GetInputRows()); @@ -124,12 +326,23 @@ void TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS SetNonZero(EgressRows[index], taskStats.GetEgressRows()); SetNonZero(EgressBytes[index], taskStats.GetEgressBytes()); - SetNonZero(StartTimeMs[index], taskStats.GetStartTimeMs()); - SetNonZero(FinishTimeMs[index], taskStats.GetFinishTimeMs()); + auto startTimeMs = 
taskStats.GetStartTimeMs(); + SetNonZero(StartTimeMs[index], startTimeMs); + baseTimeMs = NonZeroMin(baseTimeMs, startTimeMs); + + auto finishTimeMs = taskStats.GetFinishTimeMs(); + SetNonZero(FinishTimeMs[index], finishTimeMs); + baseTimeMs = NonZeroMin(baseTimeMs, finishTimeMs); + SetNonZero(DurationUs[index], durationUs); SetNonZero(WaitInputTimeUs[index], taskStats.GetWaitInputTimeUs()); SetNonZero(WaitOutputTimeUs[index], taskStats.GetWaitOutputTimeUs()); + SpillingComputeBytes.SetNonZero(index, taskStats.GetSpillingComputeWriteBytes()); + SpillingChannelBytes.SetNonZero(index, taskStats.GetSpillingChannelWriteBytes()); + SpillingComputeTimeUs.SetNonZero(index, taskStats.GetSpillingComputeReadTimeUs() + taskStats.GetSpillingComputeWriteTimeUs()); + SpillingChannelTimeUs.SetNonZero(index, taskStats.GetSpillingChannelReadTimeUs() + taskStats.GetSpillingChannelWriteTimeUs()); + for (auto& tableStat : taskStats.GetTables()) { auto tablePath = tableStat.GetTablePath(); auto [it, inserted] = Tables.try_emplace(tablePath, taskCount); @@ -148,9 +361,12 @@ void TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS if (ingressName) { auto [it, inserted] = Ingress.try_emplace(ingressName, taskCount); auto& asyncBufferStats = it->second; - UpdateAsyncStats(index, asyncBufferStats.Ingress, sourceStat.GetIngress()); - UpdateAsyncStats(index, asyncBufferStats.Push, sourceStat.GetPush()); - UpdateAsyncStats(index, asyncBufferStats.Pop, sourceStat.GetPop()); + if (inserted) { + asyncBufferStats.SetHistorySampleCount(HistorySampleCount); + } + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Ingress, sourceStat.GetIngress())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Push, sourceStat.GetPush())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Pop, sourceStat.GetPop())); } } @@ -158,16 +374,22 @@ void TStageExecutionStats::UpdateStats(const 
NYql::NDqProto::TDqTaskStats& taskS auto stageId = inputChannelStat.GetSrcStageId(); auto [it, inserted] = Input.try_emplace(stageId, taskCount); auto& asyncBufferStats = it->second; - UpdateAsyncStats(index, asyncBufferStats.Push, inputChannelStat.GetPush()); - UpdateAsyncStats(index, asyncBufferStats.Pop, inputChannelStat.GetPop()); + if (inserted) { + asyncBufferStats.SetHistorySampleCount(HistorySampleCount); + } + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Push, inputChannelStat.GetPush())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Pop, inputChannelStat.GetPop())); } for (auto& outputChannelStat : taskStats.GetOutputChannels()) { auto stageId = outputChannelStat.GetDstStageId(); auto [it, inserted] = Output.try_emplace(stageId, taskCount); auto& asyncBufferStats = it->second; - UpdateAsyncStats(index, asyncBufferStats.Push, outputChannelStat.GetPush()); - UpdateAsyncStats(index, asyncBufferStats.Pop, outputChannelStat.GetPop()); + if (inserted) { + asyncBufferStats.SetHistorySampleCount(HistorySampleCount); + } + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Push, outputChannelStat.GetPush())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Pop, outputChannelStat.GetPop())); } for (auto& sinkStat : taskStats.GetSinks()) { @@ -175,13 +397,18 @@ void TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS if (egressName) { auto [it, inserted] = Egress.try_emplace(egressName, taskCount); auto& asyncBufferStats = it->second; - UpdateAsyncStats(index, asyncBufferStats.Push, sinkStat.GetPush()); - UpdateAsyncStats(index, asyncBufferStats.Pop, sinkStat.GetPop()); - UpdateAsyncStats(index, asyncBufferStats.Ingress, sinkStat.GetEgress()); + if (inserted) { + asyncBufferStats.SetHistorySampleCount(HistorySampleCount); + } + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Push, 
sinkStat.GetPush())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Pop, sinkStat.GetPop())); + baseTimeMs = NonZeroMin(baseTimeMs, UpdateAsyncStats(index, asyncBufferStats.Ingress, sinkStat.GetEgress())); } } - SetNonZero(MaxMemoryUsage[index], maxMemoryUsage); + MaxMemoryUsage.SetNonZero(index, maxMemoryUsage); + + return baseTimeMs; } namespace { @@ -225,7 +452,9 @@ struct TAsyncGroupStat { ui64 Count = 0; }; -void UpdateAsyncAggr(NDqProto::TDqAsyncStatsAggr& asyncAggr, const NDqProto::TDqAsyncBufferStats& asyncStat) noexcept { +ui64 UpdateAsyncAggr(NDqProto::TDqAsyncStatsAggr& asyncAggr, const NDqProto::TDqAsyncBufferStats& asyncStat) noexcept { + ui64 baseTimeMs = 0; + UpdateAggr(asyncAggr.MutableBytes(), asyncStat.GetBytes()); UpdateAggr(asyncAggr.MutableDecompressedBytes(), asyncStat.GetDecompressedBytes()); UpdateAggr(asyncAggr.MutableRows(), asyncStat.GetRows()); @@ -235,16 +464,25 @@ void UpdateAsyncAggr(NDqProto::TDqAsyncStatsAggr& asyncAggr, const NDqProto::TDq auto firstMessageMs = asyncStat.GetFirstMessageMs(); if (firstMessageMs) { UpdateAggr(asyncAggr.MutableFirstMessageMs(), firstMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, firstMessageMs); } - if (asyncStat.GetPauseMessageMs()) { - UpdateAggr(asyncAggr.MutablePauseMessageMs(), asyncStat.GetPauseMessageMs()); + + auto pauseMessageMs = asyncStat.GetPauseMessageMs(); + if (pauseMessageMs) { + UpdateAggr(asyncAggr.MutablePauseMessageMs(), pauseMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, pauseMessageMs); } - if (asyncStat.GetResumeMessageMs()) { - UpdateAggr(asyncAggr.MutableResumeMessageMs(), asyncStat.GetResumeMessageMs()); + + auto resumeMessageMs = asyncStat.GetResumeMessageMs(); + if (resumeMessageMs) { + UpdateAggr(asyncAggr.MutableResumeMessageMs(), resumeMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, resumeMessageMs); } + auto lastMessageMs = asyncStat.GetLastMessageMs(); if (lastMessageMs) { UpdateAggr(asyncAggr.MutableLastMessageMs(), 
lastMessageMs); + baseTimeMs = NonZeroMin(baseTimeMs, lastMessageMs); } UpdateAggr(asyncAggr.MutableWaitTimeUs(), asyncStat.GetWaitTimeUs()); @@ -253,6 +491,8 @@ void UpdateAsyncAggr(NDqProto::TDqAsyncStatsAggr& asyncAggr, const NDqProto::TDq if (firstMessageMs && lastMessageMs >= firstMessageMs) { UpdateAggr(asyncAggr.MutableActiveTimeUs(), (lastMessageMs - firstMessageMs) * 1000); } + + return baseTimeMs; } NDqProto::TDqStageStats* GetOrCreateStageStats(const NYql::NDq::TStageId& stageId, @@ -365,30 +605,42 @@ void TQueryExecutionStats::AddComputeActorFullStatsByTask( UpdateAggr(stageStats->MutableEgressRows(), task.GetEgressRows()); UpdateAggr(stageStats->MutableEgressBytes(), task.GetEgressBytes()); - UpdateAggr(stageStats->MutableStartTimeMs(), task.GetStartTimeMs()); - UpdateAggr(stageStats->MutableFinishTimeMs(), task.GetFinishTimeMs()); + auto startTimeMs = task.GetStartTimeMs(); + UpdateAggr(stageStats->MutableStartTimeMs(), startTimeMs); + BaseTimeMs = NonZeroMin(BaseTimeMs, startTimeMs); + + auto finishTimeMs = task.GetFinishTimeMs(); + UpdateAggr(stageStats->MutableFinishTimeMs(), finishTimeMs); + BaseTimeMs = NonZeroMin(BaseTimeMs, finishTimeMs); + UpdateAggr(stageStats->MutableDurationUs(), stats.GetDurationUs()); UpdateAggr(stageStats->MutableWaitInputTimeUs(), task.GetWaitInputTimeUs()); UpdateAggr(stageStats->MutableWaitOutputTimeUs(), task.GetWaitOutputTimeUs()); + + UpdateAggr(stageStats->MutableSpillingComputeBytes(), task.GetSpillingComputeWriteBytes()); + UpdateAggr(stageStats->MutableSpillingChannelBytes(), task.GetSpillingChannelWriteBytes()); + UpdateAggr(stageStats->MutableSpillingComputeTimeUs(), task.GetSpillingComputeReadTimeUs() + task.GetSpillingComputeWriteTimeUs()); + UpdateAggr(stageStats->MutableSpillingChannelTimeUs(), task.GetSpillingChannelReadTimeUs() + task.GetSpillingChannelWriteTimeUs()); + FillStageDurationUs(*stageStats); for (auto& sourcesStat : task.GetSources()) { - 
UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutableIngress(), sourcesStat.GetIngress()); - UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutablePush(), sourcesStat.GetPush()); - UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutablePop(), sourcesStat.GetPop()); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutableIngress(), sourcesStat.GetIngress())); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutablePush(), sourcesStat.GetPush())); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableIngress())[sourcesStat.GetIngressName()].MutablePop(), sourcesStat.GetPop())); } for (auto& inputChannelStat : task.GetInputChannels()) { - UpdateAsyncAggr(*(*stageStats->MutableInput())[inputChannelStat.GetSrcStageId()].MutablePush(), inputChannelStat.GetPush()); - UpdateAsyncAggr(*(*stageStats->MutableInput())[inputChannelStat.GetSrcStageId()].MutablePop(), inputChannelStat.GetPop()); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableInput())[inputChannelStat.GetSrcStageId()].MutablePush(), inputChannelStat.GetPush())); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableInput())[inputChannelStat.GetSrcStageId()].MutablePop(), inputChannelStat.GetPop())); } for (auto& outputChannelStat : task.GetOutputChannels()) { - UpdateAsyncAggr(*(*stageStats->MutableOutput())[outputChannelStat.GetDstStageId()].MutablePush(), outputChannelStat.GetPush()); - UpdateAsyncAggr(*(*stageStats->MutableOutput())[outputChannelStat.GetDstStageId()].MutablePop(), outputChannelStat.GetPop()); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableOutput())[outputChannelStat.GetDstStageId()].MutablePush(), outputChannelStat.GetPush())); + BaseTimeMs = NonZeroMin(BaseTimeMs, 
UpdateAsyncAggr(*(*stageStats->MutableOutput())[outputChannelStat.GetDstStageId()].MutablePop(), outputChannelStat.GetPop())); } for (auto& sinksStat : task.GetSinks()) { - UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutablePush(), sinksStat.GetPush()); - UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutablePop(), sinksStat.GetPop()); - UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutableEgress(), sinksStat.GetEgress()); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutablePush(), sinksStat.GetPush())); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutablePop(), sinksStat.GetPop())); + BaseTimeMs = NonZeroMin(BaseTimeMs, UpdateAsyncAggr(*(*stageStats->MutableEgress())[sinksStat.GetEgressName()].MutableEgress(), sinksStat.GetEgress())); } } @@ -449,6 +701,10 @@ void TQueryExecutionStats::AddComputeActorStats(ui32 /* nodeId */, NYql::NDqProt } // checking whether the task is long + + // TODO(ilezhankin): investigate - for some reason `task.FinishTimeMs` may be large (or small?) + // enough to result in an enormous duration - triggering the "long tasks" mode. 
+ auto taskDuration = TDuration::MilliSeconds(task.GetFinishTimeMs() - task.GetStartTimeMs()); bool longTask = taskDuration > collectLongTaskStatsTimeout; if (longTask) { @@ -490,6 +746,8 @@ void TQueryExecutionStats::AddDatashardFullStatsByTask( const NYql::NDqProto::TDqTaskStats& task, ui64 datashardCpuTimeUs) { auto* stageStats = GetOrCreateStageStats(task, *TasksGraph, *Result); + // TODO: dedup with AddComputeActorFullStatsByTask + stageStats->SetTotalTasksCount(stageStats->GetTotalTasksCount() + 1); UpdateAggr(stageStats->MutableCpuTimeUs(), task.GetCpuTimeUs()); UpdateAggr(stageStats->MutableInputRows(), task.GetInputRows()); @@ -497,9 +755,14 @@ void TQueryExecutionStats::AddDatashardFullStatsByTask( UpdateAggr(stageStats->MutableOutputRows(), task.GetOutputRows()); UpdateAggr(stageStats->MutableOutputBytes(), task.GetOutputBytes()); - UpdateAggr(stageStats->MutableStartTimeMs(), task.GetStartTimeMs()); - UpdateAggr(stageStats->MutableFinishTimeMs(), task.GetFinishTimeMs()); - // UpdateAggr(stageStats->MutableDurationUs(), ??? 
); + auto startTimeMs = task.GetStartTimeMs(); + UpdateAggr(stageStats->MutableStartTimeMs(), startTimeMs); + BaseTimeMs = NonZeroMin(BaseTimeMs, startTimeMs); + + auto finishTimeMs = task.GetFinishTimeMs(); + UpdateAggr(stageStats->MutableFinishTimeMs(), finishTimeMs); + BaseTimeMs = NonZeroMin(BaseTimeMs, finishTimeMs); + UpdateAggr(stageStats->MutableWaitInputTimeUs(), task.GetWaitInputTimeUs()); UpdateAggr(stageStats->MutableWaitOutputTimeUs(), task.GetWaitOutputTimeUs()); FillStageDurationUs(*stageStats); @@ -635,8 +898,9 @@ void TQueryExecutionStats::UpdateTaskStats(ui64 taskId, const NYql::NDqProto::TD auto [it, inserted] = StageStats.try_emplace(stageId); if (inserted) { it->second.StageId = TasksGraph->GetTask(taskStats.GetTaskId()).StageId; + it->second.SetHistorySampleCount(HistorySampleCount); } - it->second.UpdateStats(taskStats, stats.GetMaxMemoryUsage(), stats.GetDurationUs()); + BaseTimeMs = NonZeroMin(BaseTimeMs, it->second.UpdateStats(taskStats, stats.GetMaxMemoryUsage(), stats.GetDurationUs())); } void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsMinMax& stats) { @@ -660,12 +924,13 @@ void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsMinMax& sta } } -void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsAggr& stats) { +void ExportOffsetAggStats(std::vector& data, NYql::NDqProto::TDqStatsAggr& stats, ui64 offset) { ui64 count = 0; ui64 min = 0; ui64 max = 0; ui64 sum = 0; for (auto d : data) { + d = (d <= offset) ? 
0 : (d - offset); if (d) { if (count) { if (min > d) min = d; @@ -685,6 +950,10 @@ void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsAggr& stats } } +void ExportAggStats(std::vector& data, NYql::NDqProto::TDqStatsAggr& stats) { + ExportOffsetAggStats(data, stats, 0); +} + ui64 ExportAggStats(std::vector& data) { ui64 sum = 0; for (auto d : data) { @@ -693,21 +962,21 @@ ui64 ExportAggStats(std::vector& data) { return sum; } -void ExportAggAsyncStats(TAsyncStats& data, NYql::NDqProto::TDqAsyncStatsAggr& stats) { - ExportAggStats(data.Bytes, *stats.MutableBytes()); +void TQueryExecutionStats::ExportAggAsyncStats(TAsyncStats& data, NYql::NDqProto::TDqAsyncStatsAggr& stats) { + data.Bytes.ExportAggStats(BaseTimeMs, *stats.MutableBytes()); ExportAggStats(data.Rows, *stats.MutableRows()); ExportAggStats(data.Chunks, *stats.MutableChunks()); ExportAggStats(data.Splits, *stats.MutableSplits()); - ExportAggStats(data.FirstMessageMs, *stats.MutableFirstMessageMs()); - ExportAggStats(data.PauseMessageMs, *stats.MutablePauseMessageMs()); - ExportAggStats(data.ResumeMessageMs, *stats.MutableResumeMessageMs()); - ExportAggStats(data.LastMessageMs, *stats.MutableLastMessageMs()); - ExportAggStats(data.WaitTimeUs, *stats.MutableWaitTimeUs()); + ExportOffsetAggStats(data.FirstMessageMs, *stats.MutableFirstMessageMs(), BaseTimeMs); + ExportOffsetAggStats(data.PauseMessageMs, *stats.MutablePauseMessageMs(), BaseTimeMs); + ExportOffsetAggStats(data.ResumeMessageMs, *stats.MutableResumeMessageMs(), BaseTimeMs); + ExportOffsetAggStats(data.LastMessageMs, *stats.MutableLastMessageMs(), BaseTimeMs); + data.WaitTimeUs.ExportAggStats(BaseTimeMs, *stats.MutableWaitTimeUs()); ExportAggStats(data.WaitPeriods, *stats.MutableWaitPeriods()); ExportAggStats(data.ActiveTimeUs, *stats.MutableActiveTimeUs()); } -void ExportAggAsyncBufferStats(TAsyncBufferStats& data, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) { +void TQueryExecutionStats::ExportAggAsyncBufferStats(TAsyncBufferStats& 
data, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) { ExportAggAsyncStats(data.Ingress, *stats.MutableIngress()); ExportAggAsyncStats(data.Push, *stats.MutablePush()); ExportAggAsyncStats(data.Pop, *stats.MutablePop()); @@ -725,8 +994,10 @@ void TQueryExecutionStats::ExportExecStats(NYql::NDqProto::TDqExecutionStats& st auto& stageStats = *protoStages[p.second.StageId.StageId]; stageStats.SetTotalTasksCount(p.second.Task2Index.size()); - ExportAggStats(p.second.CpuTimeUs, *stageStats.MutableCpuTimeUs()); + stageStats.SetBaseTimeMs(BaseTimeMs); + p.second.CpuTimeUs.ExportAggStats(BaseTimeMs, *stageStats.MutableCpuTimeUs()); ExportAggStats(p.second.SourceCpuTimeUs, *stageStats.MutableSourceCpuTimeUs()); + p.second.MaxMemoryUsage.ExportAggStats(BaseTimeMs, *stageStats.MutableMaxMemoryUsage()); ExportAggStats(p.second.InputRows, *stageStats.MutableInputRows()); ExportAggStats(p.second.InputBytes, *stageStats.MutableInputBytes()); @@ -740,11 +1011,17 @@ void TQueryExecutionStats::ExportExecStats(NYql::NDqProto::TDqExecutionStats& st ExportAggStats(p.second.EgressRows, *stageStats.MutableEgressRows()); ExportAggStats(p.second.EgressBytes, *stageStats.MutableEgressBytes()); - ExportAggStats(p.second.StartTimeMs, *stageStats.MutableStartTimeMs()); - ExportAggStats(p.second.FinishTimeMs, *stageStats.MutableFinishTimeMs()); + ExportOffsetAggStats(p.second.StartTimeMs, *stageStats.MutableStartTimeMs(), BaseTimeMs); + ExportOffsetAggStats(p.second.FinishTimeMs, *stageStats.MutableFinishTimeMs(), BaseTimeMs); ExportAggStats(p.second.DurationUs, *stageStats.MutableDurationUs()); ExportAggStats(p.second.WaitInputTimeUs, *stageStats.MutableWaitInputTimeUs()); ExportAggStats(p.second.WaitOutputTimeUs, *stageStats.MutableWaitOutputTimeUs()); + + p.second.SpillingComputeBytes.ExportAggStats(BaseTimeMs, *stageStats.MutableSpillingComputeBytes()); + p.second.SpillingChannelBytes.ExportAggStats(BaseTimeMs, *stageStats.MutableSpillingChannelBytes()); + 
p.second.SpillingComputeTimeUs.ExportAggStats(BaseTimeMs, *stageStats.MutableSpillingComputeTimeUs()); + p.second.SpillingChannelTimeUs.ExportAggStats(BaseTimeMs, *stageStats.MutableSpillingChannelTimeUs()); + FillStageDurationUs(stageStats); for (auto& p2 : p.second.Tables) { @@ -773,18 +1050,87 @@ void TQueryExecutionStats::ExportExecStats(NYql::NDqProto::TDqExecutionStats& st } } +void TQueryExecutionStats::AdjustAsyncAggr(NYql::NDqProto::TDqAsyncStatsAggr& stats) { + if (stats.HasFirstMessageMs()) { + AdjustDqStatsAggr(*stats.MutableFirstMessageMs()); + } + if (stats.HasPauseMessageMs()) { + AdjustDqStatsAggr(*stats.MutablePauseMessageMs()); + } + if (stats.HasResumeMessageMs()) { + AdjustDqStatsAggr(*stats.MutableResumeMessageMs()); + } + if (stats.HasLastMessageMs()) { + AdjustDqStatsAggr(*stats.MutableLastMessageMs()); + } +} + +void TQueryExecutionStats::AdjustAsyncBufferAggr(NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) { + if (stats.HasIngress()) { + AdjustAsyncAggr(*stats.MutableIngress()); + } + if (stats.HasPush()) { + AdjustAsyncAggr(*stats.MutablePush()); + } + if (stats.HasPop()) { + AdjustAsyncAggr(*stats.MutablePop()); + } + if (stats.HasEgress()) { + AdjustAsyncAggr(*stats.MutableEgress()); + } +} + +void TQueryExecutionStats::AdjustDqStatsAggr(NYql::NDqProto::TDqStatsAggr& stats) { + if (auto min = stats.GetMin()) { + stats.SetMin(min - BaseTimeMs); + } + if (auto max = stats.GetMax()) { + stats.SetMax(max - BaseTimeMs); + } + if (auto cnt = stats.GetCnt()) { + stats.SetSum(stats.GetSum() - BaseTimeMs * cnt); + } +} + +void TQueryExecutionStats::AdjustBaseTime(NDqProto::TDqStageStats* stageStats) { + if (stageStats->HasStartTimeMs()) { + AdjustDqStatsAggr(*stageStats->MutableStartTimeMs()); + } + if (stageStats->HasFinishTimeMs()) { + AdjustDqStatsAggr(*stageStats->MutableFinishTimeMs()); + } + for (auto& p : *stageStats->MutableIngress()) { + AdjustAsyncBufferAggr(p.second); + } + for (auto& p : *stageStats->MutableInput()) { + 
AdjustAsyncBufferAggr(p.second); + } + for (auto& p : *stageStats->MutableOutput()) { + AdjustAsyncBufferAggr(p.second); + } + for (auto& p : *stageStats->MutableEgress()) { + AdjustAsyncBufferAggr(p.second); + } +} + void TQueryExecutionStats::Finish() { // Cerr << (TStringBuilder() << "-- finish: executerTime: " << ExecuterCpuTime.MicroSeconds() << Endl); - THashMap protoStages; + for (auto& [stageId, stagetype] : TasksGraph->GetStagesInfo()) { - GetOrCreateStageStats(stageId, *TasksGraph, *Result); + auto stageStats = GetOrCreateStageStats(stageId, *TasksGraph, *Result); + stageStats->SetBaseTimeMs(BaseTimeMs); + AdjustBaseTime(stageStats); + auto it = StageStats.find(stageId.StageId); + if (it != StageStats.end()) { + it->second.ExportHistory(BaseTimeMs, *stageStats); + } } Result->SetCpuTimeUs(Result->GetCpuTimeUs() + ExecuterCpuTime.MicroSeconds()); Result->SetDurationUs(FinishTs.MicroSeconds() - StartTs.MicroSeconds()); - // Result->Result* feilds are (temporary?) commented out in proto due to lack of use + // Result->Result* fields are (temporary?) 
commented out in proto due to lack of use // // Result->SetResultBytes(ResultBytes); // Result->SetResultRows(ResultRows); diff --git a/ydb/core/kqp/executer_actor/kqp_executer_stats.h b/ydb/core/kqp/executer_actor/kqp_executer_stats.h index e3fd08455753..6a7f9d94e821 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_stats.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_stats.h @@ -14,9 +14,22 @@ NYql::NDqProto::EDqStatsMode GetDqStatsModeShard(Ydb::Table::QueryStatsCollectio bool CollectFullStats(Ydb::Table::QueryStatsCollection::Mode statsMode); bool CollectProfileStats(Ydb::Table::QueryStatsCollection::Mode statsMode); +struct TTimeSeriesStats { + std::vector Values; + ui32 HistorySampleCount = 0; + ui64 Sum = 0; + std::vector> History; + + void ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats); + void ExportAggStats(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats); + void Resize(ui32 taskCount); + void SetNonZero(ui32 taskIndex, ui64 value); + void Pack(); +}; + struct TAsyncStats { // Data - std::vector Bytes; + TTimeSeriesStats Bytes; std::vector DecompressedBytes; std::vector Rows; std::vector Chunks; @@ -26,11 +39,13 @@ struct TAsyncStats { std::vector PauseMessageMs; std::vector ResumeMessageMs; std::vector LastMessageMs; - std::vector WaitTimeUs; + TTimeSeriesStats WaitTimeUs; std::vector WaitPeriods; std::vector ActiveTimeUs; void Resize(ui32 taskCount); + void SetHistorySampleCount(ui32 historySampleCount); + void ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncStatsAggr& stats); }; struct TAsyncBufferStats { @@ -46,6 +61,8 @@ struct TAsyncBufferStats { TAsyncStats Egress; void Resize(ui32 taskCount); + void SetHistorySampleCount(ui32 historySampleCount); + void ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats); }; struct TTableStats { @@ -73,7 +90,7 @@ struct TStageExecutionStats { std::map Task2Index; - std::vector CpuTimeUs; + TTimeSeriesStats CpuTimeUs; std::vector SourceCpuTimeUs; 
std::vector InputRows; @@ -94,17 +111,25 @@ struct TStageExecutionStats { std::vector WaitInputTimeUs; std::vector WaitOutputTimeUs; + TTimeSeriesStats SpillingComputeBytes; + TTimeSeriesStats SpillingChannelBytes; + TTimeSeriesStats SpillingComputeTimeUs; + TTimeSeriesStats SpillingChannelTimeUs; + std::map Tables; std::map Ingress; std::map Egress; std::map Input; std::map Output; - std::vector MaxMemoryUsage; + TTimeSeriesStats MaxMemoryUsage; + ui32 HistorySampleCount; void Resize(ui32 taskCount); - void UpdateAsyncStats(i32 index, TAsyncStats& aggrAsyncStats, const NYql::NDqProto::TDqAsyncBufferStats& asyncStats); - void UpdateStats(const NYql::NDqProto::TDqTaskStats& taskStats, ui64 maxMemoryUsage, ui64 durationUs); + void SetHistorySampleCount(ui32 historySampleCount); + void ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStageStats& stageStats); + ui64 UpdateAsyncStats(i32 index, TAsyncStats& aggrAsyncStats, const NYql::NDqProto::TDqAsyncBufferStats& asyncStats); + ui64 UpdateStats(const NYql::NDqProto::TDqTaskStats& taskStats, ui64 maxMemoryUsage, ui64 durationUs); }; struct TQueryExecutionStats { @@ -112,6 +137,13 @@ struct TQueryExecutionStats { std::map> ShardsCountByNode; std::map UseLlvmByStageId; std::map StageStats; + ui64 BaseTimeMs = 0; + void ExportAggAsyncStats(TAsyncStats& data, NYql::NDqProto::TDqAsyncStatsAggr& stats); + void ExportAggAsyncBufferStats(TAsyncBufferStats& data, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats); + void AdjustAsyncAggr(NYql::NDqProto::TDqAsyncStatsAggr& stats); + void AdjustAsyncBufferAggr(NYql::NDqProto::TDqAsyncBufferStatsAggr& stats); + void AdjustDqStatsAggr(NYql::NDqProto::TDqStatsAggr& stats); + void AdjustBaseTime(NYql::NDqProto::TDqStageStats* stageStats); public: const Ydb::Table::QueryStatsCollection::Mode StatsMode; const TKqpTasksGraph* const TasksGraph = nullptr; @@ -119,6 +151,7 @@ struct TQueryExecutionStats { // basic stats std::unordered_set AffectedShards; + ui32 HistorySampleCount = 0; ui32 
TotalTasks = 0; ui64 ResultBytes = 0; ui64 ResultRows = 0; @@ -145,6 +178,7 @@ struct TQueryExecutionStats { , TasksGraph(tasksGraph) , Result(result) { + HistorySampleCount = 32; } void AddComputeActorStats( diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp index dcc6049176bf..d35663cb9ae7 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp @@ -37,6 +37,11 @@ std::unique_ptr CheckTaskSize(ui64 TxId, const TIntru return nullptr; } +std::unique_ptr MakeActorStartFailureError(const TActorId& executerId, const TString& reason) { + auto ev = std::make_unique(NYql::NDqProto::StatusIds::OVERLOADED, reason); + return std::make_unique(executerId, executerId, ev.release()); +} + void BuildInitialTaskResources(const TKqpTasksGraph& graph, ui64 taskId, TTaskResourceEstimation& ret) { const auto& task = graph.GetTask(taskId); const auto& stageInfo = graph.GetStageInfo(task.StageId); @@ -48,6 +53,10 @@ void BuildInitialTaskResources(const TKqpTasksGraph& graph, ui64 taskId, TTaskRe ret.HeavyProgram = opts.GetHasMapJoin(); } +bool LimitCPU(TIntrusivePtr ctx) { + return ctx->PoolId && ctx->PoolConfig.has_value() && ctx->PoolConfig->TotalCpuLimitPercentPerNode > 0; +} + } bool TKqpPlanner::UseMockEmptyPlanner = false; @@ -55,10 +64,11 @@ bool TKqpPlanner::UseMockEmptyPlanner = false; // Task can allocate extra memory during execution. // So, we estimate total memory amount required for task as apriori task size multiplied by this constant. 
constexpr ui32 MEMORY_ESTIMATION_OVERFLOW = 2; -constexpr ui32 MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT = 8; TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args) : TxId(args.TxId) + , LockTxId(args.LockTxId) + , LockNodeId(args.LockNodeId) , ExecuterId(args.Executer) , Snapshot(args.Snapshot) , Database(args.Database) @@ -81,7 +91,13 @@ TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args) , OutputChunkMaxSize(args.OutputChunkMaxSize) , GUCSettings(std::move(args.GUCSettings)) , MayRunTasksLocally(args.MayRunTasksLocally) + , ResourceManager_(args.ResourceManager_) + , CaFactory_(args.CaFactory_) { + if (GUCSettings) { + SerializedGUCSettings = GUCSettings->SerializeToString(); + } + if (!Database) { // a piece of magic for tests if (const auto& domain = AppData()->DomainsInfo->Domain) { @@ -89,6 +105,10 @@ TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args) LOG_E("Database not set, use " << Database); } } + + if (LimitCPU(UserRequestContext)) { + AllowSinglePartitionOpt = false; + } } // ResourcesSnapshot, ResourceEstimations @@ -165,6 +185,10 @@ std::unique_ptr TKqpPlanner::SerializeReque auto result = std::make_unique(TasksGraph.GetMeta().GetArenaIntrusivePtr()); auto& request = result->Record; request.SetTxId(TxId); + if (LockTxId) { + request.SetLockTxId(*LockTxId); + request.SetLockNodeId(LockNodeId); + } ActorIdToProto(ExecuterId, request.MutableExecuterActorId()); if (Deadline) { @@ -182,6 +206,7 @@ std::unique_ptr TKqpPlanner::SerializeReque request.SetStartAllOrFail(true); if (UseDataQueryPool) { request.MutableRuntimeSettings()->SetExecType(NYql::NDqProto::TComputeRuntimeSettings::DATA); + request.MutableRuntimeSettings()->SetUseSpilling(WithSpilling); } else { request.MutableRuntimeSettings()->SetExecType(NYql::NDqProto::TComputeRuntimeSettings::SCAN); request.MutableRuntimeSettings()->SetUseSpilling(WithSpilling); @@ -205,8 +230,19 @@ std::unique_ptr TKqpPlanner::SerializeReque request.SetOutputChunkMaxSize(OutputChunkMaxSize); } - if (GUCSettings) { - 
request.SetSerializedGUCSettings(GUCSettings->SerializeToString()); + if (SerializedGUCSettings) { + request.SetSerializedGUCSettings(SerializedGUCSettings); + } + + request.SetSchedulerGroup(UserRequestContext->PoolId); + request.SetDatabase(Database); + request.SetDatabaseId(UserRequestContext->DatabaseId); + if (UserRequestContext->PoolConfig.has_value()) { + request.SetMemoryPoolPercent(UserRequestContext->PoolConfig->QueryMemoryLimitPercentPerNode); + request.SetPoolMaxCpuShare(UserRequestContext->PoolConfig->TotalCpuLimitPercentPerNode / 100.0); + if (UserRequestContext->PoolConfig->QueryCpuLimitPercentPerNode >= 0) { + request.SetQueryCpuShare(UserRequestContext->PoolConfig->QueryCpuLimitPercentPerNode / 100.0); + } } return result; @@ -242,11 +278,26 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { PrepareToProcess(); - auto localResources = GetKqpResourceManager()->GetLocalResources(); + auto localResources = ResourceManager_->GetLocalResources(); Y_UNUSED(MEMORY_ESTIMATION_OVERFLOW); + + auto placingOptions = ResourceManager_->GetPlacingOptions(); + + ui64 nonParallelLimit = placingOptions.MaxNonParallelTasksExecutionLimit; + if (MayRunTasksLocally) { + // not applied to column shards and external sources + nonParallelLimit = placingOptions.MaxNonParallelDataQueryTasksLimit; + } + + bool singleNodeExecutionMakeSence = ( + ResourceEstimations.size() <= nonParallelLimit || + // all readers are located on the one node. 
+ TasksPerNode.size() == 1 + ); + if (LocalRunMemoryEst * MEMORY_ESTIMATION_OVERFLOW <= localResources.Memory[NRm::EKqpMemoryPool::ScanQuery] && ResourceEstimations.size() <= localResources.ExecutionUnits && - ResourceEstimations.size() <= MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT) + singleNodeExecutionMakeSence) { ui64 selfNodeId = ExecuterId.NodeId(); for(ui64 taskId: ComputeTasks) { @@ -257,7 +308,7 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { } if (ResourcesSnapshot.empty()) { - ResourcesSnapshot = std::move(GetKqpResourceManager()->GetClusterResources()); + ResourcesSnapshot = std::move(ResourceManager_->GetClusterResources()); } if (ResourcesSnapshot.empty() || (ResourcesSnapshot.size() == 1 && ResourcesSnapshot[0].GetNodeId() == ExecuterId.NodeId())) { @@ -281,22 +332,24 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { return std::make_unique(ExecuterId, ExecuterId, ev.Release()); } - auto planner = (UseMockEmptyPlanner ? CreateKqpMockEmptyPlanner() : CreateKqpGreedyPlanner()); // KqpMockEmptyPlanner is a mock planner for tests + std::vector deepestTasks; + ui64 maxLevel = 0; + for(auto& task: TasksGraph.GetTasks()) { + // const auto& task = TasksGraph.GetTask(taskId); + const auto& stageInfo = TasksGraph.GetStageInfo(task.StageId); + const NKqpProto::TKqpPhyStage& stage = stageInfo.Meta.GetStage(stageInfo.Id); + const ui64 stageLevel = stage.GetProgram().GetSettings().GetStageLevel(); - auto ctx = TlsActivationContext->AsActorContext(); - if (ctx.LoggerSettings() && ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER)) { - planner->SetLogFunc([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); - } + if (stageLevel > maxLevel) { + maxLevel = stageLevel; + deepestTasks.clear(); + } - THashMap nodeIdtoIdx; - for (size_t idx = 0; idx < ResourcesSnapshot.size(); ++idx) { - nodeIdtoIdx[ResourcesSnapshot[idx].nodeid()] = idx; + if (stageLevel == maxLevel) { + 
deepestTasks.push_back(task.Id); + } } - LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); - - auto plan = planner->Plan(ResourcesSnapshot, ResourceEstimations); - THashMap alreadyAssigned; for(auto& [nodeId, tasks] : TasksPerNode) { for(ui64 taskId: tasks) { @@ -304,81 +357,127 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { } } - if (!plan.empty()) { - for (auto& group : plan) { - for(ui64 taskId: group.TaskIds) { - auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId); - if (success) { - TasksPerNode[group.NodeId].push_back(taskId); - } + if (deepestTasks.size() <= placingOptions.MaxNonParallelTopStageExecutionLimit) { + // looks like the merge / union all connection + for(ui64 taskId: deepestTasks) { + auto [it, success] = alreadyAssigned.emplace(taskId, ExecuterId.NodeId()); + if (success) { + TasksPerNode[ExecuterId.NodeId()].push_back(taskId); } } + } - return nullptr; - } else { - LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_E(msg); }); + auto planner = (UseMockEmptyPlanner ? CreateKqpMockEmptyPlanner() : CreateKqpGreedyPlanner()); // KqpMockEmptyPlanner is a mock planner for tests - auto ev = MakeHolder(NYql::NDqProto::StatusIds::PRECONDITION_FAILED, - TStringBuilder() << "Not enough resources to execute query. 
" << "TraceId: " << UserRequestContext->TraceId); - return std::make_unique(ExecuterId, ExecuterId, ev.Release()); + auto ctx = TlsActivationContext->AsActorContext(); + if (ctx.LoggerSettings() && ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER)) { + planner->SetLogFunc([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); } -} -const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const { - return TasksGraph.GetMeta().Snapshot; -} + LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); -// optimizeProtoForLocalExecution - if we want to execute compute actor locally and don't want to serialize & then deserialize proto message -// instead we just give ptr to proto message and after that we swap/copy it -void TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, bool shareMailbox, bool optimizeProtoForLocalExecution) { + ui64 selfNodeId = ExecuterId.NodeId(); + TString selfNodeDC; - auto& task = TasksGraph.GetTask(taskId); - NYql::NDqProto::TDqTask* taskDesc = ArenaSerializeTaskToProto(TasksGraph, task, /* serializeAsyncIoSettings = */ !optimizeProtoForLocalExecution); + TVector allNodes; + TVector executerDcNodes; + allNodes.reserve(ResourcesSnapshot.size()); - NYql::NDq::TComputeRuntimeSettings settings; - if (Deadline) { - settings.Timeout = Deadline - TAppData::TimeProvider->Now(); + for(auto& snapNode: ResourcesSnapshot) { + const TString& dc = snapNode.GetKqpProxyNodeResources().GetDataCenterId(); + if (snapNode.GetNodeId() == selfNodeId) { + selfNodeDC = dc; + break; + } } - settings.ExtraMemoryAllocationPool = NRm::EKqpMemoryPool::DataQuery; - settings.FailOnUndelivery = true; - settings.StatsMode = GetDqStatsMode(StatsMode); - settings.UseSpilling = WithSpilling; + for(auto& snapNode: ResourcesSnapshot) { + allNodes.push_back(&snapNode); + if (selfNodeDC == snapNode.GetKqpProxyNodeResources().GetDataCenterId()) { + 
executerDcNodes.push_back(&snapNode); + } + } - NYql::NDq::TComputeMemoryLimits limits; - limits.ChannelBufferSize = 32_MB; // Depends on NYql::NDq::TDqOutputChannelSettings::ChunkSizeLimit (now 48 MB) with a ratio of 1.5 - limits.OutputChunkMaxSize = OutputChunkMaxSize; - limits.MkqlLightProgramMemoryLimit = MkqlMemoryLimit > 0 ? std::min(500_MB, MkqlMemoryLimit) : 500_MB; - limits.MkqlHeavyProgramMemoryLimit = MkqlMemoryLimit > 0 ? std::min(2_GB, MkqlMemoryLimit) : 2_GB; + TVector plan; - auto& taskOpts = taskDesc->GetProgram().GetSettings(); - auto limit = taskOpts.GetHasMapJoin() /* || opts.GetHasSort()*/ - ? limits.MkqlHeavyProgramMemoryLimit - : limits.MkqlLightProgramMemoryLimit; + if (!executerDcNodes.empty() && placingOptions.PreferLocalDatacenterExecution) { + plan = planner->Plan(executerDcNodes, ResourceEstimations); + } + + if (plan.empty()) { + plan = planner->Plan(allNodes, ResourceEstimations); + } - limits.MemoryQuotaManager = std::make_shared(limit * 2, limit); + if (plan.empty()) { + LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_E(msg); }); - auto computeActor = NKikimr::NKqp::CreateKqpComputeActor(ExecuterId, TxId, taskDesc, AsyncIoFactory, - settings, limits, ExecuterSpan.GetTraceId(), TasksGraph.GetMeta().GetArenaIntrusivePtr(), FederatedQuerySetup, GUCSettings); + auto ev = MakeHolder(NYql::NDqProto::StatusIds::PRECONDITION_FAILED, + TStringBuilder() << "Not enough resources to execute query. 
" << "TraceId: " << UserRequestContext->TraceId); + return std::make_unique(ExecuterId, ExecuterId, ev.Release()); + } - if (optimizeProtoForLocalExecution) { - TVector& taskSourceSettings = static_cast(computeActor)->MutableTaskSourceSettings(); - taskSourceSettings.assign(task.Inputs.size(), nullptr); - for (size_t i = 0; i < task.Inputs.size(); ++i) { - const auto input = task.Inputs[i]; - if (input.Type() == NYql::NDq::TTaskInputType::Source && Y_LIKELY(input.Meta.SourceSettings)) { - taskSourceSettings[i] = (&(*input.Meta.SourceSettings)); + for (auto& group : plan) { + for(ui64 taskId: group.TaskIds) { + auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId); + if (success) { + TasksPerNode[group.NodeId].push_back(taskId); } } } - auto computeActorId = shareMailbox ? TlsActivationContext->AsActorContext().RegisterWithSameMailbox(computeActor) : TlsActivationContext->AsActorContext().Register(computeActor); - task.ComputeActorId = computeActorId; + return nullptr; +} - LOG_D("Executing task: " << taskId << " on compute actor: " << task.ComputeActorId); +const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const { + return TasksGraph.GetMeta().Snapshot; +} - auto result = PendingComputeActors.emplace(task.ComputeActorId, TProgressStat()); - YQL_ENSURE(result.second); +// optimizeProtoForLocalExecution - if we want to execute compute actor locally and don't want to serialize & then deserialize proto message +// instead we just give ptr to proto message and after that we swap/copy it +TString TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize) { + auto& task = TasksGraph.GetTask(taskId); + NYql::NDqProto::TDqTask* taskDesc = ArenaSerializeTaskToProto(TasksGraph, task, true); + NYql::NDq::TComputeRuntimeSettings settings; + if (!TxInfo) { + double memoryPoolPercent = 100; + if (UserRequestContext->PoolConfig.has_value()) { + memoryPoolPercent = UserRequestContext->PoolConfig->QueryMemoryLimitPercentPerNode; + } + + TxInfo = 
MakeIntrusive( + TxId, TInstant::Now(), ResourceManager_->GetCounters(), + UserRequestContext->PoolId, memoryPoolPercent, Database); + } + + auto startResult = CaFactory_->CreateKqpComputeActor({ + .ExecuterId = ExecuterId, + .TxId = TxId, + .LockTxId = LockTxId, + .LockNodeId = LockNodeId, + .Task = taskDesc, + .TxInfo = TxInfo, + .RuntimeSettings = settings, + .TraceId = NWilson::TTraceId(ExecuterSpan.GetTraceId()), + .Arena = TasksGraph.GetMeta().GetArenaIntrusivePtr(), + .SerializedGUCSettings = SerializedGUCSettings, + .NumberOfTasks = computeTasksSize, + .OutputChunkMaxSize = OutputChunkMaxSize, + .MemoryPool = NRm::EKqpMemoryPool::DataQuery, + .WithSpilling = WithSpilling, + .StatsMode = GetDqStatsMode(StatsMode), + .Deadline = Deadline, + .ShareMailbox = (computeTasksSize <= 1), + .RlPath = Nothing(), + }); + + if (const auto* rmResult = std::get_if(&startResult)) { + return rmResult->GetFailReason(); + } + + TActorId* actorId = std::get_if(&startResult); + Y_ABORT_UNLESS(actorId); + AcknowledgeCA(taskId, *actorId, nullptr); + return TString(); } ui32 TKqpPlanner::GetnScanTasks() { @@ -415,9 +514,11 @@ std::unique_ptr TKqpPlanner::PlanExecution() { // explicit requirement to execute task on the same node because it has dependencies // on datashard tx. if (LocalComputeTasks) { - bool shareMailbox = (ComputeTasks.size() <= 1); for (ui64 taskId : ComputeTasks) { - ExecuteDataComputeTask(taskId, shareMailbox, /* optimizeProtoForLocalExecution = */ true); + auto result = ExecuteDataComputeTask(taskId, ComputeTasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } } ComputeTasks.clear(); } @@ -427,7 +528,10 @@ std::unique_ptr TKqpPlanner::PlanExecution() { // to execute this task locally so we can avoid useless overhead for remote task launching. 
for (auto& [shardId, tasks]: TasksPerNode) { for (ui64 taskId: tasks) { - ExecuteDataComputeTask(taskId, true, /* optimizeProtoForLocalExecution = */ true); + auto result = ExecuteDataComputeTask(taskId, tasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } } } @@ -452,10 +556,11 @@ std::unique_ptr TKqpPlanner::PlanExecution() { auto tasksOnNodeIt = TasksPerNode.find(ExecuterId.NodeId()); if (tasksOnNodeIt != TasksPerNode.end()) { auto& tasks = tasksOnNodeIt->second; - const bool shareMailbox = (tasks.size() <= 1); for (ui64 taskId: tasks) { - ExecuteDataComputeTask(taskId, shareMailbox, /* optimizeProtoForLocalExecution = */ true); - PendingComputeTasks.erase(taskId); + auto result = ExecuteDataComputeTask(taskId, tasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } } } } @@ -497,17 +602,97 @@ void TKqpPlanner::Unsubscribe() { } } -THashMap& TKqpPlanner::GetPendingComputeActors() { +bool TKqpPlanner::AcknowledgeCA(ui64 taskId, TActorId computeActor, const NYql::NDqProto::TEvComputeActorState* state) { + auto& task = TasksGraph.GetTask(taskId); + if (!task.ComputeActorId) { + task.ComputeActorId = computeActor; + PendingComputeTasks.erase(taskId); + auto [it, success] = PendingComputeActors.try_emplace(computeActor); + YQL_ENSURE(success); + if (state && state->HasStats()) { + it->second.Set(state->GetStats()); + } + + return true; + } + + YQL_ENSURE(task.ComputeActorId == computeActor); + auto it = PendingComputeActors.find(computeActor); + if (!task.Meta.Completed) { + YQL_ENSURE(it != PendingComputeActors.end()); + } + + if (it != PendingComputeActors.end() && state && state->HasStats()) { + it->second.Set(state->GetStats()); + } + + return false; +} + +bool TKqpPlanner::CompletedCA(ui64 taskId, TActorId computeActor) { + auto& task = TasksGraph.GetTask(taskId); + if (task.Meta.Completed) { + YQL_ENSURE(!PendingComputeActors.contains(computeActor)); + return false; + } 
+ + task.Meta.Completed = true; + auto it = PendingComputeActors.find(computeActor); + YQL_ENSURE(it != PendingComputeActors.end()); + LastStats.emplace_back(std::move(it->second)); + PendingComputeActors.erase(it); + + LOG_I("Compute actor has finished execution: " << computeActor.ToString()); + + return true; +} + +void TKqpPlanner::TaskNotStarted(ui64 taskId) { + // NOTE: should be invoked only while shutting down - when node is disconnected. + + auto& task = TasksGraph.GetTask(taskId); + + YQL_ENSURE(!task.ComputeActorId); + YQL_ENSURE(!task.Meta.Completed); + + PendingComputeTasks.erase(taskId); +} + +TProgressStat::TEntry TKqpPlanner::CalculateConsumptionUpdate() { + TProgressStat::TEntry consumption; + + for (const auto& p : PendingComputeActors) { + const auto& t = p.second.GetLastUsage(); + consumption += t; + } + + for (const auto& p : LastStats) { + const auto& t = p.GetLastUsage(); + consumption += t; + } + + return consumption; +} + +void TKqpPlanner::ShiftConsumption() { + for (auto& p : PendingComputeActors) { + p.second.Update(); + } + + for (auto& p : LastStats) { + p.Update(); + } +} + +const THashMap& TKqpPlanner::GetPendingComputeActors() { return PendingComputeActors; } -THashSet& TKqpPlanner::GetPendingComputeTasks() { +const THashSet& TKqpPlanner::GetPendingComputeTasks() { return PendingComputeTasks; } void TKqpPlanner::PrepareToProcess() { - auto rmConfig = GetKqpResourceManager()->GetConfig(); - ui32 tasksCount = ComputeTasks.size(); for (auto& [shardId, tasks] : TasksPerNode) { tasksCount += tasks.size(); @@ -518,7 +703,7 @@ void TKqpPlanner::PrepareToProcess() { for (size_t i = 0; i < ComputeTasks.size(); ++i) { BuildInitialTaskResources(TasksGraph, ComputeTasks[i], ResourceEstimations[i]); - EstimateTaskResources(rmConfig, ResourceEstimations[i], ComputeTasks.size()); + ResourceManager_->EstimateTaskResources(ResourceEstimations[i], ComputeTasks.size()); LocalRunMemoryEst += ResourceEstimations[i].TotalMemoryLimit; } @@ -526,7 +711,7 @@ 
void TKqpPlanner::PrepareToProcess() { for(auto& [nodeId, tasks] : TasksPerNode) { for (ui64 taskId: tasks) { BuildInitialTaskResources(TasksGraph, taskId, ResourceEstimations[currentEst]); - EstimateTaskResources(rmConfig, ResourceEstimations[currentEst], tasks.size()); + ResourceManager_->EstimateTaskResources(ResourceEstimations[currentEst], tasks.size()); LocalRunMemoryEst += ResourceEstimations[currentEst].TotalMemoryLimit; ++currentEst; } @@ -547,4 +732,4 @@ std::unique_ptr CreateKqpPlanner(TKqpPlanner::TArgs args) { return std::make_unique(std::move(args)); } -} // namespace NKikimr::NKqp +} // namespace NKikimr::NKqp \ No newline at end of file diff --git a/ydb/core/kqp/executer_actor/kqp_planner.h b/ydb/core/kqp/executer_actor/kqp_planner.h index 01efd3e79454..746cc531cd43 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.h +++ b/ydb/core/kqp/executer_actor/kqp_planner.h @@ -43,6 +43,8 @@ class TKqpPlanner { struct TArgs { TKqpTasksGraph& TasksGraph; const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; const TActorId& Executer; const IKqpGateway::TKqpSnapshot& Snapshot; const TString& Database; @@ -64,18 +66,25 @@ class TKqpPlanner { const ui64 OutputChunkMaxSize = 0; const TGUCSettings::TPtr GUCSettings; const bool MayRunTasksLocally = false; + const std::shared_ptr& ResourceManager_; + const std::shared_ptr& CaFactory_; }; TKqpPlanner(TKqpPlanner::TArgs&& args); bool SendStartKqpTasksRequest(ui32 requestId, const TActorId& target); std::unique_ptr PlanExecution(); std::unique_ptr AssignTasksToNodes(); + bool AcknowledgeCA(ui64 taskId, TActorId computeActor, const NYql::NDqProto::TEvComputeActorState* state); + bool CompletedCA(ui64 taskId, TActorId computeActor); + void TaskNotStarted(ui64 taskId); + TProgressStat::TEntry CalculateConsumptionUpdate(); + void ShiftConsumption(); void Submit(); ui32 GetCurrentRetryDelay(ui32 requestId); void Unsubscribe(); - THashMap& GetPendingComputeActors(); - THashSet& GetPendingComputeTasks(); + const 
THashMap& GetPendingComputeActors(); + const THashSet& GetPendingComputeTasks(); ui32 GetnScanTasks(); ui32 GetnComputeTasks(); @@ -83,7 +92,7 @@ class TKqpPlanner { private: const IKqpGateway::TKqpSnapshot& GetSnapshot() const; - void ExecuteDataComputeTask(ui64 taskId, bool shareMailbox, bool optimizeProtoForLocalExecution); + TString ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize); void PrepareToProcess(); TString GetEstimationsInfo() const; @@ -94,6 +103,8 @@ class TKqpPlanner { private: const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; const TActorId ExecuterId; TVector ComputeTasks; THashMap> TasksPerNode; @@ -128,6 +139,11 @@ class TKqpPlanner { const ui64 OutputChunkMaxSize; const TGUCSettings::TPtr GUCSettings; const bool MayRunTasksLocally; + TString SerializedGUCSettings; + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; + TIntrusivePtr TxInfo; + TVector LastStats; public: static bool UseMockEmptyPlanner; // for tests: if true then use TKqpMockEmptyPlanner that leads to the error diff --git a/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp b/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp index ce395eb98ec5..0e3755ec4e53 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp @@ -90,16 +90,16 @@ class TNodesManager { return result; } - TNodesManager(const TVector& nodeResources) { + TNodesManager(const TVector& nodeResources) { for (auto& node : nodeResources) { - if (!node.GetAvailableComputeActors()) { + if (!node->GetAvailableComputeActors()) { continue; } Nodes.emplace_back(TNodeDesc{ - node.GetNodeId(), - ActorIdFromProto(node.GetResourceManagerActorId()), - node.GetTotalMemory() - node.GetUsedMemory(), - node.GetAvailableComputeActors(), + node->GetNodeId(), + ActorIdFromProto(node->GetResourceManagerActorId()), + node->GetTotalMemory() - node->GetUsedMemory(), + node->GetAvailableComputeActors(), {} }); } @@ -111,7 +111,7 @@ 
class TKqpGreedyPlanner : public IKqpPlannerStrategy { public: ~TKqpGreedyPlanner() override {} - TVector Plan(const TVector& nodeResources, + TVector Plan(const TVector& nodeResources, const TVector& tasks) override { TVector result; @@ -161,7 +161,7 @@ class TKqpMockEmptyPlanner : public IKqpPlannerStrategy { public: ~TKqpMockEmptyPlanner() override {} - TVector Plan(const TVector&, + TVector Plan(const TVector&, const TVector&) override { return {}; diff --git a/ydb/core/kqp/executer_actor/kqp_planner_strategy.h b/ydb/core/kqp/executer_actor/kqp_planner_strategy.h index 548e18e1511b..841515f93c48 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner_strategy.h +++ b/ydb/core/kqp/executer_actor/kqp_planner_strategy.h @@ -23,7 +23,7 @@ class IKqpPlannerStrategy { TVector TaskIds; }; - virtual TVector Plan(const TVector& nodeResources, + virtual TVector Plan(const TVector& nodeResources, const TVector& estimatedResources) = 0; protected: diff --git a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp index 771b15510290..a4b7363fe7a2 100644 --- a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp @@ -112,7 +112,7 @@ class TKqpScanExecuter : public TKqpExecuterBaseGetTypeRewrite()) { - hFunc(TEvDqCompute::TEvState, HandleComputeStats); + hFunc(TEvDqCompute::TEvState, HandleComputeState); hFunc(TEvDqCompute::TEvChannelData, HandleChannelData); // from CA hFunc(TEvKqpExecuter::TEvStreamDataAck, HandleStreamAck); hFunc(TEvKqp::TEvAbortExecution, HandleAbortExecution); @@ -274,34 +274,11 @@ class TKqpScanExecuter : public TKqpExecuterBaseRecord.MutableResponse(); - - response.SetStatus(status); - - if (Stats) { - ReportEventElapsedTime(); - - Stats->FinishTs = TInstant::Now(); - Stats->Finish(); - - if (Stats->CollectStatsByLongTasks || CollectFullStats(Request.StatsMode)) { - const auto& tx = Request.Transactions[0].Body; - auto planWithStats = 
AddExecStatsToTxPlan(tx->GetPlan(), response.GetResult().GetStats()); - response.MutableResult()->MutableStats()->AddTxPlansWithStats(planWithStats); - } - - if (Stats->CollectStatsByLongTasks) { - const auto& txPlansWithStats = response.GetResult().GetStats().GetTxPlansWithStats(); - if (!txPlansWithStats.empty()) { - LOG_N("Full stats: " << txPlansWithStats); - } - } - } - } - void Finalize() { - FillResponseStats(Ydb::StatusIds::SUCCESS); + YQL_ENSURE(!AlreadyReplied); + AlreadyReplied = true; + + ResponseEv->Record.MutableResponse()->SetStatus(Ydb::StatusIds::SUCCESS); LWTRACK(KqpScanExecuterFinalize, ResponseEv->Orbit, TxId, LastTaskId, LastComputeActorId, ResponseEv->ResultsSize()); @@ -309,8 +286,6 @@ class TKqpScanExecuter : public TKqpExecuterBase +#include #include #include #include @@ -71,6 +72,7 @@ class TKqpSchemeExecuter : public TActorBootstrapped { , RequestType(requestType) , KqpTempTablesAgentActor(kqpTempTablesAgentActor) { + YQL_ENSURE(RequestContext); YQL_ENSURE(PhyTx); YQL_ENSURE(PhyTx->GetType() == NKqpProto::TKqpPhyTx::TYPE_SCHEME); @@ -307,6 +309,47 @@ class TKqpSchemeExecuter : public TActorBootstrapped { break; } + case NKqpProto::TKqpSchemeOperation::kAnalyzeTable: { + const auto& analyzeOperation = schemeOp.GetAnalyzeTable(); + + auto analyzePromise = NewPromise(); + + TVector columns{analyzeOperation.columns().begin(), analyzeOperation.columns().end()}; + IActor* analyzeActor = new TAnalyzeActor(analyzeOperation.GetTablePath(), columns, analyzePromise); + + auto actorSystem = TlsActivationContext->AsActorContext().ExecutorThread.ActorSystem; + RegisterWithSameMailbox(analyzeActor); + + auto selfId = SelfId(); + analyzePromise.GetFuture().Subscribe([actorSystem, selfId](const TFuture& future) { + auto ev = MakeHolder(); + ev->Result = future.GetValue(); + + actorSystem->Send(selfId, ev.Release()); + }); + + Become(&TKqpSchemeExecuter::ExecuteState); + return; + } + + case NKqpProto::TKqpSchemeOperation::kCreateTopic: { + const auto& 
modifyScheme = schemeOp.GetCreateTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + + case NKqpProto::TKqpSchemeOperation::kAlterTopic: { + const auto& modifyScheme = schemeOp.GetAlterTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + + case NKqpProto::TKqpSchemeOperation::kDropTopic: { + const auto& modifyScheme = schemeOp.GetDropTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + default: InternalError(TStringBuilder() << "Unexpected scheme operation: " << (ui32) schemeOp.GetOperationCase()); @@ -360,6 +403,7 @@ class TKqpSchemeExecuter : public TActorBootstrapped { NMetadata::NModifications::IOperationsManager::TExternalModificationContext context; context.SetDatabase(Database); + context.SetDatabaseId(RequestContext->DatabaseId); context.SetActorSystem(actorSystem); if (UserToken) { context.SetUserToken(*UserToken); @@ -435,7 +479,7 @@ class TKqpSchemeExecuter : public TActorBootstrapped { } void Handle(TEvPrivate::TEvMakeTempDirResult::TPtr& result) { - if (!result->Get()->Result.Success()) { + if (!result->Get()->Result.Success()) { InternalError(TStringBuilder() << "Error creating temporary directory for session " << SessionId << ": " << result->Get()->Result.Issues().ToString(true)); diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp index b89b83e45785..fb53a62369bb 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp @@ -536,7 +536,8 @@ bool IsCrossShardChannel(const TKqpTasksGraph& tasksGraph, const TChannel& chann return false; } - return targetShard != tasksGraph.GetTask(channel.SrcTask).Meta.ShardId; + ui64 srcShard = tasksGraph.GetTask(channel.SrcTask).Meta.ShardId; + return srcShard && targetShard != srcShard; } void TShardKeyRanges::AddPoint(TSerializedCellVec&& point) 
{ @@ -1127,6 +1128,7 @@ void FillInputDesc(const TKqpTasksGraph& tasksGraph, NYql::NDqProto::TTaskInput& if (lockTxId) { input.Meta.StreamLookupSettings->SetLockTxId(*lockTxId); + input.Meta.StreamLookupSettings->SetLockNodeId(tasksGraph.GetMeta().LockNodeId); } transformProto->MutableSettings()->PackFrom(*input.Meta.StreamLookupSettings); } else if (input.Meta.SequencerSettings) { diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h index e9141c7d5f84..6c0c1c729e73 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h @@ -91,6 +91,7 @@ struct TStageInfoMeta { struct TGraphMeta { IKqpGateway::TKqpSnapshot Snapshot; TMaybe LockTxId; + ui32 LockNodeId; std::unordered_map ResultChannelProxies; TActorId ExecuterId; bool UseFollowers = false; @@ -117,6 +118,10 @@ struct TGraphMeta { void SetLockTxId(TMaybe lockTxId) { LockTxId = lockTxId; } + + void SetLockNodeId(ui32 lockNodeId) { + LockNodeId = lockNodeId; + } }; struct TTaskInputMeta { @@ -171,6 +176,7 @@ struct TTaskMeta { ui32 Type = Unknown; TActorId ResultChannelActorId; + bool Completed = false; THashMap TaskParams; // Params for sources/sinks TVector ReadRanges; // Partitioning for sources THashMap SecureParams; diff --git a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp index 4889ee332b27..937ceb64e1d7 100644 --- a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp +++ b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace NKikimr { namespace NKqp { @@ -28,7 +29,7 @@ NKqpProto::TKqpPhyTx BuildTxPlan(const TString& sql, TIntrusivePtr IModuleResolver::TPtr moduleResolver; UNIT_ASSERT(GetYqlDefaultModuleResolver(moduleCtx, moduleResolver)); - auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, nullptr, Nothing(), nullptr, nullptr, false, 
false, nullptr, actorSystem); + auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, NKikimrConfig::TQueryServiceConfig(), nullptr, false, false, nullptr, actorSystem); auto result = qp->SyncPrepareDataQuery(sql, IKqpHost::TPrepareSettings()); result.Issues().PrintTo(Cerr); UNIT_ASSERT(result.Success()); @@ -41,7 +42,7 @@ NKqpProto::TKqpPhyTx BuildTxPlan(const TString& sql, TIntrusivePtr [[maybe_unused]] TIntrusivePtr MakeIcGateway(const TKikimrRunner& kikimr) { auto actorSystem = kikimr.GetTestServer().GetRuntime()->GetAnyNodeActorSystem(); - return CreateKikimrIcGateway(TString(DefaultKikimrClusterName), "/Root", TKqpGatewaySettings(), + return CreateKikimrIcGateway(TString(DefaultKikimrClusterName), "/Root", "/Root", TKqpGatewaySettings(), actorSystem, kikimr.GetTestServer().GetRuntime()->GetNodeId(0), TAlignedPagePoolCounters(), kikimr.GetTestServer().GetSettings().AppConfig->GetQueryServiceConfig()); } diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 205fecddc662..532ed349b4e3 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -209,7 +209,10 @@ { "Name": "TKqlStreamLookupIndex", "Base": "TKqlLookupIndexBase", - "Match": {"Type": "Callable", "Name": "KqlStreamLookupIndex"} + "Match": {"Type": "Callable", "Name": "KqlStreamLookupIndex"}, + "Children": [ + {"Index": 4, "Name": "LookupStrategy", "Type": "TCoAtom"} + ] }, { "Name": "TKqlEffectBase", diff --git a/ydb/core/kqp/federated_query/kqp_federated_query_actors.cpp b/ydb/core/kqp/federated_query/kqp_federated_query_actors.cpp index c6150636e5df..6cc75e9915ba 100644 --- a/ydb/core/kqp/federated_query/kqp_federated_query_actors.cpp +++ b/ydb/core/kqp/federated_query/kqp_federated_query_actors.cpp @@ -20,17 +20,31 @@ class TDescribeSecretsActor: public 
NActors::TActorBootstrappedGetSecretValue(NMetadata::NSecret::TSecretIdOrValue::BuildAsId(secretId), secretValue); - if (!isFound) { - if (!AskSent) { - AskSent = true; - Send(NMetadata::NProvider::MakeServiceId(SelfId().NodeId()), new NMetadata::NProvider::TEvAskSnapshot(GetSecretsSnapshotParser())); - } else { - CompleteAndPassAway(TEvDescribeSecretsResponse::TDescription(Ydb::StatusIds::BAD_REQUEST, { NYql::TIssue("secret with name '" + secretId.GetSecretId() + "' not found") })); - } + bool isFound = snapshot->GetSecretValue(NMetadata::NSecret::TSecretIdOrValue::BuildAsId(secretId), secretValue); + if (isFound) { + secretValues.push_back(secretValue); + continue; + } + + auto secretIds = snapshot->GetSecretIds(UserToken, secretId.GetSecretId()); + if (secretIds.size() > 1) { + CompleteAndPassAway(TEvDescribeSecretsResponse::TDescription(Ydb::StatusIds::BAD_REQUEST, { NYql::TIssue("several secrets with name '" + secretId.GetSecretId() + "' were found") })); return; } - secretValues.push_back(secretValue); + + isFound = !secretIds.empty() && snapshot->GetSecretValue(NMetadata::NSecret::TSecretIdOrValue::BuildAsId(secretIds[0]), secretValue); + if (isFound) { + secretValues.push_back(secretValue); + continue; + } + + if (!AskSent) { + AskSent = true; + Send(NMetadata::NProvider::MakeServiceId(SelfId().NodeId()), new NMetadata::NProvider::TEvAskSnapshot(GetSecretsSnapshotParser())); + } else { + CompleteAndPassAway(TEvDescribeSecretsResponse::TDescription(Ydb::StatusIds::BAD_REQUEST, { NYql::TIssue("secret with name '" + secretId.GetSecretId() + "' not found") })); + } + return; } CompleteAndPassAway(TEvDescribeSecretsResponse::TDescription(secretValues)); @@ -49,7 +63,8 @@ class TDescribeSecretsActor: public NActors::TActorBootstrapped& secretIds, NThreading::TPromise promise) - : SecretIds(CreateSecretIds(ownerUserId, secretIds)) + : UserToken(NACLib::TUserToken{ownerUserId, TVector{}}) + , SecretIds(CreateSecretIds(ownerUserId, secretIds)) , Promise(promise) {} 
@@ -74,6 +89,7 @@ class TDescribeSecretsActor: public NActors::TActorBootstrapped UserToken; const std::vector SecretIds; NThreading::TPromise Promise; bool AskSent = false; diff --git a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp index 18986fb7cf41..f767e5f131ed 100644 --- a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp +++ b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp @@ -72,6 +72,8 @@ namespace NKikimr::NKqp { S3GatewayConfig = queryServiceConfig.GetS3(); + S3ReadActorFactoryConfig = NYql::NDq::CreateReadActorFactoryConfig(S3GatewayConfig); + YtGatewayConfig = queryServiceConfig.GetYt(); YtGateway = MakeYtGateway(appData->FunctionRegistry, queryServiceConfig); @@ -127,7 +129,8 @@ namespace NKikimr::NKqp { GenericGatewaysConfig, YtGatewayConfig, YtGateway, - nullptr}; + nullptr, + S3ReadActorFactoryConfig}; // Init DatabaseAsyncResolver only if all requirements are met if (DatabaseResolverActorId && MdbEndpointGenerator && diff --git a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.h b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.h index 678de407f43f..f5e5aa6bb1a5 100644 --- a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.h +++ b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace NKikimrConfig { class TQueryServiceConfig; @@ -30,6 +31,7 @@ namespace NKikimr::NKqp { NYql::TYtGatewayConfig YtGatewayConfig; NYql::IYtGateway::TPtr YtGateway; NMiniKQL::TComputationNodeFactory ComputationFactory; + NYql::NDq::TS3ReadActorFactoryConfig S3ReadActorFactoryConfig; }; struct IKqpFederatedQuerySetupFactory { @@ -65,6 +67,7 @@ namespace NKikimr::NKqp { NYql::NConnector::IClient::TPtr ConnectorClient; std::optional DatabaseResolverActorId; NYql::IMdbEndpointGenerator::TPtr MdbEndpointGenerator; + NYql::NDq::TS3ReadActorFactoryConfig S3ReadActorFactoryConfig; }; 
struct TKqpFederatedQuerySetupFactoryMock: public IKqpFederatedQuerySetupFactory { @@ -94,7 +97,7 @@ namespace NKikimr::NKqp { std::optional Make(NActors::TActorSystem*) override { return TKqpFederatedQuerySetup{ - HttpGateway, ConnectorClient, CredentialsFactory, DatabaseAsyncResolver, S3GatewayConfig, GenericGatewayConfig, YtGatewayConfig, YtGateway, ComputationFactories}; + HttpGateway, ConnectorClient, CredentialsFactory, DatabaseAsyncResolver, S3GatewayConfig, GenericGatewayConfig, YtGatewayConfig, YtGateway, ComputationFactories, S3ReadActorFactoryConfig}; } private: @@ -107,6 +110,7 @@ namespace NKikimr::NKqp { NYql::TYtGatewayConfig YtGatewayConfig; NYql::IYtGateway::TPtr YtGateway; NMiniKQL::TComputationNodeFactory ComputationFactories; + NYql::NDq::TS3ReadActorFactoryConfig S3ReadActorFactoryConfig; }; IKqpFederatedQuerySetupFactory::TPtr MakeKqpFederatedQuerySetupFactory( diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp index 6927cf0549d6..0ed438e31ef8 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp @@ -19,6 +19,8 @@ namespace NKikimr::NKqp { namespace { class TScriptFinalizerActor : public TActorBootstrapped { + static constexpr size_t MAX_ARTIFACTS_SIZE_BYTES = 40_MB; + public: TScriptFinalizerActor(TEvScriptFinalizeRequest::TPtr request, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, @@ -37,6 +39,14 @@ class TScriptFinalizerActor : public TActorBootstrapped { void CompressScriptArtifacts() const { auto& description = Request->Get()->Description; + + TString astTruncateDescription; + if (size_t planSize = description.QueryPlan.value_or("").size(); description.QueryAst && description.QueryAst->size() + planSize > MAX_ARTIFACTS_SIZE_BYTES) { + astTruncateDescription = TStringBuilder() << "Query artifacts size is " << 
description.QueryAst->size() + planSize << " bytes (plan + ast), that is larger than allowed limit " << MAX_ARTIFACTS_SIZE_BYTES << " bytes, ast was truncated"; + size_t toRemove = std::min(description.QueryAst->size() + planSize - MAX_ARTIFACTS_SIZE_BYTES, description.QueryAst->size()); + description.QueryAst = TruncateString(*description.QueryAst, description.QueryAst->size() - toRemove); + } + auto ast = description.QueryAst; if (Compressor.IsEnabled() && ast) { const auto& [astCompressionMethod, astCompressed] = Compressor.Compress(*ast); @@ -45,12 +55,15 @@ class TScriptFinalizerActor : public TActorBootstrapped { } if (description.QueryAst && description.QueryAst->size() > NDataShard::NLimits::MaxWriteValueSize) { - NYql::TIssue astTruncatedIssue(TStringBuilder() << "Query ast size is " << description.QueryAst->size() << " bytes, that is larger than allowed limit " << NDataShard::NLimits::MaxWriteValueSize << " bytes, ast was truncated"); + astTruncateDescription = TStringBuilder() << "Query ast size is " << description.QueryAst->size() << " bytes, that is larger than allowed limit " << NDataShard::NLimits::MaxWriteValueSize << " bytes, ast was truncated"; + description.QueryAst = TruncateString(*ast, NDataShard::NLimits::MaxWriteValueSize - 1_KB); + description.QueryAstCompressionMethod = std::nullopt; + } + + if (astTruncateDescription) { + NYql::TIssue astTruncatedIssue(astTruncateDescription); astTruncatedIssue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO); description.Issues.AddIssue(astTruncatedIssue); - - description.QueryAst = ast->substr(0, NDataShard::NLimits::MaxWriteValueSize - 1_KB) + "...\n(TRUNCATED)"; - description.QueryAstCompressionMethod = std::nullopt; } } @@ -181,7 +194,11 @@ class TScriptFinalizerActor : public TActorBootstrapped { void Handle(NFq::TEvents::TEvEffectApplicationResult::TPtr& ev) { if (ev->Get()->FatalError) { - FinishScriptFinalization(Ydb::StatusIds::BAD_REQUEST, std::move(ev->Get()->Issues)); + NYql::TIssue 
rootIssue("Failed to commit/abort s3 multipart uploads"); + for (const NYql::TIssue& issue : ev->Get()->Issues) { + rootIssue.AddSubIssue(MakeIntrusive(issue)); + } + FinishScriptFinalization(Ydb::StatusIds::BAD_REQUEST, {rootIssue}); } else { FinishScriptFinalization(); } @@ -219,6 +236,11 @@ class TScriptFinalizerActor : public TActorBootstrapped { PassAway(); } +private: + static TString TruncateString(const TString& str, size_t size) { + return str.substr(0, std::min(str.size(), size)) + "...\n(TRUNCATED)"; + } + private: const TActorId ReplyActor; const TString ExecutionId; diff --git a/ydb/core/kqp/gateway/actors/analyze_actor.cpp b/ydb/core/kqp/gateway/actors/analyze_actor.cpp new file mode 100644 index 000000000000..84f18e3047d5 --- /dev/null +++ b/ydb/core/kqp/gateway/actors/analyze_actor.cpp @@ -0,0 +1,247 @@ +#include "analyze_actor.h" + +#include +#include +#include +#include + + +namespace NKikimr::NKqp { + +enum { + FirstRoundCookie = 0, + SecondRoundCookie = 1, +}; + +using TNavigate = NSchemeCache::TSchemeCacheNavigate; + +TString MakeOperationId() { + TULIDGenerator ulidGen; + return ulidGen.Next(TActivationContext::Now()).ToBinary(); +} + +TAnalyzeActor::TAnalyzeActor(TString tablePath, TVector columns, NThreading::TPromise promise) + : TablePath(tablePath) + , Columns(columns) + , Promise(promise) + , OperationId(MakeOperationId()) +{} + +void TAnalyzeActor::Bootstrap() { + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + auto navigate = std::make_unique(); + auto& entry = navigate->ResultSet.emplace_back(); + entry.Path = SplitPath(TablePath); + entry.Operation = TNavigate::EOp::OpTable; + entry.RequestType = TNavigate::TEntry::ERequestType::ByPath; + navigate->Cookie = FirstRoundCookie; + + Send(NKikimr::MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + + Become(&TAnalyzeActor::StateWork); +} + +void TAnalyzeActor::Handle(NStat::TEvStatistics::TEvAnalyzeResponse::TPtr& ev, const TActorContext& ctx) 
{ + Y_UNUSED(ctx); + + const auto& record = ev->Get()->Record; + const TString operationId = record.GetOperationId(); + const auto status = record.GetStatus(); + + if (status != NKikimrStat::TEvAnalyzeResponse::STATUS_SUCCESS) { + ALOG_CRIT(NKikimrServices::KQP_GATEWAY, + "TAnalyzeActor, TEvAnalyzeResponse has status=" << status); + } + + if (operationId != OperationId) { + ALOG_CRIT(NKikimrServices::KQP_GATEWAY, + "TAnalyzeActor, TEvAnalyzeResponse has operationId=" << operationId + << " , but expected " << OperationId); + } + + NYql::IKikimrGateway::TGenericResult result; + result.SetSuccess(); + Promise.SetValue(std::move(result)); + this->Die(ctx); +} + +void TAnalyzeActor::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { + std::unique_ptr navigate(ev->Get()->Request.Release()); + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + auto& entry = navigate->ResultSet.front(); + + if (entry.Status != TNavigate::EStatus::Ok) { + NYql::EYqlIssueCode error; + switch (entry.Status) { + case TNavigate::EStatus::PathErrorUnknown: + case TNavigate::EStatus::RootUnknown: + case TNavigate::EStatus::PathNotTable: + case TNavigate::EStatus::TableCreationNotComplete: + error = NYql::TIssuesIds::KIKIMR_SCHEME_ERROR; break; + case TNavigate::EStatus::LookupError: + case TNavigate::EStatus::RedirectLookupError: + error = NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE; break; + default: + error = NYql::TIssuesIds::DEFAULT_ERROR; + } + Promise.SetValue( + NYql::NCommon::ResultFromIssues( + error, + TStringBuilder() << "Can't get statistics aggregator ID. 
" << entry.Status, + {} + ) + ); + this->Die(ctx); + return; + } + + if (navigate->Cookie == SecondRoundCookie) { + if (entry.DomainInfo->Params.HasStatisticsAggregator()) { + SendStatisticsAggregatorAnalyze(entry, ctx); + } else { + Promise.SetValue( + NYql::NCommon::ResultFromIssues( + NYql::TIssuesIds::DEFAULT_ERROR, + TStringBuilder() << "Can't get statistics aggregator ID.", {} + ) + ); + this->Die(ctx); + } + return; + } + + PathId = entry.TableId.PathId; + + auto& domainInfo = entry.DomainInfo; + + auto navigateDomainKey = [this] (TPathId domainKey) { + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + auto navigate = std::make_unique(); + auto& entry = navigate->ResultSet.emplace_back(); + entry.TableId = TTableId(domainKey.OwnerId, domainKey.LocalPathId); + entry.Operation = TNavigate::EOp::OpPath; + entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + navigate->Cookie = SecondRoundCookie; + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + }; + + if (!domainInfo->IsServerless()) { + if (domainInfo->Params.HasStatisticsAggregator()) { + SendStatisticsAggregatorAnalyze(entry, ctx); + return; + } + + navigateDomainKey(domainInfo->DomainKey); + } else { + navigateDomainKey(domainInfo->ResourcesDomainKey); + } +} + +TDuration TAnalyzeActor::CalcBackoffTime() { + ui32 backoffSlots = 1 << RetryCount; + TDuration maxDuration = RetryInterval * backoffSlots; + + double uncertaintyRatio = std::max(std::min(UncertainRatio, 1.0), 0.0); + double uncertaintyMultiplier = RandomNumber() * uncertaintyRatio - uncertaintyRatio + 1.0; + + double durationMs = round(maxDuration.MilliSeconds() * uncertaintyMultiplier); + durationMs = std::max(std::min(durationMs, MaxBackoffDurationMs), 0.0); + return TDuration::MilliSeconds(durationMs); +} + +void TAnalyzeActor::Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ev, ctx); + + if (RetryCount 
>= MaxRetryCount) { + Promise.SetValue( + NYql::NCommon::ResultFromError( + YqlIssue( + {}, NYql::TIssuesIds::UNEXPECTED, + TStringBuilder() << "Can't establish connection with the Statistics Aggregator!" + ) + ) + ); + this->Die(ctx); + return; + } + + ++RetryCount; + Schedule(CalcBackoffTime(), new TEvAnalyzePrivate::TEvAnalyzeRetry()); +} + +void TAnalyzeActor::Handle(TEvAnalyzePrivate::TEvAnalyzeRetry::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ev, ctx); + + auto analyzeRequest = std::make_unique(); + analyzeRequest->Record = Request.Record; + Send( + MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(analyzeRequest.release(), StatisticsAggregatorId.value(), true), + IEventHandle::FlagTrackDelivery + ); +} + +void TAnalyzeActor::SendStatisticsAggregatorAnalyze(const NSchemeCache::TSchemeCacheNavigate::TEntry& entry, const TActorContext& ctx) { + Y_ABORT_UNLESS(entry.DomainInfo->Params.HasStatisticsAggregator()); + + StatisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); + + auto& record = Request.Record; + record.SetOperationId(OperationId); + auto table = record.AddTables(); + + PathIdFromPathId(PathId, table->MutablePathId()); + + + THashMap tagByColumnName; + for (const auto& [_, tableInfo]: entry.Columns) { + tagByColumnName[TString(tableInfo.Name)] = tableInfo.Id; + } + + for (const auto& columnName: Columns) { + if (!tagByColumnName.contains(columnName)){ + Promise.SetValue( + NYql::NCommon::ResultFromError( + YqlIssue( + {}, NYql::TIssuesIds::UNEXPECTED, + TStringBuilder() << "No such column: " << columnName << " in the " << TablePath + ) + ) + ); + this->Die(ctx); + return; + } + + *table->MutableColumnTags()->Add() = tagByColumnName[columnName]; + } + + auto analyzeRequest = std::make_unique(); + analyzeRequest->Record = Request.Record; + Send( + MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(analyzeRequest.release(), entry.DomainInfo->Params.GetStatisticsAggregator(), true), + 
IEventHandle::FlagTrackDelivery + ); +} + +void TAnalyzeActor::HandleUnexpectedEvent(ui32 typeRewrite) { + ALOG_CRIT( + NKikimrServices::KQP_GATEWAY, + "TAnalyzeActor, unexpected event, request type: " << typeRewrite; + ); + + Promise.SetValue( + NYql::NCommon::ResultFromError( + YqlIssue( + {}, NYql::TIssuesIds::UNEXPECTED, + TStringBuilder() << "Unexpected event: " << typeRewrite + ) + ) + ); + + this->PassAway(); +} + +}// end of NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/actors/analyze_actor.h b/ydb/core/kqp/gateway/actors/analyze_actor.h new file mode 100644 index 000000000000..f59fba90c2b1 --- /dev/null +++ b/ydb/core/kqp/gateway/actors/analyze_actor.h @@ -0,0 +1,74 @@ +#include +#include +#include +#include + +#include +#include + + +namespace NKikimr::NKqp { + + +struct TEvAnalyzePrivate { + enum EEv { + EvAnalyzeRetry = EventSpaceBegin(TEvents::ES_PRIVATE), + EvEnd + }; + + struct TEvAnalyzeRetry : public TEventLocal {}; +}; + +class TAnalyzeActor : public NActors::TActorBootstrapped { +public: + TAnalyzeActor(TString tablePath, TVector columns, NThreading::TPromise promise); + + void Bootstrap(); + + STFUNC(StateWork) { + switch(ev->GetTypeRewrite()) { + HFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + HFunc(NStat::TEvStatistics::TEvAnalyzeResponse, Handle); + HFunc(TEvPipeCache::TEvDeliveryProblem, Handle); + HFunc(TEvAnalyzePrivate::TEvAnalyzeRetry, Handle); + default: + HandleUnexpectedEvent(ev->GetTypeRewrite()); + } + } + +private: + void Handle(NStat::TEvStatistics::TEvAnalyzeResponse::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvAnalyzePrivate::TEvAnalyzeRetry::TPtr& ev, const TActorContext& ctx); + + void HandleUnexpectedEvent(ui32 typeRewrite); + +private: + void SendStatisticsAggregatorAnalyze(const 
NSchemeCache::TSchemeCacheNavigate::TEntry&, const TActorContext&); + + TDuration CalcBackoffTime(); + +private: + TString TablePath; + TVector Columns; + NThreading::TPromise Promise; + // For Statistics Aggregator + std::optional StatisticsAggregatorId; + TPathId PathId; + TString OperationId; + + // for retries + NStat::TEvStatistics::TEvAnalyze Request; + TDuration RetryInterval = TDuration::MilliSeconds(5); + size_t RetryCount = 0; + + constexpr static size_t MaxRetryCount = 10; + constexpr static double UncertainRatio = 0.5; + constexpr static double MaxBackoffDurationMs = TDuration::Seconds(15).MilliSeconds(); +}; + +} // end of NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/actors/ya.make b/ydb/core/kqp/gateway/actors/ya.make index 69553c63d3f0..2b426471d0d4 100644 --- a/ydb/core/kqp/gateway/actors/ya.make +++ b/ydb/core/kqp/gateway/actors/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( scheme.cpp + analyze_actor.cpp ) PEERDIR( @@ -11,6 +12,7 @@ PEERDIR( ydb/library/yql/providers/common/gateway ydb/core/tx/schemeshard ydb/library/actors/core + ydb/library/services ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp index 077db86859e2..b9d76ce063e5 100644 --- a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp @@ -72,10 +72,12 @@ void FillCreateExternalDataSourceDesc(NKikimrSchemeOp::TExternalDataSourceDescri static const TSet properties { "database_name", "protocol", // managed PG, CH - "mdb_cluster_id", // managed PG, CH + "mdb_cluster_id", // managed PG, CH, GP, MY "database_id", // managed YDB "use_tls", - "schema", // managed PG + "schema", // managed PG, GP + "service_name", // oracle + "folder_id" // logging }; for (const auto& property: properties) { diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp 
b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp index 42c4687a19c6..cbc6f50e4f53 100644 --- a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp @@ -118,10 +118,10 @@ void FillResourcePoolDescription(NKikimrSchemeOp::TResourcePoolDescription& reso TPoolSettings resourcePoolSettings; auto& properties = *resourcePoolDescription.MutableProperties()->MutableProperties(); - for (const auto& [property, setting] : GetPropertiesMap(resourcePoolSettings, true)) { + for (const auto& [property, setting] : resourcePoolSettings.GetPropertiesMap(true)) { if (std::optional value = featuresExtractor.Extract(property)) { try { - std::visit(TSettingsParser{*value}, setting); + std::visit(TPoolSettings::TParser{*value}, setting); } catch (...) { throw yexception() << "Failed to parse property " << property << ": " << CurrentExceptionMessage(); } @@ -129,7 +129,7 @@ void FillResourcePoolDescription(NKikimrSchemeOp::TResourcePoolDescription& reso continue; } - TString value = std::visit(TSettingsExtractor(), setting); + const TString value = std::visit(TPoolSettings::TExtractor(), setting); properties.insert({property, value}); } @@ -138,8 +138,14 @@ void FillResourcePoolDescription(NKikimrSchemeOp::TResourcePoolDescription& reso } if (settings.GetObjectId() == NResourcePool::DEFAULT_POOL_ID) { - if (properties.contains("concurrent_query_limit")) { - ythrow yexception() << "Can not change property concurrent_query_limit for default pool"; + std::vector forbiddenProperties = { + "concurrent_query_limit", + "database_load_cpu_threshold" + }; + for (const TString& property : forbiddenProperties) { + if (properties.contains(property)) { + ythrow yexception() << "Can not change property " << property << " for default pool"; + } } } } @@ -186,19 +192,19 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::DoModify(const NYql::TO TResourcePoolManager::TAsyncStatus 
TResourcePoolManager::CreateResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareCreateResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kCreateResourcePool); } TResourcePoolManager::TAsyncStatus TResourcePoolManager::AlterResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareAlterResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kAlterResourcePool); } TResourcePoolManager::TAsyncStatus TResourcePoolManager::DropResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareDropResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kDropResourcePool); } //// Deferred modification @@ -235,7 +241,7 @@ void TResourcePoolManager::PrepareCreateResourcePool(NKqpProto::TKqpSchemeOperat } auto& schemeTx = *schemeOperation.MutableCreateResourcePool(); - schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".resource_pools/"})); + 
schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".metadata/workload_manager/pools/"})); schemeTx.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateResourcePool); FillResourcePoolDescription(*schemeTx.MutableCreateResourcePool(), settings); @@ -243,7 +249,7 @@ void TResourcePoolManager::PrepareCreateResourcePool(NKqpProto::TKqpSchemeOperat void TResourcePoolManager::PrepareAlterResourcePool(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TDropObjectSettings& settings, TInternalModificationContext& context) const { auto& schemeTx = *schemeOperation.MutableAlterResourcePool(); - schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".resource_pools/"})); + schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".metadata/workload_manager/pools/"})); schemeTx.SetOperationType(NKikimrSchemeOp::ESchemeOpAlterResourcePool); FillResourcePoolDescription(*schemeTx.MutableCreateResourcePool(), settings); @@ -251,7 +257,7 @@ void TResourcePoolManager::PrepareAlterResourcePool(NKqpProto::TKqpSchemeOperati void TResourcePoolManager::PrepareDropResourcePool(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TDropObjectSettings& settings, TInternalModificationContext& context) const { auto& schemeTx = *schemeOperation.MutableDropResourcePool(); - schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".resource_pools/"})); + schemeTx.SetWorkingDir(JoinPath({context.GetExternalData().GetDatabase(), ".metadata/workload_manager/pools/"})); schemeTx.SetOperationType(NKikimrSchemeOp::ESchemeOpDropResourcePool); schemeTx.MutableDrop()->SetName(settings.GetObjectId()); @@ -265,11 +271,11 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecutePrepared(const N try { switch (schemeOperation.GetOperationCase()) { case NKqpProto::TKqpSchemeOperation::kCreateResourcePool: - return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context, nodeId); + return 
ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); case NKqpProto::TKqpSchemeOperation::kAlterResourcePool: - return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context, nodeId); + return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); case NKqpProto::TKqpSchemeOperation::kDropResourcePool: - return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context, nodeId); + return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); default: return NThreading::MakeFuture(TYqlConclusionStatus::Fail(TStringBuilder() << "Execution of prepare operation for RESOURCE_POOL object: unsupported operation: " << static_cast(schemeOperation.GetOperationCase()))); } @@ -288,8 +294,13 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::ChainFeatures(TAsyncSta }); } -TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId) const { - auto validationFuture = CheckFeatureFlag(context, nodeId); +TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId, NKqpProto::TKqpSchemeOperation::OperationCase operationCase) const { + TAsyncStatus validationFuture = NThreading::MakeFuture(TYqlConclusionStatus::Success()); + if (operationCase != NKqpProto::TKqpSchemeOperation::kDropResourcePool) { + validationFuture = ChainFeatures(validationFuture, [context, nodeId] { + return CheckFeatureFlag(context, nodeId); + }); + } return ChainFeatures(validationFuture, [schemeTx, context] { return SendSchemeRequest(schemeTx, context); }); diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h 
b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h index edab893dbe6c..64406f78e328 100644 --- a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h +++ b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h @@ -31,7 +31,7 @@ class TResourcePoolManager : public NMetadata::NModifications::IOperationsManage void PrepareDropResourcePool(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TDropObjectSettings& settings, TInternalModificationContext& context) const; TAsyncStatus ChainFeatures(TAsyncStatus lastFeature, std::function callback) const; - TAsyncStatus ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId) const; + TAsyncStatus ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId, NKqpProto::TKqpSchemeOperation::OperationCase operationCase) const; }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.cpp new file mode 100644 index 000000000000..aad9f7831007 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.cpp @@ -0,0 +1,31 @@ +#include "behaviour.h" +#include "initializer.h" +#include "manager.h" + + +namespace NKikimr::NKqp { + +TResourcePoolClassifierBehaviour::TFactory::TRegistrator TResourcePoolClassifierBehaviour::Registrator(TResourcePoolClassifierConfig::GetTypeId()); + +NMetadata::NInitializer::IInitializationBehaviour::TPtr TResourcePoolClassifierBehaviour::ConstructInitializer() const { + return std::make_shared(); +} + +NMetadata::NModifications::IOperationsManager::TPtr TResourcePoolClassifierBehaviour::ConstructOperationsManager() const { + return std::make_shared(); +} + +TString TResourcePoolClassifierBehaviour::GetInternalStorageTablePath() const { + return "workload_manager/classifiers/resource_pool_classifiers"; +} + 
+TString TResourcePoolClassifierBehaviour::GetTypeId() const { + return TResourcePoolClassifierConfig::GetTypeId(); +} + +NMetadata::IClassBehaviour::TPtr TResourcePoolClassifierBehaviour::GetInstance() { + static std::shared_ptr result = std::make_shared(); + return result; +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.h new file mode 100644 index 000000000000..42c8440f80ed --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/behaviour.h @@ -0,0 +1,25 @@ +#pragma once + +#include "object.h" + +#include +#include + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierBehaviour : public NMetadata::TClassBehaviour { + static TFactory::TRegistrator Registrator; + +protected: + virtual NMetadata::NInitializer::IInitializationBehaviour::TPtr ConstructInitializer() const override; + virtual NMetadata::NModifications::IOperationsManager::TPtr ConstructOperationsManager() const override; + virtual TString GetInternalStorageTablePath() const override; + +public: + virtual TString GetTypeId() const override; + + static IClassBehaviour::TPtr GetInstance(); +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.cpp new file mode 100644 index 000000000000..10f4d6b1f54c --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.cpp @@ -0,0 +1,353 @@ +#include "checker.h" +#include "fetcher.h" + +#include +#include +#include +#include +#include + +#include + + +namespace NKikimr::NKqp { + +namespace { + +using namespace NActors; +using namespace NResourcePool; +using namespace NWorkload; + + +struct TEvPrivate { + // Event ids + enum EEv : ui32 { + EvRanksCheckerResponse = EventSpaceBegin(TEvents::ES_PRIVATE), + + EvEnd + }; + + static_assert(EvEnd < 
EventSpaceEnd(TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + + struct TEvRanksCheckerResponse : public TEventLocal { + TEvRanksCheckerResponse(Ydb::StatusIds::StatusCode status, i64 maxRank, ui64 numberClassifiers, NYql::TIssues issues) + : Status(status) + , MaxRank(maxRank) + , NumberClassifiers(numberClassifiers) + , Issues(std::move(issues)) + {} + + const Ydb::StatusIds::StatusCode Status; + const i64 MaxRank; + const ui64 NumberClassifiers; + const NYql::TIssues Issues; + }; +}; + +class TRanksCheckerActor : public NKikimr::TQueryBase { + using TBase = NKikimr::TQueryBase; + +public: + TRanksCheckerActor(const TString& databaseId, const TString& sessionId, const TString& transactionId, const std::unordered_map& ranksToCheck) + : TBase(NKikimrServices::KQP_GATEWAY, sessionId) + , DatabaseId(databaseId) + , RanksToCheck(ranksToCheck) + { + TxId = transactionId; + SetOperationInfo(__func__, DatabaseId); + } + + void OnRunQuery() override { + const auto& tablePath = TResourcePoolClassifierConfig::GetBehaviour()->GetStorageTablePath(); + + TStringBuilder sql = TStringBuilder() << R"( + -- TRanksCheckerActor::OnRunQuery + DECLARE $database_id AS Text; + )"; + + NYdb::TParamsBuilder params; + params + .AddParam("$database_id") + .Utf8(CanonizePath(DatabaseId)) + .Build(); + + if (!RanksToCheck.empty()) { + sql << R"( + DECLARE $ranks AS List; + PRAGMA AnsiInForEmptyOrNullableItemsCollections; + + SELECT + rank, name + FROM `)" << tablePath << R"(` + WHERE database = $database_id + AND rank IN $ranks; + )"; + + auto& param = params.AddParam("$ranks").BeginList(); + for (const auto& [rank, _] : RanksToCheck) { + param.AddListItem().Int64(rank); + } + param.EndList().Build(); + + ExpectedResultSets++; + } + + sql << R"( + SELECT + MAX(rank) AS MaxRank, + COUNT(*) AS NumberClassifiers + FROM `)" << tablePath << R"(` + WHERE database = $database_id; + )"; + + RunDataQuery(sql, ¶ms, TTxControl::ContinueTx()); + } + + void OnQueryResult() 
override { + if (ResultSets.size() != ExpectedResultSets) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + ui64 resultSetId = 0; + if (!RanksToCheck.empty()) { + NYdb::TResultSetParser result(ResultSets[resultSetId++]); + while (result.TryNextRow()) { + TMaybe rank = result.ColumnParser("rank").GetOptionalInt64(); + if (!rank) { + continue; + } + + TMaybe name = result.ColumnParser("name").GetOptionalUtf8(); + if (!name) { + continue; + } + + if (auto it = RanksToCheck.find(*rank); it != RanksToCheck.end() && it->second != *name) { + Finish(Ydb::StatusIds::ALREADY_EXISTS, TStringBuilder() << "Classifier with rank " << *rank << " already exists, its name " << *name); + return; + } + } + } + + { // Classifiers stats + NYdb::TResultSetParser result(ResultSets[resultSetId++]); + if (!result.TryNextRow()) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + MaxRank = result.ColumnParser("MaxRank").GetOptionalInt64().GetOrElse(0); + NumberClassifiers = result.ColumnParser("NumberClassifiers").GetUint64(); + } + + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + Send(Owner, new TEvPrivate::TEvRanksCheckerResponse(status, MaxRank, NumberClassifiers, std::move(issues))); + } + +private: + const TString DatabaseId; + const std::unordered_map RanksToCheck; + + ui64 ExpectedResultSets = 1; + i64 MaxRank = 0; + ui64 NumberClassifiers = 0; +}; + +class TResourcePoolClassifierPreparationActor : public TActorBootstrapped { +public: + TResourcePoolClassifierPreparationActor(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, const NMetadata::NModifications::IOperationsManager::TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) + : Context(context) + , AlterContext(alterContext) + , Controller(std::move(controller)) + , 
PatchedObjects(std::move(patchedObjects)) + {} + + void Bootstrap() { + Become(&TResourcePoolClassifierPreparationActor::StateFunc); + ValidateRanks(); + GetDatabaseInfo(); + } + + void Handle(TEvPrivate::TEvRanksCheckerResponse::TPtr& ev) { + if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + FailAndPassAway("Resource pool classifier rank check failed", ev->Get()->Status, ev->Get()->Issues); + return; + } + + if (Context.GetActivityType() == NMetadata::NModifications::IOperationsManager::EActivityType::Create && ev->Get()->NumberClassifiers >= CLASSIFIER_COUNT_LIMIT) { + FailAndPassAway(TStringBuilder() << "Number of resource pool classifiers reached limit in " << CLASSIFIER_COUNT_LIMIT); + return; + } + + i64 maxRank = ev->Get()->MaxRank; + for (auto& object : PatchedObjects) { + if (object.GetRank() != -1) { + continue; + } + if (maxRank > std::numeric_limits::max() - CLASSIFIER_RANK_OFFSET) { + FailAndPassAway(TStringBuilder() << "The rank could not be set automatically, the maximum rank of the resource pool classifier is too high: " << ev->Get()->MaxRank); + return; + } + + maxRank += CLASSIFIER_RANK_OFFSET; + object.SetRank(maxRank); + } + + RanksChecked = true; + TryFinish(); + } + + void Handle(TEvFetchDatabaseResponse::TPtr& ev) { + if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + FailAndPassAway("Database check failed", ev->Get()->Status, ev->Get()->Issues); + return; + } + + Serverless = ev->Get()->Serverless; + + Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), new NConsole::TEvConfigsDispatcher::TEvGetConfigRequest( + (ui32)NKikimrConsole::TConfigItem::FeatureFlagsItem + ), IEventHandle::FlagTrackDelivery); + } + + void Handle(TEvents::TEvUndelivered::TPtr& ev) { + switch (ev->Get()->SourceType) { + case NConsole::TEvConfigsDispatcher::EvGetConfigRequest: + CheckFeatureFlag(AppData()->FeatureFlags); + break; + + default: + break; + } + } + + void Handle(NConsole::TEvConfigsDispatcher::TEvGetConfigResponse::TPtr& ev) { + 
CheckFeatureFlag(ev->Get()->Config->GetFeatureFlags()); + } + + void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev) { + const auto& snapshot = ev->Get()->GetSnapshotAs(); + for (const auto& objectRecord : AlterContext.GetRestoreObjectIds().GetTableRecords()) { + TResourcePoolClassifierConfig object; + TResourcePoolClassifierConfig::TDecoder::DeserializeFromRecord(object, objectRecord); + + if (!snapshot->GetClassifierConfig(object.GetDatabase(), object.GetName())) { + FailAndPassAway(TStringBuilder() << "Classifier with name " << object.GetName() << " not found in database with id " << object.GetDatabase()); + return; + } + } + + ExistenceChecked = true; + TryFinish(); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvPrivate::TEvRanksCheckerResponse, Handle); + hFunc(TEvFetchDatabaseResponse, Handle); + hFunc(TEvents::TEvUndelivered, Handle); + hFunc(NConsole::TEvConfigsDispatcher::TEvGetConfigResponse, Handle); + hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle) + ) + +private: + void GetDatabaseInfo() const { + const auto& externalContext = Context.GetExternalData(); + const auto userToken = externalContext.GetUserToken() ? 
MakeIntrusive(*externalContext.GetUserToken()) : nullptr; + Register(CreateDatabaseFetcherActor(SelfId(), externalContext.GetDatabase(), userToken, NACLib::EAccessRights::GenericFull)); + } + + void ValidateRanks() { + if (Context.GetActivityType() == NMetadata::NModifications::IOperationsManager::EActivityType::Drop) { + RanksChecked = true; + TryFinish(); + return; + } + + std::unordered_map ranksToNames; + for (const auto& object : PatchedObjects) { + const auto rank = object.GetRank(); + if (rank == -1) { + continue; + } + if (!ranksToNames.insert({rank, object.GetName()}).second) { + FailAndPassAway(TStringBuilder() << "Found duplicate rank " << rank); + } + } + + Register(new TQueryRetryActor>( + SelfId(), Context.GetExternalData().GetDatabaseId(), AlterContext.GetSessionId(), AlterContext.GetTransactionId(), ranksToNames + )); + } + + void CheckFeatureFlag(const NKikimrConfig::TFeatureFlags& featureFlags) { + if (Context.GetActivityType() == NMetadata::NModifications::IOperationsManager::EActivityType::Drop) { + FeatureFlagChecked = true; + ValidateExistence(); + return; + } + + if (!featureFlags.GetEnableResourcePools()) { + FailAndPassAway("Resource pool classifiers are disabled. Please contact your system administrator to enable it"); + return; + } + if (Serverless && !featureFlags.GetEnableResourcePoolsOnServerless()) { + FailAndPassAway("Resource pool classifiers are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return; + } + + FeatureFlagChecked = true; + ValidateExistence(); + } + + void ValidateExistence() { + if (Context.GetActivityType() != NMetadata::NModifications::IOperationsManager::EActivityType::Create && NMetadata::NProvider::TServiceOperator::IsEnabled()) { + Send(NMetadata::NProvider::MakeServiceId(SelfId().NodeId()), new NMetadata::NProvider::TEvAskSnapshot(std::make_shared())); + return; + } + + ExistenceChecked = true; + TryFinish(); + } + + void FailAndPassAway(const TString& message, Ydb::StatusIds::StatusCode status, NYql::TIssues issues) { + FailAndPassAway(TStringBuilder() << message << ", status: " << status << ", reason: " << issues.ToOneLineString()); + } + + void FailAndPassAway(const TString& message) { + Controller->OnPreparationProblem(message); + PassAway(); + } + + void TryFinish() { + if (!FeatureFlagChecked || !RanksChecked || !ExistenceChecked) { + return; + } + + Controller->OnPreparationFinished(std::move(PatchedObjects)); + PassAway(); + } + +private: + const NMetadata::NModifications::IOperationsManager::TInternalModificationContext Context; + const NMetadata::NModifications::TAlterOperationContext AlterContext; + + bool Serverless = false; + bool FeatureFlagChecked = false; + bool RanksChecked = false; + bool ExistenceChecked = false; + + NMetadata::NModifications::IAlterPreparationController::TPtr Controller; + std::vector PatchedObjects; +}; + +} // anonymous namespace + +IActor* CreateResourcePoolClassifierPreparationActor(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, const NMetadata::NModifications::IOperationsManager::TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) { + return new TResourcePoolClassifierPreparationActor(std::move(patchedObjects), std::move(controller), context, alterContext); +} + +} // namespace NKikimr::NKqp diff --git 
a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.h new file mode 100644 index 000000000000..3018087f7553 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/checker.h @@ -0,0 +1,12 @@ +#pragma once + +#include "object.h" + +#include + + +namespace NKikimr::NKqp { + +NActors::IActor* CreateResourcePoolClassifierPreparationActor(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, const NMetadata::NModifications::IOperationsManager::TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext); + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.cpp new file mode 100644 index 000000000000..e9f7f3d59bb0 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.cpp @@ -0,0 +1,10 @@ +#include "fetcher.h" + + +namespace NKikimr::NKqp { + +std::vector TResourcePoolClassifierSnapshotsFetcher::DoGetManagers() const { + return {TResourcePoolClassifierConfig::GetBehaviour()}; +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.h new file mode 100644 index 000000000000..29611f0cbeb0 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/fetcher.h @@ -0,0 +1,13 @@ +#pragma once + +#include "snapshot.h" + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierSnapshotsFetcher : public NMetadata::NFetcher::TSnapshotsFetcher { +protected: + virtual std::vector DoGetManagers() const override; +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.cpp 
b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.cpp new file mode 100644 index 000000000000..39230296e4be --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.cpp @@ -0,0 +1,41 @@ +#include "initializer.h" +#include "object.h" + + +namespace NKikimr::NKqp { + +namespace { + +void AddColumn(Ydb::Table::CreateTableRequest& request, const TString& name, Ydb::Type::PrimitiveTypeId type, bool primary = false) { + if (primary) { + request.add_primary_key(name); + } + + auto& column = *request.add_columns(); + column.set_name(name); + column.mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(type); +} + +} // anonymous namespace + +void TResourcePoolClassifierInitializer::DoPrepare(NMetadata::NInitializer::IInitializerInput::TPtr controller) const { + TVector result; + { + Ydb::Table::CreateTableRequest request; + request.set_session_id(""); + request.set_path(TResourcePoolClassifierConfig::GetBehaviour()->GetStorageTablePath()); + AddColumn(request, TResourcePoolClassifierConfig::TDecoder::Database, Ydb::Type::UTF8, true); + AddColumn(request, TResourcePoolClassifierConfig::TDecoder::Name, Ydb::Type::UTF8, true); + AddColumn(request, TResourcePoolClassifierConfig::TDecoder::Rank, Ydb::Type::INT64); + AddColumn(request, TResourcePoolClassifierConfig::TDecoder::ConfigJson, Ydb::Type::JSON_DOCUMENT); + result.emplace_back(std::make_shared>(request, "create")); + + auto historyRequest = TResourcePoolClassifierConfig::AddHistoryTableScheme(request); + result.emplace_back(std::make_shared>(historyRequest, "create_history")); + } + result.emplace_back(NMetadata::NInitializer::TACLModifierConstructor::GetReadOnlyModifier(TResourcePoolClassifierConfig::GetBehaviour()->GetStorageTablePath(), "acl")); + result.emplace_back(NMetadata::NInitializer::TACLModifierConstructor::GetReadOnlyModifier(TResourcePoolClassifierConfig::GetBehaviour()->GetStorageHistoryTablePath(), "acl_history")); + 
controller->OnPreparationFinished(result); +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.h new file mode 100644 index 000000000000..c1743cfcbcfd --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/initializer.h @@ -0,0 +1,13 @@ +#pragma once + +#include + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierInitializer : public NMetadata::NInitializer::IInitializationBehaviour { +protected: + virtual void DoPrepare(NMetadata::NInitializer::IInitializerInput::TPtr controller) const override; +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.cpp new file mode 100644 index 000000000000..5791ab5befa7 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.cpp @@ -0,0 +1,110 @@ +#include "manager.h" +#include "checker.h" + +#include +#include + + +namespace NKikimr::NKqp { + +namespace { + +using namespace NResourcePool; + +NMetadata::NInternal::TTableRecord GetResourcePoolClassifierRecord(const NYql::TObjectSettingsImpl& settings, const NMetadata::NModifications::IOperationsManager::TInternalModificationContext& context) { + NMetadata::NInternal::TTableRecord result; + result.SetColumn(TResourcePoolClassifierConfig::TDecoder::Database, NMetadata::NInternal::TYDBValue::Utf8(context.GetExternalData().GetDatabaseId())); + result.SetColumn(TResourcePoolClassifierConfig::TDecoder::Name, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); + return result; +} + +} // anonymous namespace + +NMetadata::NModifications::TOperationParsingResult TResourcePoolClassifierManager::DoBuildPatchFromSettings(const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const { + try { + switch (context.GetActivityType()) { + 
case EActivityType::Create: + case EActivityType::Alter: + return FillResourcePoolClassifierInfo(settings, context); + case EActivityType::Drop: + return FillDropInfo(settings, context); + case EActivityType::Upsert: + return TConclusionStatus::Fail("Upsert operation for RESOURCE_POOL_CLASSIFIER objects is not implemented"); + case EActivityType::Undefined: + return TConclusionStatus::Fail("Undefined operation for RESOURCE_POOL_CLASSIFIER object"); + } + } catch (...) { + return TConclusionStatus::Fail(CurrentExceptionMessage()); + } +} + +NMetadata::NModifications::TOperationParsingResult TResourcePoolClassifierManager::FillResourcePoolClassifierInfo(const NYql::TObjectSettingsImpl& settings, const TInternalModificationContext& context) const { + NMetadata::NInternal::TTableRecord result = GetResourcePoolClassifierRecord(settings, context); + + auto& featuresExtractor = settings.GetFeaturesExtractor(); + featuresExtractor.ValidateResetFeatures(); + + NJson::TJsonValue configJson = NJson::JSON_MAP; + TClassifierSettings resourcePoolClassifierSettings; + for (const auto& [property, setting] : resourcePoolClassifierSettings.GetPropertiesMap()) { + if (std::optional value = featuresExtractor.Extract(property)) { + try { + std::visit(TClassifierSettings::TParser{*value}, setting); + } catch (...) 
{ + throw yexception() << "Failed to parse property " << property << ": " << CurrentExceptionMessage(); + } + } else if (featuresExtractor.ExtractResetFeature(property)) { + if (property == "resource_pool") { + ythrow yexception() << "Cannot reset required property resource_pool"; + } + } else { + continue; + } + + const TString value = std::visit(TClassifierSettings::TExtractor(), setting); + if (property == TResourcePoolClassifierConfig::TDecoder::Rank) { + result.SetColumn(property, NMetadata::NInternal::TYDBValue::Int64(FromString(value))); + } else { + configJson.InsertValue(property, value); + } + } + + if (context.GetActivityType() == EActivityType::Create) { + if (!configJson.GetMap().contains("resource_pool")) { + ythrow yexception() << "Missing required property resource_pool"; + } + + static const TString extraPathSymbolsAllowed = "!\"#$%&'()*+,-.:;<=>?@[\\]^_`{|}~"; + const auto& name = settings.GetObjectId(); + if (const auto brokenAt = PathPartBrokenAt(name, extraPathSymbolsAllowed); brokenAt != name.end()) { + ythrow yexception() << "Symbol '" << *brokenAt << "'" << " is not allowed in the resource pool classifier name '" << name << "'"; + } + } + resourcePoolClassifierSettings.Validate(); + + NJsonWriter::TBuf writer; + writer.WriteJsonValue(&configJson); + result.SetColumn(TResourcePoolClassifierConfig::TDecoder::ConfigJson, NMetadata::NInternal::TYDBValue::Utf8(writer.Str())); + + if (!featuresExtractor.IsFinished()) { + ythrow yexception() << "Unknown property: " << featuresExtractor.GetRemainedParamsString(); + } + + return result; +} + +NMetadata::NModifications::TOperationParsingResult TResourcePoolClassifierManager::FillDropInfo(const NYql::TObjectSettingsImpl& settings, const TInternalModificationContext& context) const { + return GetResourcePoolClassifierRecord(settings, context); +} + +void TResourcePoolClassifierManager::DoPrepareObjectsBeforeModification(std::vector&& patchedObjects, 
NMetadata::NModifications::IAlterPreparationController::TPtr controller, const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) const { + auto* actorSystem = context.GetExternalData().GetActorSystem(); + if (!actorSystem) { + controller->OnPreparationProblem("This place needs an actor system. Please contact internal support"); + return; + } + + actorSystem->Register(CreateResourcePoolClassifierPreparationActor(std::move(patchedObjects), std::move(controller), context, alterContext)); +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.h new file mode 100644 index 000000000000..947069f8a1ab --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/manager.h @@ -0,0 +1,21 @@ +#pragma once + +#include "object.h" + +#include + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierManager : public NMetadata::NModifications::TGenericOperationsManager { +protected: + virtual NMetadata::NModifications::TOperationParsingResult DoBuildPatchFromSettings(const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const override; + + virtual void DoPrepareObjectsBeforeModification(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) const override; + +private: + NMetadata::NModifications::TOperationParsingResult FillResourcePoolClassifierInfo(const NYql::TObjectSettingsImpl& settings, const TInternalModificationContext& context) const; + NMetadata::NModifications::TOperationParsingResult FillDropInfo(const NYql::TObjectSettingsImpl& settings, const TInternalModificationContext& context) const; +}; + +} // namespace NKikimr::NKqp diff --git 
a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.cpp new file mode 100644 index 000000000000..b34541962d70 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.cpp @@ -0,0 +1,137 @@ +#include "object.h" +#include "behaviour.h" + +#include + + +namespace NKikimr::NKqp { + +namespace { + +using namespace NResourcePool; + + +class TJsonConfigsMerger : public NMetadata::NModifications::IColumnValuesMerger { +public: + virtual TConclusionStatus Merge(Ydb::Value& value, const Ydb::Value& patch) const override { + NJson::TJsonValue selfConfigJson; + if (!NJson::ReadJsonTree(value.text_value(), &selfConfigJson)) { + return TConclusionStatus::Fail("Failed to parse object json config"); + } + + NJson::TJsonValue otherConfigJson; + if (!NJson::ReadJsonTree(patch.text_value(), &otherConfigJson)) { + return TConclusionStatus::Fail("Failed to parse patch json config"); + } + + for (const auto& [key, value] : otherConfigJson.GetMap()) { + selfConfigJson.InsertValue(key, value); + } + + NJsonWriter::TBuf writer; + writer.WriteJsonValue(&selfConfigJson); + *value.mutable_text_value() = writer.Str(); + + return TConclusionStatus::Success(); + } +}; + +} // anonymous namespace + + +//// TResourcePoolClassifierConfig::TDecoder + +TResourcePoolClassifierConfig::TDecoder::TDecoder(const Ydb::ResultSet& rawData) + : DatabaseIdx(GetFieldIndex(rawData, Database)) + , NameIdx(GetFieldIndex(rawData, Name)) + , RankIdx(GetFieldIndex(rawData, Rank)) + , ConfigJsonIdx(GetFieldIndex(rawData, ConfigJson)) +{} + +//// TResourcePoolClassifierConfig + +NMetadata::NModifications::IColumnValuesMerger::TPtr TResourcePoolClassifierConfig::BuildMerger(const TString& columnName) const { + if (columnName == TDecoder::ConfigJson) { + return std::make_shared(); + } + return TBase::BuildMerger(columnName); +} + +bool TResourcePoolClassifierConfig::DeserializeFromRecord(const TDecoder& decoder, 
const Ydb::Value& rawData) { + if (!decoder.Read(decoder.GetDatabaseIdx(), Database, rawData)) { + return false; + } + if (!decoder.Read(decoder.GetNameIdx(), Name, rawData)) { + return false; + } + if (!decoder.Read(decoder.GetRankIdx(), Rank, rawData)) { + Rank = -1; + } + + TString configJsonString; + if (!decoder.Read(decoder.GetConfigJsonIdx(), configJsonString, rawData)) { + return false; + } + if (!NJson::ReadJsonTree(configJsonString, &ConfigJson)) { + return false; + } + + return true; +} + +NMetadata::NInternal::TTableRecord TResourcePoolClassifierConfig::SerializeToRecord() const { + NMetadata::NInternal::TTableRecord result; + result.SetColumn(TDecoder::Database, NMetadata::NInternal::TYDBValue::Utf8(Database)); + result.SetColumn(TDecoder::Name, NMetadata::NInternal::TYDBValue::Utf8(Name)); + result.SetColumn(TDecoder::Rank, NMetadata::NInternal::TYDBValue::Int64(Rank)); + + NJsonWriter::TBuf writer; + writer.WriteJsonValue(&ConfigJson); + result.SetColumn(TDecoder::ConfigJson, NMetadata::NInternal::TYDBValue::Utf8(writer.Str())); + + return result; +} + +TClassifierSettings TResourcePoolClassifierConfig::GetClassifierSettings() const { + TClassifierSettings resourcePoolClassifierSettings; + + resourcePoolClassifierSettings.Rank = Rank; + + const auto& properties = resourcePoolClassifierSettings.GetPropertiesMap(); + for (const auto& [propery, value] : ConfigJson.GetMap()) { + const auto it = properties.find(propery); + if (it == properties.end()) { + continue; + } + try { + std::visit(TClassifierSettings::TParser{value.GetString()}, it->second); + } catch (...) 
{ + continue; + } + } + + return resourcePoolClassifierSettings; +} + +NJson::TJsonValue TResourcePoolClassifierConfig::GetDebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue(TDecoder::Database, Database); + result.InsertValue(TDecoder::Name, Name); + result.InsertValue(TDecoder::Rank, Rank); + result.InsertValue(TDecoder::ConfigJson, ConfigJson); + return result; +} + +bool TResourcePoolClassifierConfig::operator==(const TResourcePoolClassifierConfig& other) const { + return std::tie(Database, Name, Rank, ConfigJson) == std::tie(other.Database, other.Name, other.Rank, other.ConfigJson); +} + +NMetadata::IClassBehaviour::TPtr TResourcePoolClassifierConfig::GetBehaviour() { + return TResourcePoolClassifierBehaviour::GetInstance(); +} + +TString TResourcePoolClassifierConfig::GetTypeId() { + return "RESOURCE_POOL_CLASSIFIER"; +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.h new file mode 100644 index 000000000000..854a15c2827f --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/object.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include +#include + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierConfig : public NMetadata::NModifications::TObject { + using TBase = NMetadata::NModifications::TObject; + + YDB_ACCESSOR_DEF(TString, Database); + YDB_ACCESSOR_DEF(TString, Name); + YDB_ACCESSOR_DEF(i64, Rank); + YDB_ACCESSOR_DEF(NJson::TJsonValue, ConfigJson); + +public: + class TDecoder : public NMetadata::NInternal::TDecoderBase { + private: + YDB_READONLY(i32, DatabaseIdx, -1); + YDB_READONLY(i32, NameIdx, -1); + YDB_READONLY(i32, RankIdx, -1); + YDB_READONLY(i32, ConfigJsonIdx, -1); + + public: + static inline const TString Database = "database"; + static inline const TString Name = "name"; + static inline const TString Rank = "rank"; + static inline const TString 
ConfigJson = "config"; + + explicit TDecoder(const Ydb::ResultSet& rawData); + }; + + virtual NMetadata::NModifications::IColumnValuesMerger::TPtr BuildMerger(const TString& columnName) const override; + NMetadata::NInternal::TTableRecord SerializeToRecord() const; + bool DeserializeFromRecord(const TDecoder& decoder, const Ydb::Value& rawData); + + NResourcePool::TClassifierSettings GetClassifierSettings() const; + NJson::TJsonValue GetDebugJson() const; + + bool operator==(const TResourcePoolClassifierConfig& other) const; + + static NMetadata::IClassBehaviour::TPtr GetBehaviour(); + static TString GetTypeId(); +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.cpp new file mode 100644 index 000000000000..5c08fd5ea280 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.cpp @@ -0,0 +1,37 @@ +#include "snapshot.h" + + +namespace NKikimr::NKqp { + +bool TResourcePoolClassifierSnapshot::DoDeserializeFromResultSet(const Ydb::Table::ExecuteQueryResult& rawData) { + Y_ABORT_UNLESS(rawData.result_sets().size() == 1); + ParseSnapshotObjects(rawData.result_sets()[0], [this](TResourcePoolClassifierConfig&& config) { + ResourcePoolClassifierConfigs[config.GetDatabase()].emplace(config.GetName(), config); + }); + return true; +} + +TString TResourcePoolClassifierSnapshot::DoSerializeToString() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& jsonResourcePoolClassifiers = result.InsertValue("resource_pool_classifiers", NJson::JSON_ARRAY); + for (const auto& [_, configsMap] : ResourcePoolClassifierConfigs) { + for (const auto& [_, config] : configsMap) { + jsonResourcePoolClassifiers.AppendValue(config.GetDebugJson()); + } + } + return result.GetStringRobust(); +} + +std::optional TResourcePoolClassifierSnapshot::GetClassifierConfig(const TString& database, const TString& name) const { + const auto 
databaseIt = ResourcePoolClassifierConfigs.find(database); + if (databaseIt == ResourcePoolClassifierConfigs.end()) { + return std::nullopt; + } + const auto configIt = databaseIt->second.find(name); + if (configIt == databaseIt->second.end()) { + return std::nullopt; + } + return configIt->second; +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.h b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.h new file mode 100644 index 000000000000..2ab9130fa295 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/snapshot.h @@ -0,0 +1,26 @@ +#pragma once + +#include "object.h" + +#include + + +namespace NKikimr::NKqp { + +class TResourcePoolClassifierSnapshot : public NMetadata::NFetcher::ISnapshot { + using TBase = NMetadata::NFetcher::ISnapshot; + using TConfigsMap = std::unordered_map>; + + YDB_ACCESSOR_DEF(TConfigsMap, ResourcePoolClassifierConfigs); + +protected: + virtual bool DoDeserializeFromResultSet(const Ydb::Table::ExecuteQueryResult& rawData) override; + virtual TString DoSerializeToString() const override; + +public: + using TBase::TBase; + + std::optional GetClassifierConfig(const TString& database, const TString& name) const; +}; + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/ya.make b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/ya.make new file mode 100644 index 000000000000..535929964307 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/resource_pool_classifier/ya.make @@ -0,0 +1,26 @@ +LIBRARY() + +SRCS( + GLOBAL behaviour.cpp + checker.cpp + fetcher.cpp + initializer.cpp + manager.cpp + object.cpp + snapshot.cpp +) + +PEERDIR( + ydb/core/cms/console + ydb/core/kqp/workload_service/actors + ydb/core/protos + ydb/core/resource_pools + ydb/library/query_actor + ydb/services/metadata/abstract + ydb/services/metadata/initializer + ydb/services/metadata/manager +) + +YQL_LAST_ABI_VERSION() + 
+END() diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp index 1467a61bc38e..b000a2fd94a4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp @@ -16,15 +16,21 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl if (StorageId && !*StorageId) { return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); } + { + auto status = AccessorConstructor.DeserializeFromRequest(features); + if (status.IsFail()) { + return status; + } + } { auto result = DictionaryEncodingDiff.DeserializeFromRequestFeatures(features); - if (!result) { - return TConclusionStatus::Fail(result.GetErrorMessage()); + if (result.IsFail()) { + return result; } } { auto status = Serializer.DeserializeFromRequest(features); - if (!status) { + if (status.IsFail()) { return status; } } @@ -40,6 +46,9 @@ void TAlterColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTable if (!!Serializer) { Serializer.SerializeToProto(*column->MutableSerializer()); } + if (!!AccessorConstructor) { + *column->MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } *column->MutableDictionaryEncoding() = DictionaryEncodingDiff.SerializeToProto(); if (DefaultValue) { column->SetDefaultValue(*DefaultValue); diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h index ee51b47bb8df..23d1aef28abb 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h @@ -1,4 +1,5 @@ #include "abstract.h" +#include #include #include @@ -18,6 +19,7 @@ class TAlterColumnOperation : public ITableStoreOperation { NArrow::NSerialization::TSerializerContainer Serializer; 
NArrow::NDictionary::TEncodingDiff DictionaryEncodingDiff; std::optional DefaultValue; + NArrow::NAccessor::TRequestedConstructorContainer AccessorConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp index fdc831d424d3..fc0e3b0d262e 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp @@ -1,4 +1,5 @@ #include "alter_sharding.h" +#include #include namespace NKikimr::NKqp { @@ -26,4 +27,8 @@ void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme.MutableAlterColumnTable()->MutableReshardColumnTable()->SetIncrease(*Increase); } +void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const { + AFL_VERIFY(false); +} + } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h index 52f58e14d7b5..cb81ee36da68 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h @@ -1,5 +1,4 @@ #include "abstract.h" -#include namespace NKikimr::NKqp { @@ -12,9 +11,7 @@ class TAlterShardingOperation: public ITableStoreOperation { static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); private: std::optional Increase; - virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override { - AFL_VERIFY(false); - } + virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override; virtual void DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme, const bool isStandalone) const override; public: 
diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp deleted file mode 100644 index 94a18e7e4140..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "drop_stat.h" -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TDropStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - if (!fValue) { - return TConclusionStatus::Fail("can't find parameter NAME"); - } - Name = *fValue; - } - return TConclusionStatus::Success(); -} - -void TDropStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - *schemaData.AddDropStatistics() = Name; -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h deleted file mode 100644 index 777aae036858..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h +++ /dev/null @@ -1,19 +0,0 @@ -#include "abstract.h" - -namespace NKikimr::NKqp { - -class TDropStatOperation : public ITableStoreOperation { - static TString GetTypeName() { - return "DROP_STAT"; - } - - static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp index ae0f08e3333d..61914cb6e005 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp @@ -12,10 +12,6 @@ 
TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl } IndexName = *fValue; } - StorageId = features.Extract("STORAGE_ID"); - if (StorageId && !*StorageId) { - return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); - } TString indexType; { auto fValue = features.Extract("TYPE"); @@ -46,9 +42,6 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl void TUpsertIndexOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { auto* indexProto = schemaData.AddUpsertIndexes(); - if (StorageId) { - indexProto->SetStorageId(*StorageId); - } indexProto->SetName(IndexName); IndexMetaConstructor.SerializeToProto(*indexProto); } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h index 12305f85f0ae..267829a1a5f4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h @@ -12,7 +12,6 @@ class TUpsertIndexOperation : public ITableStoreOperation { static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); private: TString IndexName; - std::optional StorageId; NBackgroundTasks::TInterfaceProtoContainer IndexMetaConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp deleted file mode 100644 index 9e8360dd5e35..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "upsert_stat.h" -#include -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TUpsertStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - 
if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter NAME"); - } - Name = *fValue; - } - TString type; - { - auto fValue = features.Extract("TYPE"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter TYPE"); - } - type = *fValue; - } - { - auto fValue = features.Extract("FEATURES"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter FEATURES"); - } - if (!Constructor.Initialize(type)) { - return TConclusionStatus::Fail("can't initialize stat constructor object for type \"" + type + "\""); - } - NJson::TJsonValue jsonData; - if (!NJson::ReadJsonFastTree(*fValue, &jsonData)) { - return TConclusionStatus::Fail("incorrect json in request FEATURES parameter"); - } - auto result = Constructor->DeserializeFromJson(jsonData); - if (result.IsFail()) { - return result; - } - } - return TConclusionStatus::Success(); -} - -void TUpsertStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - auto* proto = schemaData.AddUpsertStatistics(); - proto->SetName(Name); - Constructor.SerializeToProto(*proto); -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h deleted file mode 100644 index 5d8abdffae8d..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h +++ /dev/null @@ -1,23 +0,0 @@ -#include "abstract.h" -#include - -namespace NKikimr::NKqp { - -class TUpsertStatOperation : public ITableStoreOperation { -private: - static TString GetTypeName() { - return "UPSERT_STAT"; - } - - static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; - NOlap::NStatistics::TConstructorContainer Constructor; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - - void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const 
override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make index 6094887573e1..e393435d9cc5 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make @@ -7,8 +7,6 @@ SRCS( GLOBAL drop_column.cpp GLOBAL upsert_index.cpp GLOBAL drop_index.cpp - GLOBAL upsert_stat.cpp - GLOBAL drop_stat.cpp GLOBAL upsert_opt.cpp GLOBAL alter_sharding.cpp ) @@ -16,7 +14,6 @@ SRCS( PEERDIR( ydb/services/metadata/manager ydb/core/formats/arrow/serializer - ydb/core/tx/columnshard/engines/scheme/statistics/abstract ydb/core/tx/columnshard/engines/storage/optimizer/abstract ydb/core/kqp/gateway/utils ydb/core/protos diff --git a/ydb/core/kqp/gateway/behaviour/view/manager.cpp b/ydb/core/kqp/gateway/behaviour/view/manager.cpp index cfe2a73ac7d5..f7d421a790ae 100644 --- a/ydb/core/kqp/gateway/behaviour/view/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/view/manager.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include namespace NKikimr::NKqp { @@ -10,13 +12,9 @@ namespace { using TYqlConclusionStatus = TViewManager::TYqlConclusionStatus; using TInternalModificationContext = TViewManager::TInternalModificationContext; +using TExternalModificationContext = TViewManager::TExternalModificationContext; -TString GetByKeyOrDefault(const NYql::TCreateObjectSettings& container, const TString& key) { - const auto value = container.GetFeaturesExtractor().Extract(key); - return value ? *value : TString{}; -} - -TYqlConclusionStatus CheckFeatureFlag(TInternalModificationContext& context) { +TYqlConclusionStatus CheckFeatureFlag(const TInternalModificationContext& context) { auto* const actorSystem = context.GetExternalData().GetActorSystem(); if (!actorSystem) { ythrow yexception() << "This place needs an actor system. 
Please contact internal support"; @@ -36,30 +34,51 @@ std::pair SplitPathByDb(const TString& objectId, return pathPair; } +std::pair SplitPathByObjectId(const TString& objectId) { + std::pair pathPair; + TString error; + if (!NSchemeHelpers::TrySplitTablePath(objectId, pathPair, error)) { + ythrow TBadArgumentException() << error; + } + return pathPair; +} + +void ValidateOptions(NYql::TFeaturesExtractor& features) { + // Current implementation does not persist the security_invoker option value. + if (features.Extract("security_invoker") != "true") { + ythrow TBadArgumentException() << "security_invoker option must be explicitly enabled"; + } + if (!features.IsFinished()) { + ythrow TBadArgumentException() << "Unknown property: " << features.GetRemainedParamsString(); + } +} + void FillCreateViewProposal(NKikimrSchemeOp::TModifyScheme& modifyScheme, const NYql::TCreateObjectSettings& settings, - const TString& database) { + const TExternalModificationContext& context) { - const auto pathPair = SplitPathByDb(settings.GetObjectId(), database); + const auto pathPair = SplitPathByDb(settings.GetObjectId(), context.GetDatabase()); modifyScheme.SetWorkingDir(pathPair.first); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateView); + modifyScheme.SetFailedOnAlreadyExists(!settings.GetExistingOk()); auto& viewDesc = *modifyScheme.MutableCreateView(); viewDesc.SetName(pathPair.second); - viewDesc.SetQueryText(GetByKeyOrDefault(settings, "query_text")); - if (!settings.GetFeaturesExtractor().IsFinished()) { - ythrow TBadArgumentException() << "Unknown property: " << settings.GetFeaturesExtractor().GetRemainedParamsString(); - } + auto& features = settings.GetFeaturesExtractor(); + viewDesc.SetQueryText(features.Extract("query_text").value_or("")); + ValidateOptions(features); + + NSQLTranslation::Serialize(context.GetTranslationSettings(), *viewDesc.MutableCapturedContext()); } void FillDropViewProposal(NKikimrSchemeOp::TModifyScheme& modifyScheme, - const 
NYql::TDropObjectSettings& settings, - const TString& database) { + const NYql::TDropObjectSettings& settings) { - const auto pathPair = SplitPathByDb(settings.GetObjectId(), database); + const auto pathPair = SplitPathByObjectId(settings.GetObjectId()); modifyScheme.SetWorkingDir(pathPair.first); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpDropView); + modifyScheme.SetSuccessOnNotExist(settings.GetMissingOk()); auto& drop = *modifyScheme.MutableDrop(); drop.SetName(pathPair.second); @@ -67,9 +86,12 @@ void FillDropViewProposal(NKikimrSchemeOp::TModifyScheme& modifyScheme, NThreading::TFuture SendSchemeRequest(TEvTxUserProxy::TEvProposeTransaction* request, TActorSystem* actorSystem, - bool failOnAlreadyExists) { + bool failedOnAlreadyExists, + bool successOnNotExist) { const auto promiseScheme = NThreading::NewPromise(); - IActor* const requestHandler = new TSchemeOpRequestHandler(request, promiseScheme, failOnAlreadyExists); + IActor* const requestHandler = new TSchemeOpRequestHandler( + request, promiseScheme, failedOnAlreadyExists, successOnNotExist + ); actorSystem->Register(requestHandler); return promiseScheme.GetFuture().Apply([](const NThreading::TFuture& opResult) { if (opResult.HasValue()) { @@ -83,41 +105,50 @@ NThreading::TFuture SendSchemeRequest(TEvTxUserProxy::TEvP } NThreading::TFuture CreateView(const NYql::TCreateObjectSettings& settings, - TInternalModificationContext& context) { + const TInternalModificationContext& context) { auto proposal = MakeHolder(); proposal->Record.SetDatabaseName(context.GetExternalData().GetDatabase()); if (context.GetExternalData().GetUserToken()) { proposal->Record.SetUserToken(context.GetExternalData().GetUserToken()->GetSerializedToken()); } auto& schemeTx = *proposal->Record.MutableTransaction()->MutableModifyScheme(); - FillCreateViewProposal(schemeTx, settings, context.GetExternalData().GetDatabase()); - - return SendSchemeRequest(proposal.Release(), context.GetExternalData().GetActorSystem(), 
true); + FillCreateViewProposal(schemeTx, settings, context.GetExternalData()); + + return SendSchemeRequest( + proposal.Release(), + context.GetExternalData().GetActorSystem(), + schemeTx.GetFailedOnAlreadyExists(), + schemeTx.GetSuccessOnNotExist() + ); } NThreading::TFuture DropView(const NYql::TDropObjectSettings& settings, - TInternalModificationContext& context) { + const TInternalModificationContext& context) { auto proposal = MakeHolder(); proposal->Record.SetDatabaseName(context.GetExternalData().GetDatabase()); if (context.GetExternalData().GetUserToken()) { proposal->Record.SetUserToken(context.GetExternalData().GetUserToken()->GetSerializedToken()); } auto& schemeTx = *proposal->Record.MutableTransaction()->MutableModifyScheme(); - FillDropViewProposal(schemeTx, settings, context.GetExternalData().GetDatabase()); - - return SendSchemeRequest(proposal.Release(), context.GetExternalData().GetActorSystem(), false); + FillDropViewProposal(schemeTx, settings); + + return SendSchemeRequest( + proposal.Release(), + context.GetExternalData().GetActorSystem(), + schemeTx.GetFailedOnAlreadyExists(), + schemeTx.GetSuccessOnNotExist() + ); } void PrepareCreateView(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TObjectSettingsImpl& settings, - TInternalModificationContext& context) { - FillCreateViewProposal(*schemeOperation.MutableCreateView(), settings, context.GetExternalData().GetDatabase()); + const TInternalModificationContext& context) { + FillCreateViewProposal(*schemeOperation.MutableCreateView(), settings, context.GetExternalData()); } void PrepareDropView(NKqpProto::TKqpSchemeOperation& schemeOperation, - const NYql::TObjectSettingsImpl& settings, - TInternalModificationContext& context) { - FillDropViewProposal(*schemeOperation.MutableDropView(), settings, context.GetExternalData().GetDatabase()); + const NYql::TObjectSettingsImpl& settings) { + FillDropViewProposal(*schemeOperation.MutableDropView(), settings); } } @@ -173,7 +204,7 @@ 
TViewManager::TYqlConclusionStatus TViewManager::DoPrepare(NKqpProto::TKqpScheme PrepareCreateView(schemeOperation, settings, context); break; case EActivityType::Drop: - PrepareDropView(schemeOperation, settings, context); + PrepareDropView(schemeOperation, settings); break; } } catch (...) { @@ -198,10 +229,10 @@ NThreading::TFuture TViewManager::ExecutePrepared(const NK switch (schemeOperation.GetOperationCase()) { case NKqpProto::TKqpSchemeOperation::kCreateView: schemeTx.CopyFrom(schemeOperation.GetCreateView()); - return SendSchemeRequest(proposal.Release(), context.GetActorSystem(), true); + break; case NKqpProto::TKqpSchemeOperation::kDropView: schemeTx.CopyFrom(schemeOperation.GetDropView()); - return SendSchemeRequest(proposal.Release(), context.GetActorSystem(), false); + break; default: return NThreading::MakeFuture(TYqlConclusionStatus::Fail( TStringBuilder() @@ -210,6 +241,12 @@ NThreading::TFuture TViewManager::ExecutePrepared(const NK ) ); } + return SendSchemeRequest( + proposal.Release(), + context.GetActorSystem(), + schemeTx.GetFailedOnAlreadyExists(), + schemeTx.GetSuccessOnNotExist() + ); } } diff --git a/ydb/core/kqp/gateway/behaviour/view/ya.make b/ydb/core/kqp/gateway/behaviour/view/ya.make index 6cb342036bda..7d57b8ceaab8 100644 --- a/ydb/core/kqp/gateway/behaviour/view/ya.make +++ b/ydb/core/kqp/gateway/behaviour/view/ya.make @@ -8,6 +8,7 @@ SRCS( PEERDIR( ydb/core/base ydb/core/kqp/gateway/actors + ydb/core/kqp/provider ydb/core/tx/tx_proxy ydb/services/metadata/abstract ydb/services/metadata/manager diff --git a/ydb/core/kqp/gateway/behaviour/ya.make b/ydb/core/kqp/gateway/behaviour/ya.make index bd59426cc368..61c62535ac96 100644 --- a/ydb/core/kqp/gateway/behaviour/ya.make +++ b/ydb/core/kqp/gateway/behaviour/ya.make @@ -1,6 +1,7 @@ RECURSE( external_data_source resource_pool + resource_pool_classifier table tablestore ) diff --git a/ydb/core/kqp/gateway/kqp_gateway.h b/ydb/core/kqp/gateway/kqp_gateway.h index 
1c9bfdcac103..681f02a0c3e7 100644 --- a/ydb/core/kqp/gateway/kqp_gateway.h +++ b/ydb/core/kqp/gateway/kqp_gateway.h @@ -27,6 +27,15 @@ class IRequestCtxMtSafe; } +namespace NKikimr::NKqp::NRm { + class IKqpResourceManager; +} + +namespace NKikimr::NKqp::NComputeActor { + struct IKqpNodeComputeActorFactory; +} + + namespace NKikimr::NKqp { const TStringBuf ParamNamePrefix = "%kqp%"; @@ -145,6 +154,8 @@ class IKqpGateway : public NYql::IKikimrGateway { Ydb::Table::QueryStatsCollection::Mode StatsMode = Ydb::Table::QueryStatsCollection::STATS_COLLECTION_NONE; TDuration ProgressStatsPeriod; TKqpSnapshot Snapshot = TKqpSnapshot(); + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; NKikimrKqp::EIsolationLevel IsolationLevel = NKikimrKqp::ISOLATION_LEVEL_UNDEFINED; TMaybe RlPath; bool NeedTxId = true; @@ -177,6 +188,7 @@ class IKqpGateway : public NYql::IKikimrGateway { public: virtual TString GetDatabase() = 0; + virtual TString GetDatabaseId() = 0; virtual bool GetDomainLoginOnly() = 0; virtual TMaybe GetDomainName() = 0; @@ -220,7 +232,7 @@ class IKqpGateway : public NYql::IKikimrGateway { const Ydb::Table::TransactionSettings& txSettings, const NActors::TActorId& target) = 0; }; -TIntrusivePtr CreateKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, +TIntrusivePtr CreateKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, const TString& databaseId, std::shared_ptr&& metadataLoader, NActors::TActorSystem* actorSystem, ui32 nodeId, TKqpRequestCounters::TPtr counters, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig = NKikimrConfig::TQueryServiceConfig()); diff --git a/ydb/core/kqp/gateway/kqp_ic_gateway.cpp b/ydb/core/kqp/gateway/kqp_ic_gateway.cpp index e5d6bbfcf461..1d219192ea7f 100644 --- a/ydb/core/kqp/gateway/kqp_ic_gateway.cpp +++ b/ydb/core/kqp/gateway/kqp_ic_gateway.cpp @@ -1,5 +1,6 @@ #include "kqp_gateway.h" #include 
"actors/kqp_ic_gateway_actors.h" +#include "actors/analyze_actor.h" #include "actors/scheme.h" #include "kqp_metadata_loader.h" #include "local_rpc/helper.h" @@ -226,7 +227,7 @@ class TKqpScanQueryRequestHandler : public TRequestHandlerBase< void HandleResponse(typename TResponse::TPtr &ev, const TActorContext &ctx) { auto& response = *ev->Get()->Record.GetRef().MutableResponse(); - NKikimr::ConvertYdbResultToKqpResult(ResultSet,*response.AddResults()); + response.AddYdbResults()->CopyFrom(ResultSet); for (auto& execStats : Executions) { response.MutableQueryStats()->AddExecutions()->Swap(&execStats); } @@ -285,20 +286,18 @@ class TKqpStreamRequestHandler : public TRequestHandlerBase< virtual void HandleResponse(typename TResponse::TPtr &ev, const TActorContext &ctx) { auto& record = ev->Get()->Record.GetRef(); if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - if (record.MutableResponse()->GetResults().size()) { + if (record.MutableResponse()->GetYdbResults().size()) { // Send result sets to RPC actor TStreamExecuteYqlScriptRPC auto evStreamPart = MakeHolder(); ActorIdToProto(this->SelfId(), evStreamPart->Record.MutableGatewayActorId()); - for (int i = 0; i < record.MutableResponse()->MutableResults()->size(); ++i) { + for (int i = 0; i < record.MutableResponse()->MutableYdbResults()->size(); ++i) { // Workaround to avoid errors on Pull execution stage which would expect some results - Ydb::ResultSet resultSet; - NKikimr::ConvertYdbResultToKqpResult(resultSet, *evStreamPart->Record.AddResults()); + evStreamPart->Record.AddResults(); } - evStreamPart->Record.MutableResults()->Swap(record.MutableResponse()->MutableResults()); + evStreamPart->Record.MutableResults()->Swap(record.MutableResponse()->MutableYdbResults()); this->Send(TargetActorId, evStreamPart.Release()); - // Save response without data to send it later ResponseHandle = ev.Release(); } else { @@ -404,7 +403,7 @@ class TKqpForwardStreamRequestHandler : public TRequestHandlerBase< auto& response = 
*ev->Get()->Record.GetRef().MutableResponse(); Ydb::ResultSet resultSet; - NKikimr::ConvertYdbResultToKqpResult(resultSet, *response.AddResults()); + response.AddYdbResults()->CopyFrom(resultSet); for (auto& execStats : Executions) { response.MutableQueryStats()->AddExecutions()->Swap(&execStats); } @@ -510,7 +509,7 @@ class TKqpGenericQueryRequestHandler: public TRequestHandlerBase< auto& response = *ev->Get()->Record.GetRef().MutableResponse(); for (auto& resultSet : ResultSets) { - ConvertYdbResultToKqpResult(std::move(resultSet.ResultSet), *response.AddResults()); + response.AddYdbResults()->Swap(&resultSet.ResultSet); } TBase::HandleResponse(ev, ctx); @@ -537,10 +536,11 @@ class TKqpSchemeExecuterRequestHandler: public TActorBootstrapped& requestType, const TString& database, - TIntrusiveConstPtr userToken, TPromise promise) + const TString& databaseId, TIntrusiveConstPtr userToken, TPromise promise) : PhyTx(std::move(phyTx)) , QueryType(queryType) , Database(database) + , DatabaseId(databaseId) , UserToken(std::move(userToken)) , Promise(promise) , RequestType(requestType) @@ -548,6 +548,7 @@ class TKqpSchemeExecuterRequestHandler: public TActorBootstrapped(); + ctx->DatabaseId = DatabaseId; IActor* actor = CreateKqpSchemeExecuter(PhyTx, QueryType, SelfId(), RequestType, Database, UserToken, false /* temporary */, TString() /* sessionId */, ctx); Register(actor); Become(&TThis::WaitState); @@ -579,6 +580,7 @@ class TKqpSchemeExecuterRequestHandler: public TActorBootstrapped UserToken; TPromise Promise; const TMaybe RequestType; @@ -671,8 +673,8 @@ void KqpResponseToQueryResult(const NKikimrKqp::TEvQueryResponse& response, IKqp queryResult.AddIssue(NYql::IssueFromMessage(issue)); } - for (auto& result : queryResponse.GetResults()) { - auto arenaResult = google::protobuf::Arena::CreateMessage( + for (auto& result : queryResponse.GetYdbResults()) { + auto arenaResult = google::protobuf::Arena::CreateMessage( queryResult.ProtobufArenaPtr.get()); 
arenaResult->CopyFrom(result); @@ -701,11 +703,12 @@ class TKikimrIcGateway : public IKqpGateway { using TNavigate = NSchemeCache::TSchemeCacheNavigate; public: - TKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, std::shared_ptr&& metadataLoader, + TKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, const TString& databaseId, std::shared_ptr&& metadataLoader, TActorSystem* actorSystem, ui32 nodeId, TKqpRequestCounters::TPtr counters, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig) : Cluster(cluster) , QueryType(queryType) , Database(database) + , DatabaseId(databaseId) , ActorSystem(actorSystem) , NodeId(nodeId) , Counters(counters) @@ -728,6 +731,10 @@ class TKikimrIcGateway : public IKqpGateway { return Database; } + TString GetDatabaseId() override { + return DatabaseId; + } + TMaybe GetSetting(const TString& cluster, const TString& name) override { Y_UNUSED(cluster); Y_UNUSED(name); @@ -976,7 +983,11 @@ class TKikimrIcGateway : public IKqpGateway { return NotImplemented(); } - TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) override { + TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) override { + if (existingOk) { + return MakeFuture(ResultFromError("IF NOT EXISTS statement is not supported for CREATE TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -988,9 +999,27 @@ class TKikimrIcGateway : public IKqpGateway { catch (yexception& e) { return MakeFuture(ResultFromException(e)); } + Y_UNUSED(existingOk); } - TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) override { + TFuture AlterTopicPrepared(NYql::TAlterTopicSettings&& settings) override { + auto schemaTxPromise = NewPromise(); + auto schemaTxFuture = schemaTxPromise.GetFuture(); + + 
NKikimr::NGRpcProxy::V1::TAlterTopicRequest request{ + std::move(settings.Request), settings.WorkDir, settings.Name, Database, GetTokenCompat(), + settings.MissingOk + }; + IActor* requestHandler = new NKikimr::NGRpcProxy::V1::TAlterTopicActorInternal(std::move(request), std::move(schemaTxPromise), settings.MissingOk); + RegisterActor(requestHandler); + return schemaTxFuture; + } + + TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) override { + if (missingOk) { + return MakeFuture(ResultFromError("IF EXISTS statement is not supported for ALTER TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -1004,7 +1033,11 @@ class TKikimrIcGateway : public IKqpGateway { } } - TFuture DropTopic(const TString& cluster, const TString& topic) override { + TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) override { + if (missingOk) { + return MakeFuture(ResultFromError("IF EXISTS statement is not supported for DROP TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -1019,6 +1052,7 @@ class TKikimrIcGateway : public IKqpGateway { catch (yexception& e) { return MakeFuture(ResultFromException(e)); } + } TFuture CreateReplication(const TString&, const NYql::TCreateReplicationSettings&) override { @@ -1386,6 +1420,22 @@ class TKikimrIcGateway : public IKqpGateway { } } + TFuture Analyze(const TString& cluster, const NYql::TAnalyzeSettings& settings) override { + try { + if (!CheckCluster(cluster)) { + return InvalidCluster(cluster); + } + + auto analyzePromise = NewPromise(); + IActor* analyzeActor = new TAnalyzeActor(settings.TablePath, settings.Columns, analyzePromise); + RegisterActor(analyzeActor); + + return analyzePromise.GetFuture(); + } catch (yexception& e) { + return MakeFuture(ResultFromException(e)); + } + } + template class IObjectModifier { public: @@ -1429,6 +1479,7 @@ class TKikimrIcGateway : 
public IKqpGateway { context.SetUserToken(*GetUserToken()); } context.SetDatabase(Owner.Database); + context.SetDatabaseId(Owner.DatabaseId); context.SetActorSystem(Owner.ActorSystem); return DoExecute(cBehaviour, settings, context).Apply([](const NThreading::TFuture& f) { if (f.HasValue() && !f.HasException() && f.GetValue().Ok()) { @@ -2180,7 +2231,7 @@ class TKikimrIcGateway : public IKqpGateway { TFuture SendSchemeExecuterRequest(const TString&, const TMaybe& requestType, const std::shared_ptr& phyTx) override { auto promise = NewPromise(); - IActor* requestHandler = new TKqpSchemeExecuterRequestHandler(phyTx, QueryType, requestType, Database, UserToken, promise); + IActor* requestHandler = new TKqpSchemeExecuterRequestHandler(phyTx, QueryType, requestType, Database, DatabaseId, UserToken, promise); RegisterActor(requestHandler); return promise.GetFuture(); } @@ -2260,6 +2311,7 @@ class TKikimrIcGateway : public IKqpGateway { TString Cluster; const NKikimrKqp::EQueryType QueryType; TString Database; + TString DatabaseId; TActorSystem* ActorSystem; ui32 NodeId; TKqpRequestCounters::TPtr Counters; @@ -2271,11 +2323,11 @@ class TKikimrIcGateway : public IKqpGateway { } // namespace -TIntrusivePtr CreateKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, +TIntrusivePtr CreateKikimrIcGateway(const TString& cluster, NKikimrKqp::EQueryType queryType, const TString& database, const TString& databaseId, std::shared_ptr&& metadataLoader, TActorSystem* actorSystem, ui32 nodeId, TKqpRequestCounters::TPtr counters, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig) { - return MakeIntrusive(cluster, queryType, database, std::move(metadataLoader), actorSystem, nodeId, + return MakeIntrusive(cluster, queryType, database, databaseId, std::move(metadataLoader), actorSystem, nodeId, counters, queryServiceConfig); } diff --git a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp 
index 3959b377e3d1..fc9a3ac619c4 100644 --- a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp +++ b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -281,7 +281,6 @@ TTableMetadataResult GetExternalDataSourceMetadataResult(const NSchemeCache::TSc tableMeta->ExternalSource.DataSourceAuth = description.GetAuth(); tableMeta->ExternalSource.Properties = description.GetProperties(); tableMeta->ExternalSource.DataSourcePath = tableName; - tableMeta->ExternalSource.TableLocation = JoinPath(entry.Path); return result; } @@ -303,7 +302,7 @@ TTableMetadataResult GetViewMetadataResult( metadata->SchemaVersion = description.GetVersion(); metadata->Kind = NYql::EKikimrTableKind::View; metadata->Attributes = schemeEntry.Attributes; - metadata->ViewPersistedData = {description.GetQueryText()}; + metadata->ViewPersistedData = {description.GetQueryText(), description.GetCapturedContext()}; return builtResult; } @@ -830,18 +829,23 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadTableMeta switch (entry.Kind) { case EKind::KindExternalDataSource: { - if (externalPath) { - entry.Path = SplitPath(*externalPath); - } auto externalDataSourceMetadata = GetLoadTableMetadataResult(entry, cluster, mainCluster, table); if (!externalDataSourceMetadata.Success() || !settings.RequestAuthInfo_) { promise.SetValue(externalDataSourceMetadata); return; } + if (externalPath) { + externalDataSourceMetadata.Metadata->ExternalSource.TableLocation = *externalPath; + } LoadExternalDataSourceSecretValues(entry, userToken, ActorSystem) .Subscribe([promise, externalDataSourceMetadata, settings](const TFuture& result) mutable { UpdateExternalDataSourceSecretsValue(externalDataSourceMetadata, result.GetValue()); + if (!externalDataSourceMetadata.Success()) { + promise.SetValue(externalDataSourceMetadata); + return; + } + NExternalSource::IExternalSource::TPtr externalSource; if (settings.ExternalSourceFactory) { externalSource = 
settings.ExternalSourceFactory->GetOrCreate(externalDataSourceMetadata.Metadata->ExternalSource.Type); @@ -961,6 +965,7 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadTableMeta auto s = resp.Simple; result.Metadata->RecordsCount = s.RowCount; result.Metadata->DataSize = s.BytesSize; + result.Metadata->StatsLoaded = response.Success; promise.SetValue(result); }); diff --git a/ydb/core/kqp/gateway/utils/scheme_helpers.cpp b/ydb/core/kqp/gateway/utils/scheme_helpers.cpp index 8b9657b2607c..072c92746026 100644 --- a/ydb/core/kqp/gateway/utils/scheme_helpers.cpp +++ b/ydb/core/kqp/gateway/utils/scheme_helpers.cpp @@ -81,6 +81,7 @@ void FillCreateExternalTableColumnDesc(NKikimrSchemeOp::TExternalTableDescriptio columnDesc.SetNotNull(columnIt->second.NotNull); } NKikimrExternalSources::TGeneral general; + general.set_location(settings.Location); auto& attributes = *general.mutable_attributes(); for (const auto& [key, value]: settings.SourceTypeParameters) { attributes.insert({key, value}); diff --git a/ydb/core/kqp/gateway/ya.make b/ydb/core/kqp/gateway/ya.make index 51767a6992a1..158d7e41ba03 100644 --- a/ydb/core/kqp/gateway/ya.make +++ b/ydb/core/kqp/gateway/ya.make @@ -18,8 +18,10 @@ PEERDIR( ydb/core/kqp/gateway/behaviour/table ydb/core/kqp/gateway/behaviour/external_data_source ydb/core/kqp/gateway/behaviour/resource_pool + ydb/core/kqp/gateway/behaviour/resource_pool_classifier ydb/core/kqp/gateway/behaviour/view ydb/core/kqp/gateway/utils + ydb/core/statistics/service ydb/library/yql/providers/result/expr_nodes ) diff --git a/ydb/core/kqp/host/kqp_explain_prepared.cpp b/ydb/core/kqp/host/kqp_explain_prepared.cpp index 91ee362da0f1..f76ce851140c 100644 --- a/ydb/core/kqp/host/kqp_explain_prepared.cpp +++ b/ydb/core/kqp/host/kqp_explain_prepared.cpp @@ -66,7 +66,7 @@ class TKqpExplainPreparedTransformer : public NYql::TGraphTransformerBase { PhyQuerySetTxPlans(query, TKqpPhysicalQuery(TransformCtx->ExplainTransformerInput), std::move(TxResults), ctx, Cluster, 
TransformCtx->Tables, TransformCtx->Config, TypeCtx, OptimizeCtx); - query.SetQueryAst(KqpExprToPrettyString(*TransformCtx->ExplainTransformerInput, ctx)); + query.SetQueryAst(KqpExprToPrettyString(*input, ctx)); TransformCtx->ExplainTransformerInput = nullptr; return TStatus::Ok; diff --git a/ydb/core/kqp/host/kqp_gateway_proxy.cpp b/ydb/core/kqp/host/kqp_gateway_proxy.cpp index 3c717ee29c7c..29986334cd8e 100644 --- a/ydb/core/kqp/host/kqp_gateway_proxy.cpp +++ b/ydb/core/kqp/host/kqp_gateway_proxy.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace NKikimr::NKqp { @@ -108,7 +109,7 @@ bool ConvertDataSlotToYdbTypedValue(NYql::EDataSlot fromType, const TString& fro case NYql::EDataSlot::Interval64: toType->set_type_id(Ydb::Type::INTERVAL64); toValue->set_int64_value(FromString(fromValue)); - break; + break; default: return false; } @@ -914,16 +915,120 @@ class TKqpGatewayProxy : public IKikimrGateway { return dropPromise.GetFuture(); } - TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) override { - FORWARD_ENSURE_NO_PREPARE(CreateTopic, cluster, std::move(request)); + TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) override { + CHECK_PREPARED_DDL(CreateTopic); + Y_UNUSED(cluster); + + std::pair pathPair; + TString error; + auto createPromise = NewPromise(); + if (!NSchemeHelpers::SplitTablePath(request.path(), GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + NKikimrSchemeOp::TModifyScheme schemeTx; + schemeTx.SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreatePersQueueGroup); + + schemeTx.SetWorkingDir(pathPair.first); + + auto pqDescr = schemeTx.MutableCreatePersQueueGroup(); + pqDescr->SetName(pathPair.second); + NKikimr::NGRpcProxy::V1::FillProposeRequestImpl(pathPair.second, request, schemeTx, AppData(ActorSystem), error, pathPair.first); + + if (IsPrepare()) { + auto& phyQuery = 
*SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + auto& phyTx = *phyQuery.AddTransactions(); + phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + + + phyTx.MutableSchemeOperation()->MutableCreateTopic()->Swap(&schemeTx); + phyTx.MutableSchemeOperation()->MutableCreateTopic()->SetFailedOnAlreadyExists(!existingOk); + TGenericResult result; + result.SetSuccess(); + createPromise.SetValue(result); + } else { + return Gateway->CreateTopic(cluster, std::move(request), existingOk); + } + return createPromise.GetFuture(); + } + + TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) override { + CHECK_PREPARED_DDL(AlterTopic); + Y_UNUSED(cluster); + std::pair pathPair; + TString error; + if (!NSchemeHelpers::SplitTablePath(request.path(), GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + auto alterPromise = NewPromise(); + + if (IsPrepare()) { + TAlterTopicSettings settings{std::move(request), pathPair.second, pathPair.first, missingOk}; + auto getModifySchemeFuture = Gateway->AlterTopicPrepared(std::move(settings)); + + + auto* phyQuery = SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + + getModifySchemeFuture.Subscribe([=] (const auto future) mutable { + TGenericResult result; + auto modifySchemeResult = future.GetValue(); + if (modifySchemeResult.Status == Ydb::StatusIds::SUCCESS) { + if (modifySchemeResult.ModifyScheme.HasAlterPersQueueGroup()) { + auto* phyTx = phyQuery->AddTransactions(); + phyTx->SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + phyTx->MutableSchemeOperation()->MutableAlterTopic()->Swap(&modifySchemeResult.ModifyScheme); + phyTx->MutableSchemeOperation()->MutableAlterTopic()->SetSuccessOnNotExist(missingOk); + } + result.SetSuccess(); + + } else { + result.SetStatus(NYql::YqlStatusFromYdbStatus(modifySchemeResult.Status)); + result.AddIssues(modifySchemeResult.Issues); + } + alterPromise.SetValue(result); + }); + + } else { + return 
Gateway->AlterTopic(cluster, std::move(request), missingOk); + } + return alterPromise.GetFuture(); + } - TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) override { - FORWARD_ENSURE_NO_PREPARE(AlterTopic, cluster, std::move(request)); + NThreading::TFuture AlterTopicPrepared(TAlterTopicSettings&& settings) override { + return Gateway->AlterTopicPrepared(std::move(settings)); } - TFuture DropTopic(const TString& cluster, const TString& topic) override { - FORWARD_ENSURE_NO_PREPARE(DropTopic, cluster, topic); + TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) override { + CHECK_PREPARED_DDL(DropTopic); + Y_UNUSED(cluster); + + std::pair pathPair; + TString error; + auto dropPromise = NewPromise(); + if (!NSchemeHelpers::SplitTablePath(topic, GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + + if (IsPrepare()) { + auto& phyQuery = *SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + auto& phyTx = *phyQuery.AddTransactions(); + phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + + NKikimrSchemeOp::TModifyScheme schemeTx; + schemeTx.SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); + + schemeTx.SetWorkingDir(pathPair.first); + schemeTx.MutableDrop()->SetName(pathPair.second); + + phyTx.MutableSchemeOperation()->MutableDropTopic()->Swap(&schemeTx); + phyTx.MutableSchemeOperation()->MutableDropTopic()->SetSuccessOnNotExist(missingOk); + TGenericResult result; + result.SetSuccess(); + dropPromise.SetValue(result); + } else { + return Gateway->DropTopic(cluster, topic, missingOk); + } + return dropPromise.GetFuture(); } TFuture ModifyPermissions(const TString& cluster, @@ -1157,10 +1262,12 @@ class TKqpGatewayProxy : public IKikimrGateway { NMetadata::NModifications::IOperationsManager::TExternalModificationContext context; context.SetDatabase(SessionCtx->GetDatabase()); + context.SetDatabaseId(SessionCtx->GetDatabaseId()); 
context.SetActorSystem(ActorSystem); if (SessionCtx->GetUserToken()) { context.SetUserToken(*SessionCtx->GetUserToken()); } + context.SetTranslationSettings(SessionCtx->Query().TranslationSettings); auto& phyTx = phyTxRemover.Capture(SessionCtx->Query().PreparingQuery->MutablePhysicalQuery()); phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); @@ -1901,6 +2008,7 @@ class TKqpGatewayProxy : public IKikimrGateway { const auto parseResult = NYdb::ParseConnectionString(*connectionString); params.SetEndpoint(parseResult.Endpoint); params.SetDatabase(parseResult.Database); + params.SetEnableSsl(parseResult.EnableSsl); } if (const auto& endpoint = settings.Settings.Endpoint) { params.SetEndpoint(*endpoint); @@ -2055,6 +2163,39 @@ class TKqpGatewayProxy : public IKikimrGateway { } } + TFuture Analyze(const TString& cluster, const NYql::TAnalyzeSettings& settings) override { + CHECK_PREPARED_DDL(Analyze); + + try { + if (cluster != SessionCtx->GetCluster()) { + return MakeFuture(ResultFromError("Invalid cluster: " + cluster)); + } + + NKqpProto::TKqpAnalyzeOperation analyzeTx; + analyzeTx.SetTablePath(settings.TablePath); + for (const auto& column: settings.Columns) { + *analyzeTx.AddColumns() = column; + } + + if (IsPrepare()) { + auto& phyQuery = *SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + auto& phyTx = *phyQuery.AddTransactions(); + phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + + phyTx.MutableSchemeOperation()->MutableAnalyzeTable()->Swap(&analyzeTx); + + TGenericResult result; + result.SetSuccess(); + return MakeFuture(result); + } else { + return Gateway->Analyze(cluster, settings); + } + } + catch (yexception& e) { + return MakeFuture(ResultFromException(e)); + } + } + TVector GetCollectedSchemeData() override { return Gateway->GetCollectedSchemeData(); } diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index c54f1a26cc44..f2074d895c1b 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp 
@@ -288,8 +288,8 @@ class TAsyncExecuteYqlResult : public TKqpAsyncResultBase(queryResult.ProtobufArenaPtr.get())); - NKikimrMiniKQL::TResult* result = queryResult.Results.back(); + google::protobuf::Arena::CreateMessage(queryResult.ProtobufArenaPtr.get())); + Ydb::ResultSet* result = queryResult.Results.back(); if (!result->ParseFromArray(resultStr.data(), resultStr.size())) { queryResult = ResultFromError("Failed to parse run result."); @@ -1033,7 +1033,8 @@ class TKqpHost : public IKqpHost { std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr, - NYql::TExprContext* ctx = nullptr) + NYql::TExprContext* ctx = nullptr, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig = NKikimrConfig::TQueryServiceConfig(), + const TIntrusivePtr& userRequestContext = nullptr) : Gateway(gateway) , Cluster(cluster) , GUCSettings(gUCSettings) @@ -1044,13 +1045,14 @@ class TKqpHost : public IKqpHost { , KeepConfigChanges(keepConfigChanges) , IsInternalCall(isInternalCall) , FederatedQuerySetup(federatedQuerySetup) - , SessionCtx(new TKikimrSessionContext(funcRegistry, config, TAppData::TimeProvider, TAppData::RandomProvider, userToken)) + , SessionCtx(new TKikimrSessionContext(funcRegistry, config, TAppData::TimeProvider, TAppData::RandomProvider, userToken, nullptr, userRequestContext)) , Config(config) , TypesCtx(MakeIntrusive()) , PlanBuilder(CreatePlanBuilder(*TypesCtx)) , FakeWorld(ctx ? nullptr : ExprCtx->NewWorld(TPosition())) , ExecuteCtx(MakeIntrusive()) , ActorSystem(actorSystem ? 
actorSystem : NActors::TActivationContext::ActorSystem()) + , QueryServiceConfig(queryServiceConfig) { if (funcRegistry) { FuncRegistry = funcRegistry; @@ -1060,6 +1062,7 @@ class TKqpHost : public IKqpHost { } SessionCtx->SetDatabase(database); + SessionCtx->SetDatabaseId(Gateway->GetDatabaseId()); SessionCtx->SetCluster(cluster); if (tempTablesState) { SessionCtx->SetSessionId(tempTablesState->SessionId); @@ -1071,7 +1074,8 @@ class TKqpHost : public IKqpHost { ActorSystem, FederatedQuerySetup->S3GatewayConfig.GetGeneratorPathsLimit(), FederatedQuerySetup ? FederatedQuerySetup->CredentialsFactory : nullptr, - Config->FeatureFlags.GetEnableExternalSourceSchemaInference()); + Config->FeatureFlags.GetEnableExternalSourceSchemaInference(), + FederatedQuerySetup->S3GatewayConfig.GetAllowLocalFiles()); } } @@ -1225,8 +1229,21 @@ class TKqpHost : public IKqpHost { .SetIsEnableExternalDataSources(SessionCtx->Config().FeatureFlags.GetEnableExternalDataSources()) .SetIsEnablePgConstsToParams(SessionCtx->Config().EnablePgConstsToParams) .SetQueryParameters(query.ParameterTypes) - .SetApplicationName(ApplicationName); - auto astRes = ParseQuery(query.Text, isSql, sqlVersion, TypesCtx->DeprecatedSQL, ctx, settingsBuilder, result.KeepInCache, result.CommandTagName); + .SetApplicationName(ApplicationName) + .SetIsEnablePgSyntax(SessionCtx->Config().FeatureFlags.GetEnablePgSyntax()); + NSQLTranslation::TTranslationSettings effectiveSettings; + auto astRes = ParseQuery( + query.Text, + isSql, + sqlVersion, + TypesCtx->DeprecatedSQL, + ctx, + settingsBuilder, + result.KeepInCache, + result.CommandTagName, + &effectiveSettings + ); + SessionCtx->Query().TranslationSettings = std::move(effectiveSettings); if (astRes.ActualSyntaxType == NYql::ESyntaxType::Pg) { SessionCtx->Config().IndexAutoChooserMode = NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX; } @@ -1256,8 +1273,13 @@ class TKqpHost : public IKqpHost { 
YQL_CLOG(INFO, ProviderKqp) << "Compiled query:\n" << KqpExprToPrettyString(*queryExpr, ctx); if (Config->EnableCreateTableAs) { - result.QueryExprs = RewriteExpression(queryExpr, ctx, *TypesCtx, SessionCtx, Cluster); + auto [rewriteResults, rewriteIssues] = RewriteExpression(queryExpr, ctx, *TypesCtx, SessionCtx, Cluster); + ctx.IssueManager.AddIssues(rewriteIssues); + if (!rewriteIssues.Empty()) { + return result; + } + result.QueryExprs = rewriteResults; for (const auto& resultPart : result.QueryExprs) { YQL_CLOG(INFO, ProviderKqp) << "Splitted Compiled query part:\n" << KqpExprToPrettyString(*resultPart, ctx); } @@ -1277,7 +1299,7 @@ class TKqpHost : public IKqpHost { settingsBuilder .SetSqlAutoCommit(false) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, *ExprCtx, sqlVersion, settingsBuilder, settings.PerStatementResult); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, *ExprCtx, sqlVersion, settingsBuilder); return TSplitResult{ .Ctx = std::move(ExprCtxStorage), @@ -1287,7 +1309,7 @@ class TKqpHost : public IKqpHost { } TCompileExprResult CompileYqlQuery(const TKqpQueryRef& query, bool isSql, TExprContext& ctx, TMaybe& sqlVersion, - TKqpTranslationSettingsBuilder& settingsBuilder, bool perStatementResult) const + TKqpTranslationSettingsBuilder& settingsBuilder) const { auto compileResult = CompileQuery(query, isSql, ctx, sqlVersion, settingsBuilder); if (!compileResult.QueryExprs) { @@ -1299,12 +1321,7 @@ class TKqpHost : public IKqpHost { } // Currently used only for create table as - if (!perStatementResult && compileResult.QueryExprs.size() > 1) { - ctx.AddError(YqlIssue(TPosition(), TIssuesIds::KIKIMR_BAD_REQUEST, - "Query can be executed only in per-statement mode (NoTx)")); - compileResult.QueryExprs = {}; - return compileResult; - } else if (compileResult.QueryExprs.size() > 1) { + if (compileResult.QueryExprs.size() > 1) { return compileResult; } @@ -1376,7 +1393,7 @@ class TKqpHost 
: public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, isSql, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, isSql, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1436,7 +1453,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1464,7 +1481,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, queryAst.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1510,7 +1527,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false) 
.SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder, settings.PerStatementResult); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1547,7 +1564,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion = 1; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(query, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1568,7 +1585,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, queryAst.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1595,7 +1612,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if 
(compileResult.QueryExprs.empty()) { return nullptr; } @@ -1624,7 +1641,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1648,7 +1665,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1676,7 +1693,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1695,7 +1712,9 @@ class TKqpHost : public IKqpHost { state->Configuration->AllowAtomicUploadCommit = queryType == EKikimrQueryType::Script; state->Configuration->Init(FederatedQuerySetup->S3GatewayConfig, TypesCtx); state->Gateway = 
FederatedQuerySetup->HttpGateway; + state->GatewayRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); state->ExecutorPoolId = AppData()->UserPoolId; + state->ActorSystem = ActorSystem; auto dataSource = NYql::CreateS3DataSource(state); auto dataSink = NYql::CreateS3DataSink(state); @@ -1759,7 +1778,7 @@ class TKqpHost : public IKqpHost { void Init(EKikimrQueryType queryType) { TransformCtx = MakeIntrusive(Config, SessionCtx->QueryPtr(), SessionCtx->TablesPtr()); - KqpRunner = CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, TransformCtx, *FuncRegistry); + KqpRunner = CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, TransformCtx, *FuncRegistry, ActorSystem); ExprCtx->NodesAllocationLimit = SessionCtx->Config()._KqpExprNodesAllocationLimit.Get().GetRef(); ExprCtx->StringsAllocationLimit = SessionCtx->Config()._KqpExprStringsAllocationLimit.Get().GetRef(); @@ -1774,7 +1793,7 @@ class TKqpHost : public IKqpHost { auto queryExecutor = MakeIntrusive(Gateway, Cluster, SessionCtx, KqpRunner); auto kikimrDataSource = CreateKikimrDataSource(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, - ExternalSourceFactory, IsInternalCall); + ExternalSourceFactory, IsInternalCall, GUCSettings); auto kikimrDataSink = CreateKikimrDataSink(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, ExternalSourceFactory, queryExecutor); FillSettings.AllResultsBytesLimit = Nothing(); @@ -1824,10 +1843,15 @@ class TKqpHost : public IKqpHost { || settingName == "FilterPushdownOverJoinOptionalSide" || settingName == "DisableFilterPushdownOverJoinOptionalSide" || settingName == "RotateJoinTree" + || settingName == "TimeOrderRecoverDelay" + || settingName == "TimeOrderRecoverAhead" + || settingName == "TimeOrderRecoverRowLimit" + || settingName == "MatchRecognizeStream" ; }; auto configProvider = CreateConfigProvider(*TypesCtx, gatewaysConfig, {}, allowSettings); TypesCtx->AddDataSource(ConfigProviderName, 
configProvider); + TypesCtx->MatchRecognize = QueryServiceConfig.GetEnableMatchRecognize(); YQL_ENSURE(TypesCtx->Initialize(*ExprCtx)); @@ -1929,6 +1953,7 @@ class TKqpHost : public IKqpHost { TKqpTempTablesState::TConstPtr TempTablesState; NActors::TActorSystem* ActorSystem = nullptr; + NKikimrConfig::TQueryServiceConfig QueryServiceConfig; }; } // namespace @@ -1949,11 +1974,11 @@ Ydb::Table::QueryStatsCollection::Mode GetStatsMode(NYql::EKikimrStatsMode stats TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, TKikimrConfiguration::TPtr config, IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, - bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState, NActors::TActorSystem* actorSystem, NYql::TExprContext* ctx) + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, + bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState, NActors::TActorSystem* actorSystem, NYql::TExprContext* ctx, const TIntrusivePtr& userRequestContext) { return MakeIntrusive(gateway, cluster, database, gUCSettings, applicationName, config, moduleResolver, federatedQuerySetup, userToken, funcRegistry, - keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx); + keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx, queryServiceConfig, userRequestContext); } } // namespace NKqp diff --git a/ydb/core/kqp/host/kqp_host.h b/ydb/core/kqp/host/kqp_host.h index ff94738619f0..b52c5dbb488e 100644 --- a/ydb/core/kqp/host/kqp_host.h +++ b/ydb/core/kqp/host/kqp_host.h @@ -45,7 +45,6 @@ class IKqpHost : public TThrRefBase { struct TPrepareSettings: public 
TExecSettings { TMaybe IsInternalCall; TMaybe ConcurrentResults; - bool PerStatementResult; TString ToString() const { return TStringBuilder() << "TPrepareSettings{" @@ -120,10 +119,10 @@ class IKqpHost : public TThrRefBase { TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, NYql::TKikimrConfiguration::TPtr config, NYql::IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, bool keepConfigChanges = false, bool isInternalCall = false, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr /*take from TLS by default*/, - NYql::TExprContext* ctx = nullptr); + NYql::TExprContext* ctx = nullptr, const TIntrusivePtr& userRequestContext = nullptr); } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/host/kqp_host_impl.h b/ydb/core/kqp/host/kqp_host_impl.h index 83e238e1cd93..e925a4cc3c40 100644 --- a/ydb/core/kqp/host/kqp_host_impl.h +++ b/ydb/core/kqp/host/kqp_host_impl.h @@ -265,7 +265,7 @@ class IKqpRunner : public TThrRefBase { TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry); + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry, TActorSystem* actorSystem); TAutoPtr CreateKqpExplainPreparedTransformer(TIntrusivePtr gateway, const TString& cluster, TIntrusivePtr transformCtx, const NMiniKQL::IFunctionRegistry* funcRegistry, diff --git a/ydb/core/kqp/host/kqp_runner.cpp 
b/ydb/core/kqp/host/kqp_runner.cpp index 3f42256e7790..c0a2bc1b7c47 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -116,17 +117,7 @@ class TPrepareQueryAsyncResult : public TKqpAsyncResultBase results; - for (auto& phyResult : TransformCtx.PhysicalQueryResults) { - auto result = google::protobuf::Arena::CreateMessage( - queryResult.ProtobufArenaPtr.get()); - - result->CopyFrom(phyResult); - results.push_back(result); - } - queryResult.QueryStats.CopyFrom(TransformCtx.QueryStats); - queryResult.Results = std::move(results); } private: @@ -137,7 +128,8 @@ class TKqpRunner : public IKqpRunner { public: TKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry, + TActorSystem* actorSystem) : Gateway(gateway) , Cluster(cluster) , TypesCtx(*typesCtx) @@ -146,9 +138,10 @@ class TKqpRunner : public IKqpRunner { , Config(sessionCtx->ConfigPtr()) , TransformCtx(transformCtx) , OptimizeCtx(MakeIntrusive(cluster, Config, sessionCtx->QueryPtr(), - sessionCtx->TablesPtr())) + sessionCtx->TablesPtr(), sessionCtx->GetUserRequestContext())) , BuildQueryCtx(MakeIntrusive()) - , Pctx(TKqpProviderContext(*OptimizeCtx, Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel))) + , Pctx(TKqpProviderContext(*OptimizeCtx, Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel))) + , ActorSystem(actorSystem) { CreateGraphTransformer(typesCtx, sessionCtx, funcRegistry); } @@ -297,6 +290,7 @@ class TKqpRunner : public IKqpRunner { .AddPostTypeAnnotation(/* forSubgraph */ true) .AddCommonOptimization() .Add(CreateKqpConstantFoldingTransformer(OptimizeCtx, *typesCtx, 
Config), "ConstantFolding") + .Add(CreateKqpColumnStatisticsRequester(Config, *typesCtx, SessionCtx->Tables(), Cluster, ActorSystem), "ColumnStatisticsRequester") .Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config, Pctx), "Statistics") .Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config), "LogicalOptimize") .Add(CreateLogicalDataProposalsInspector(*typesCtx), "ProvidersLogicalOptimize") @@ -324,7 +318,7 @@ class TKqpRunner : public IKqpRunner { Config), "BuildPhysicalTxs") .Build(false)); - + auto physicalBuildQueryTransformer = TTransformationPipeline(typesCtx) .AddServiceTransformers() .Add(Log("PhysicalBuildQuery"), "LogPhysicalBuildQuery") @@ -399,15 +393,17 @@ class TKqpRunner : public IKqpRunner { TKqpProviderContext Pctx; TAutoPtr Transformer; + + TActorSystem* ActorSystem; }; } // namespace TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry, TActorSystem* actorSystem) { - return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, transformCtx, funcRegistry); + return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, transformCtx, funcRegistry, actorSystem); } } // namespace NKqp diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index bf15ab483261..fa06bf64bfb4 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -213,12 +213,10 @@ namespace { exprCtx.NewAtom(pos, "mode"), exprCtx.NewAtom(pos, "replace"), })); - if (!isOlap) { - insertSettings.push_back( - exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "AllowInconsistentWrites"), - })); - } + insertSettings.push_back( + exprCtx.NewList(pos, { + exprCtx.NewAtom(pos, "AllowInconsistentWrites"), + })); const auto 
insert = exprCtx.NewCallable(pos, "Write!", { topLevelRead == nullptr ? exprCtx.NewWorld(pos) : exprCtx.NewCallable(pos, "Left!", {topLevelRead.Get()}), @@ -292,20 +290,25 @@ namespace { } } -TVector RewriteExpression( +std::pair, NYql::TIssues> RewriteExpression( const NYql::TExprNode::TPtr& root, NYql::TExprContext& exprCtx, NYql::TTypeAnnotationContext& typeCtx, const TIntrusivePtr& sessionCtx, const TString& cluster) { + NYql::TIssues issues; // CREATE TABLE AS statement can be used only with perstatement execution. // Thus we assume that there is only one such statement. + ui64 actionsCount = 0; TVector result; VisitExpr(root, [&](const NYql::TExprNode::TPtr& node) { if (NYql::NNodes::TCoWrite::Match(node.Get())) { + ++actionsCount; const auto rewriteResult = RewriteCreateTableAs(node, exprCtx, typeCtx, sessionCtx, cluster); if (rewriteResult) { - YQL_ENSURE(result.empty()); + if (!result.empty()) { + issues.AddIssue("Several CTAS statement can't be used without per-statement mode."); + } result.push_back(rewriteResult->CreateTable); result.push_back(rewriteResult->ReplaceInto); if (rewriteResult->MoveTable) { @@ -316,10 +319,14 @@ TVector RewriteExpression( return true; }); + if (!result.empty() && actionsCount > 1) { + issues.AddIssue("CTAS statement can't be used with other statements without per-statement mode."); + } + if (result.empty()) { result.push_back(root); } - return result; + return {result, issues}; } } diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.h b/ydb/core/kqp/host/kqp_statement_rewrite.h index 95c4b3a506d1..3b9b560c5684 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.h +++ b/ydb/core/kqp/host/kqp_statement_rewrite.h @@ -7,7 +7,7 @@ namespace NKikimr { namespace NKqp { -TVector RewriteExpression( +std::pair, NYql::TIssues> RewriteExpression( const NYql::TExprNode::TPtr& root, NYql::TExprContext& ctx, NYql::TTypeAnnotationContext& typeCtx, diff --git a/ydb/core/kqp/host/kqp_transform.h b/ydb/core/kqp/host/kqp_transform.h index 
bdf95715ba9d..0004340de583 100644 --- a/ydb/core/kqp/host/kqp_transform.h +++ b/ydb/core/kqp/host/kqp_transform.h @@ -26,18 +26,13 @@ struct TKqlTransformContext : TThrRefBase { NKqpProto::TKqpStatsQuery QueryStats; std::shared_ptr PhysicalQuery; - TVector> MkqlResults; - TVector PhysicalQueryResults; - NYql::TExprNode::TPtr ExplainTransformerInput; // Explain transformer must work after other transformers, but use input before peephole TMaybe DataQueryBlocks; void Reset() { ReplyTarget = {}; - MkqlResults.clear(); QueryStats = {}; PhysicalQuery = nullptr; - PhysicalQueryResults.clear(); ExplainTransformerInput = nullptr; DataQueryBlocks = Nothing(); } diff --git a/ydb/core/kqp/host/kqp_translate.cpp b/ydb/core/kqp/host/kqp_translate.cpp index 35e632c273b4..8b7d842f88ed 100644 --- a/ydb/core/kqp/host/kqp_translate.cpp +++ b/ydb/core/kqp/host/kqp_translate.cpp @@ -1,6 +1,8 @@ #include "kqp_translate.h" +#include #include +#include namespace NKikimr { @@ -52,7 +54,7 @@ NYql::EKikimrQueryType ConvertType(NKikimrKqp::EQueryType type) { YQL_ENSURE(false, "Unexpected query type: " << type); } } - + NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql::TExprContext& ctx) { NSQLTranslation::TTranslationSettings settings; settings.PgParser = UsePgParser && *UsePgParser; @@ -84,6 +86,7 @@ NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql settings.SaveWorldDependencies = true; } + settings.PGDisable = !IsEnablePgSyntax; settings.InferSyntaxVersion = true; settings.V0ForceDisable = false; settings.WarnOnV0 = false; @@ -151,13 +154,14 @@ NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql } NYql::TAstParseResult ParseQuery(const TString& queryText, bool isSql, TMaybe& sqlVersion, bool& deprecatedSQL, - NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName) { + NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& 
settingsBuilder, bool& keepInCache, TMaybe& commandTagName, + NSQLTranslation::TTranslationSettings* effectiveSettings) { NYql::TAstParseResult astRes; settingsBuilder.SetSqlVersion(sqlVersion); if (isSql) { auto settings = settingsBuilder.Build(ctx); NYql::TStmtParseInfo stmtParseInfo; - auto ast = NSQLTranslation::SqlToYql(queryText, settings, nullptr, &stmtParseInfo); + auto ast = NSQLTranslation::SqlToYql(queryText, settings, nullptr, &stmtParseInfo, effectiveSettings); deprecatedSQL = (ast.ActualSyntaxType == NYql::ESyntaxType::YQLv0); sqlVersion = ast.ActualSyntaxType == NYql::ESyntaxType::YQLv1 ? 1 : 0; keepInCache = stmtParseInfo.KeepInCache; diff --git a/ydb/core/kqp/host/kqp_translate.h b/ydb/core/kqp/host/kqp_translate.h index e54879819d97..d71c01d4ce2b 100644 --- a/ydb/core/kqp/host/kqp_translate.h +++ b/ydb/core/kqp/host/kqp_translate.h @@ -1,7 +1,8 @@ #pragma once -#include -#include +#include +#include +#include #include namespace NKikimr { @@ -61,6 +62,11 @@ class TKqpTranslationSettingsBuilder { return *this; } + TKqpTranslationSettingsBuilder& SetIsEnablePgSyntax(bool value) { + IsEnablePgSyntax = value; + return *this; + } + private: const NYql::EKikimrQueryType QueryType; const ui16 KqpYqlSyntaxVersion; @@ -72,6 +78,7 @@ class TKqpTranslationSettingsBuilder { TString KqpTablePathPrefix = {}; bool IsEnableExternalDataSources = false; bool IsEnablePgConstsToParams = false; + bool IsEnablePgSyntax = false; TMaybe SqlAutoCommit = {}; TGUCSettings::TPtr GUCSettings; TMaybe ApplicationName = {}; @@ -84,7 +91,8 @@ NSQLTranslation::EBindingsMode RemapBindingsMode(NKikimrConfig::TTableServiceCon NYql::EKikimrQueryType ConvertType(NKikimrKqp::EQueryType type); NYql::TAstParseResult ParseQuery(const TString& queryText, bool isSql, TMaybe& sqlVersion, bool& deprecatedSQL, - NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName); + NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& 
settingsBuilder, bool& keepInCache, TMaybe& commandTagName, + NSQLTranslation::TTranslationSettings* effectiveSettings = nullptr); TVector ParseStatements(const TString& queryText, const TMaybe& syntax, bool isSql, TKqpTranslationSettingsBuilder& settingsBuilder, bool perStatementExecution); diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index ab1c98f876fc..36249024fea4 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -451,7 +451,12 @@ TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx, TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, const TKikimrTablesData& tablesData, bool withSystemColumns) { - if (!EnsureArgsCount(*node, TKqlLookupIndexBase::Match(node.Get()) || TKqlStreamLookupTable::Match(node.Get()) ? 4 : 3, ctx)) { + const bool isStreamLookup = TKqlStreamLookupTable::Match(node.Get()) || TKqlStreamLookupIndex::Match(node.Get()); + if (isStreamLookup && !EnsureArgsCount(*node, TKqlStreamLookupIndex::Match(node.Get()) ? 5 : 4, ctx)) { + return TStatus::Error; + } + + if (!isStreamLookup && !EnsureArgsCount(*node, TKqlLookupIndexBase::Match(node.Get()) ? 4 : 3, ctx)) { return TStatus::Error; } @@ -495,14 +500,16 @@ TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, cons YQL_ENSURE(lookupType); const TStructExprType* structType = nullptr; - bool isStreamLookup = TKqlStreamLookupTable::Match(node.Get()); if (isStreamLookup) { - auto lookupStrategy = node->Child(TKqlStreamLookupTable::idx_LookupStrategy); + auto lookupStrategy = node->Child(TKqlStreamLookupTable::Match(node.Get()) ? 
+ TKqlStreamLookupTable::idx_LookupStrategy : TKqlStreamLookupIndex::idx_LookupStrategy); if (!EnsureAtom(*lookupStrategy, ctx)) { return TStatus::Error; } - if (lookupStrategy->Content() == TKqpStreamLookupJoinStrategyName) { + if (lookupStrategy->Content() == TKqpStreamLookupJoinStrategyName + || lookupStrategy->Content() == TKqpStreamLookupSemiJoinStrategyName) { + if (!EnsureTupleType(node->Pos(), *lookupType, ctx)) { return TStatus::Error; } @@ -1682,7 +1689,9 @@ TStatus AnnotateStreamLookupConnection(const TExprNode::TPtr& node, TExprContext node->SetTypeAnn(ctx.MakeType(rowType)); - } else if (lookupStrategy.Value() == TKqpStreamLookupJoinStrategyName) { + } else if (lookupStrategy.Value() == TKqpStreamLookupJoinStrategyName + || lookupStrategy.Value() == TKqpStreamLookupSemiJoinStrategyName) { + if (!EnsureTupleType(node->Pos(), *inputItemType, ctx)) { return TStatus::Error; } diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp index 1416a56be23a..4bdb4d5e4def 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.cpp +++ b/ydb/core/kqp/node_service/kqp_node_service.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -60,11 +61,16 @@ class TKqpNodeService : public TActorBootstrapped { return NKikimrServices::TActivity::KQP_NODE_SERVICE; } - TKqpNodeService(const NKikimrConfig::TTableServiceConfig& config, const TIntrusivePtr& counters, + TKqpNodeService(const NKikimrConfig::TTableServiceConfig& config, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, + const TIntrusivePtr& counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional& federatedQuerySetup) : Config(config.GetResourceManager()) , Counters(counters) + , ResourceManager_(std::move(resourceManager)) + , CaFactory_(std::move(caFactory)) , AsyncIoFactory(std::move(asyncIoFactory)) , FederatedQuerySetup(federatedQuerySetup) , State_(std::make_shared()) @@ -75,6 +81,13 @@ class 
TKqpNodeService : public TActorBootstrapped { if (config.HasIteratorReadQuotaSettings()) { SetIteratorReadsQuotaSettings(config.GetIteratorReadQuotaSettings()); } + + SchedulerOptions = { + .AdvanceTimeInterval = TDuration::MicroSeconds(config.GetComputeSchedulerSettings().GetAdvanceTimeIntervalUsec()), + .ForgetOverflowTimeout = TDuration::MicroSeconds(config.GetComputeSchedulerSettings().GetForgetOverflowTimeoutUsec()), + .ActivePoolPollingTimeout = TDuration::Seconds(config.GetComputeSchedulerSettings().GetActivePoolPollingSec()), + .Counters = counters, + }; } void Bootstrap() { @@ -95,6 +108,10 @@ class TKqpNodeService : public TActorBootstrapped { Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup()); Become(&TKqpNodeService::WorkState); + + Scheduler = std::make_shared(); + SchedulerOptions.Scheduler = Scheduler; + SchedulerActorId = RegisterWithSameMailbox(CreateSchedulerActor(SchedulerOptions)); } private: @@ -128,6 +145,10 @@ class TKqpNodeService : public TActorBootstrapped { auto requester = ev->Sender; ui64 txId = msg.GetTxId(); + TMaybe lockTxId = msg.HasLockTxId() + ? 
TMaybe(msg.GetLockTxId()) + : Nothing(); + ui32 lockNodeId = msg.GetLockNodeId(); YQL_ENSURE(msg.GetStartAllOrFail()); // todo: support partial start @@ -159,85 +180,110 @@ class TKqpNodeService : public TActorBootstrapped { memoryPool = NRm::EKqpMemoryPool::Unspecified; } - ui32 requestChannels = 0; - ui64 totalMemory = 0; - for (auto& dqTask : *msg.MutableTasks()) { - auto estimation = EstimateTaskResources(dqTask, Config, msg.GetTasks().size()); - LOG_D("Resource estimation complete" - << ", TxId: " << txId << ", task id: " << dqTask.GetId() << ", node id: " << SelfId().NodeId() - << ", estimated resources: " << estimation.ToString()); - - NKqpNode::TTaskContext& taskCtx = request.InFlyTasks[dqTask.GetId()]; - YQL_ENSURE(taskCtx.TaskId == 0); - taskCtx.TaskId = dqTask.GetId(); + auto reply = MakeHolder(); + reply->Record.SetTxId(txId); - LOG_D("TxId: " << txId << ", task: " << taskCtx.TaskId << ", requested memory: " << estimation.TotalMemoryLimit); - totalMemory += estimation.TotalMemoryLimit; - requestChannels += estimation.ChannelBuffersCount; - } + NYql::NDq::TComputeRuntimeSettings runtimeSettingsBase; + runtimeSettingsBase.ReportStatsSettings = NYql::NDq::TReportStatsSettings{MinStatInterval, MaxStatInterval}; - LOG_D("TxId: " << txId << ", channels: " << requestChannels - << ", computeActors: " << msg.GetTasks().size() << ", memory: " << totalMemory); + TShardsScanningPolicy scanPolicy(Config.GetShardsScanningPolicy()); - TVector allocatedTasks; - allocatedTasks.reserve(msg.GetTasks().size()); - for (auto& task : request.InFlyTasks) { - NRm::TKqpResourcesRequest resourcesRequest; - resourcesRequest.MemoryPool = memoryPool; - resourcesRequest.ExecutionUnits = 1; + NComputeActor::TComputeStagesWithScan computesByStage; - // !!!!!!!!!!!!!!!!!!!!! - // we have to allocate memory instead of reserve only. currently, this memory will not be used for request processing. 
- resourcesRequest.Memory = (1 << 19) /* 512kb limit for check that memory exists for processing with minimal requirements */; + const TString& serializedGUCSettings = ev->Get()->Record.HasSerializedGUCSettings() ? + ev->Get()->Record.GetSerializedGUCSettings() : ""; - auto result = ResourceManager()->AllocateResources(txId, task.first, resourcesRequest); + auto schedulerNow = TlsActivationContext->Monotonic(); - if (!result) { - for (ui64 taskId : allocatedTasks) { - ResourceManager()->FreeResources(txId, taskId); - } + TString schedulerGroup = msg.GetSchedulerGroup(); - ReplyError(txId, request.Executer, msg, result.GetStatus(), result.GetFailReason()); - return; + if (SchedulerOptions.Scheduler->Disabled(schedulerGroup)) { + auto share = msg.GetPoolMaxCpuShare(); + if (share <= 0 && msg.HasQueryCpuShare()) { + share = 1.0; + } + if (share > 0) { + Scheduler->UpdateGroupShare(schedulerGroup, share, schedulerNow); + Send(SchedulerActorId, new TEvSchedulerNewPool(msg.GetDatabaseId(), schedulerGroup)); + } else { + schedulerGroup = ""; } + } - allocatedTasks.push_back(task.first); + std::optional querySchedulerGroup; + if (msg.HasQueryCpuShare() && schedulerGroup) { + querySchedulerGroup = Scheduler->MakePerQueryGroup(schedulerNow, msg.GetQueryCpuShare(), schedulerGroup); } - auto reply = MakeHolder(); - reply->Record.SetTxId(txId); - - NYql::NDq::TComputeRuntimeSettings runtimeSettingsBase; - runtimeSettingsBase.ExtraMemoryAllocationPool = memoryPool; - runtimeSettingsBase.FailOnUndelivery = msgRtSettings.GetExecType() != NYql::NDqProto::TComputeRuntimeSettings::SCAN; - - runtimeSettingsBase.StatsMode = msgRtSettings.GetStatsMode(); - runtimeSettingsBase.UseSpilling = msgRtSettings.GetUseSpilling(); - + // start compute actors + TMaybe rlPath = Nothing(); if (msgRtSettings.HasRlPath()) { - runtimeSettingsBase.RlPath = msgRtSettings.GetRlPath(); + rlPath.ConstructInPlace(msgRtSettings.GetRlPath()); } - runtimeSettingsBase.ReportStatsSettings = 
NYql::NDq::TReportStatsSettings{MinStatInterval, MaxStatInterval}; - - TShardsScanningPolicy scanPolicy(Config.GetShardsScanningPolicy()); + TIntrusivePtr txInfo = MakeIntrusive( + txId, TInstant::Now(), ResourceManager_->GetCounters(), + msg.GetSchedulerGroup(), msg.GetMemoryPoolPercent(), + msg.GetDatabase()); - NComputeActor::TComputeStagesWithScan computesByStage; + const ui32 tasksCount = msg.GetTasks().size(); + for (auto& dqTask: *msg.MutableTasks()) { + TComputeActorSchedulingOptions schedulingTaskOptions { + .Now = schedulerNow, + .SchedulerActorId = SchedulerActorId, + .Scheduler = Scheduler.get(), + .Group = schedulerGroup, + .Weight = 1, + .NoThrottle = schedulerGroup.empty(), + .Counters = Counters + }; + + if (!schedulingTaskOptions.NoThrottle) { + schedulingTaskOptions.Handle = SchedulerOptions.Scheduler->Enroll(schedulingTaskOptions.Group, schedulingTaskOptions.Weight, schedulingTaskOptions.Now); + if (querySchedulerGroup) { + Scheduler->AddToGroup(schedulerNow, *querySchedulerGroup, schedulingTaskOptions.Handle); + } + } - const TString& serializedGUCSettings = ev->Get()->Record.HasSerializedGUCSettings() ? 
- ev->Get()->Record.GetSerializedGUCSettings() : ""; + auto result = CaFactory_->CreateKqpComputeActor({ + .ExecuterId = request.Executer, + .TxId = txId, + .LockTxId = lockTxId, + .LockNodeId = lockNodeId, + .Task = &dqTask, + .TxInfo = txInfo, + .RuntimeSettings = runtimeSettingsBase, + .TraceId = NWilson::TTraceId(ev->TraceId), + .Arena = ev->Get()->Arena, + .SerializedGUCSettings = serializedGUCSettings, + .NumberOfTasks = tasksCount, + .OutputChunkMaxSize = msg.GetOutputChunkMaxSize(), + .MemoryPool = memoryPool, + .WithSpilling = msgRtSettings.GetUseSpilling(), + .StatsMode = msgRtSettings.GetStatsMode(), + .Deadline = TInstant(), + .ShareMailbox = false, + .RlPath = rlPath, + .ComputesByStages = &computesByStage, + .State = State_, + .SchedulingOptions = std::move(schedulingTaskOptions), + }); + + if (const auto* rmResult = std::get_if(&result)) { + ReplyError(txId, request.Executer, msg, rmResult->GetStatus(), rmResult->GetFailReason()); + bucket.NewRequest(std::move(request)); + TerminateTx(txId, rmResult->GetFailReason()); + return; + } - // start compute actors - const ui32 tasksCount = msg.GetTasks().size(); - for (int i = 0; i < msg.GetTasks().size(); ++i) { - auto& dqTask = *msg.MutableTasks(i); auto& taskCtx = request.InFlyTasks[dqTask.GetId()]; + YQL_ENSURE(taskCtx.TaskId == 0); + taskCtx.TaskId = dqTask.GetId(); YQL_ENSURE(taskCtx.TaskId != 0); - taskCtx.ComputeActorId = CaFactory()->CreateKqpComputeActor( - request.Executer, txId, &dqTask, runtimeSettingsBase, - NWilson::TTraceId(ev->TraceId), ev->Get()->Arena, serializedGUCSettings, computesByStage, - msg.GetOutputChunkMaxSize(), State_, memoryPool, tasksCount); + TActorId* actorId = std::get_if(&result); + Y_ABORT_UNLESS(actorId); + taskCtx.ComputeActorId = *actorId; LOG_D("TxId: " << txId << ", executing task: " << taskCtx.TaskId << " on compute actor: " << taskCtx.ComputeActorId); @@ -249,7 +295,7 @@ class TKqpNodeService : public TActorBootstrapped { for (auto&& i : computesByStage) { for 
(auto&& m : i.second.MutableMetaInfo()) { Register(CreateKqpScanFetcher(msg.GetSnapshot(), std::move(m.MutableActorIds()), - m.GetMeta(), runtimeSettingsBase, txId, scanPolicy, Counters, NWilson::TTraceId(ev->TraceId))); + m.GetMeta(), runtimeSettingsBase, txId, lockTxId, lockNodeId, scanPolicy, Counters, NWilson::TTraceId(ev->TraceId))); } } @@ -343,11 +389,15 @@ class TKqpNodeService : public TActorBootstrapped { FORCE_VALUE(EnableInstantMkqlMemoryAlloc); FORCE_VALUE(MaxTotalChannelBuffersSize); FORCE_VALUE(MinChannelBufferSize); + FORCE_VALUE(MinMemAllocSize); + FORCE_VALUE(MinMemFreeSize); #undef FORCE_VALUE LOG_I("Updated table service config: " << Config.DebugString()); } + CaFactory_->ApplyConfig(event.GetConfig().GetTableServiceConfig().GetResourceManager()); + if (event.GetConfig().GetTableServiceConfig().HasIteratorReadsRetrySettings()) { SetIteratorReadsRetrySettings(event.GetConfig().GetTableServiceConfig().GetIteratorReadsRetrySettings()); } @@ -442,24 +492,6 @@ class TKqpNodeService : public TActorBootstrapped { Send(executer, ev.Release()); } - std::shared_ptr ResourceManager() { - if (Y_LIKELY(ResourceManager_)) { - return ResourceManager_; - } - ResourceManager_ = GetKqpResourceManager(); - return ResourceManager_; - } - - std::shared_ptr CaFactory() { - if (Y_LIKELY(CaFactory_)) { - return CaFactory_; - } - - CaFactory_ = NComputeActor::MakeKqpCaFactory( - Config, ResourceManager(), AsyncIoFactory, FederatedQuerySetup); - return CaFactory_; - } - private: NKikimrConfig::TTableServiceConfig::TResourceManager Config; TIntrusivePtr Counters; @@ -468,6 +500,10 @@ class TKqpNodeService : public TActorBootstrapped { NYql::NDq::IDqAsyncIoFactory::TPtr AsyncIoFactory; const std::optional FederatedQuerySetup; + std::shared_ptr Scheduler; + TSchedulerActorOptions SchedulerOptions; + TActorId SchedulerActorId; + //state sharded by TxId std::shared_ptr State_; }; @@ -476,10 +512,13 @@ class TKqpNodeService : public TActorBootstrapped { } // anonymous 
namespace IActor* CreateKqpNodeService(const NKikimrConfig::TTableServiceConfig& tableServiceConfig, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, TIntrusivePtr counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional& federatedQuerySetup) { - return new TKqpNodeService(tableServiceConfig, counters, std::move(asyncIoFactory), federatedQuerySetup); + return new TKqpNodeService(tableServiceConfig, std::move(resourceManager), std::move(caFactory), + counters, std::move(asyncIoFactory), federatedQuerySetup); } } // namespace NKqp diff --git a/ydb/core/kqp/node_service/kqp_node_service.h b/ydb/core/kqp/node_service/kqp_node_service.h index d17a56cf76b7..93f7a7a4e633 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.h +++ b/ydb/core/kqp/node_service/kqp_node_service.h @@ -91,6 +91,8 @@ struct TNodeServiceState : public NKikimr::NKqp::NComputeActor::IKqpNodeState { }; NActors::IActor* CreateKqpNodeService(const NKikimrConfig::TTableServiceConfig& tableServiceConfig, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, TIntrusivePtr counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory = nullptr, const std::optional& federatedQuerySetup = std::nullopt); diff --git a/ydb/core/kqp/opt/kqp_column_statistics_requester.cpp b/ydb/core/kqp/opt/kqp_column_statistics_requester.cpp new file mode 100644 index 000000000000..33f46b6f54e8 --- /dev/null +++ b/ydb/core/kqp/opt/kqp_column_statistics_requester.cpp @@ -0,0 +1,253 @@ +#include "kqp_column_statistics_requester.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +using namespace NThreading; +using namespace NYql; + +void TKqpColumnStatisticsRequester::PropagateTableToLambdaArgument(const TExprNode::TPtr& input) { + if (input->ChildrenSize() < 2) { + return; + } + + auto callableInput = input->ChildRef(0); + + + for (size_t i = 1; i < input->ChildrenSize(); ++i) { + auto maybeLambda = 
TExprBase(input->ChildRef(i)); + if (!maybeLambda.Maybe()) { + continue; + } + + auto lambda = maybeLambda.Cast(); + if (!lambda.Args().Size()){ + continue; + } + + if (callableInput->IsList()){ + for (size_t j = 0; j < callableInput->ChildrenSize(); ++j){ + KqpTableByExprNode[lambda.Args().Arg(j).Ptr()] = KqpTableByExprNode[callableInput->Child(j)]; + } + } else { + KqpTableByExprNode[lambda.Args().Arg(0).Ptr()] = KqpTableByExprNode[callableInput.Get()]; + } + } +} + +IGraphTransformer::TStatus TKqpColumnStatisticsRequester::DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) { + Y_UNUSED(ctx); + + output = input; + auto optLvl = Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel); + auto enableColumnStats = Config->FeatureFlags.GetEnableColumnStatistics(); + if (!(optLvl > 0 && enableColumnStats)) { + return IGraphTransformer::TStatus::Ok; + } + + VisitExprLambdasLast( + input, + [&](const TExprNode::TPtr& input) { + BeforeLambdas(input) || BeforeLambdasUnmatched(input); + + if (input->IsCallable()) { + PropagateTableToLambdaArgument(input); + } + + return true; + }, + [&](const TExprNode::TPtr& input) { + return AfterLambdas(input) || AfterLambdasUnmatched(input); + } + ); + + if (ColumnsByTableName.empty()) { + return IGraphTransformer::TStatus::Ok; + } + + struct TTableMeta { + TString TableName; + THashMap ColumnNameByTag; + }; + THashMap tableMetaByPathId; + + // TODO: Add other statistics, not only COUNT_MIN_SKETCH. 
+ auto getStatisticsRequest = MakeHolder(); + getStatisticsRequest->StatType = NKikimr::NStat::EStatType::COUNT_MIN_SKETCH; + + for (const auto& [table, columns]: ColumnsByTableName) { + auto tableMeta = Tables.GetTable(Cluster, table).Metadata; + auto& columnsMeta = tableMeta->Columns; + + auto pathId = TPathId(tableMeta->PathId.OwnerId(), tableMeta->PathId.TableId()); + for (const auto& column: columns) { + if (TypesCtx.ColumnStatisticsByTableName.contains(table) && TypesCtx.ColumnStatisticsByTableName[table]->Data.contains(column)) { + continue; + } + + if (!columns.contains(column)) { + YQL_CLOG(DEBUG, ProviderKikimr) << "Table: " + table + " doesn't contain " + column + " to request for column statistics"; + } + + NKikimr::NStat::TRequest req; + req.ColumnTag = columnsMeta[column].Id; + req.PathId = pathId; + getStatisticsRequest->StatRequests.push_back(req); + + tableMetaByPathId[pathId].TableName = table; + tableMetaByPathId[pathId].ColumnNameByTag[req.ColumnTag.value()] = column; + } + } + + if (getStatisticsRequest->StatRequests.empty()) { + return IGraphTransformer::TStatus::Ok; + } + + using TRequest = NStat::TEvStatistics::TEvGetStatistics; + using TResponse = NStat::TEvStatistics::TEvGetStatisticsResult; + struct TResult : public NYql::IKikimrGateway::TGenericResult { + THashMap columnStatisticsByTableName; + }; + + auto promise = NewPromise(); + auto callback = [tableMetaByPathId = std::move(tableMetaByPathId)] + (TPromise promise, NStat::TEvStatistics::TEvGetStatisticsResult&& response) mutable { + if (!response.Success) { + promise.SetValue(NYql::NCommon::ResultFromError("can't get column statistics!")); + } + + THashMap columnStatisticsByTableName; + + for (auto&& stat: response.StatResponses) { + auto meta = tableMetaByPathId[stat.Req.PathId]; + auto columnName = meta.ColumnNameByTag[stat.Req.ColumnTag.value()]; + auto& columnStatistics = columnStatisticsByTableName[meta.TableName].Data[columnName]; + columnStatistics.CountMinSketch = 
std::move(stat.CountMinSketch.CountMin); + } + + promise.SetValue(TResult{.columnStatisticsByTableName = std::move(columnStatisticsByTableName)}); + }; + auto statServiceId = NStat::MakeStatServiceID(ActorSystem->NodeId); + IActor* requestHandler = + new TActorRequestHandler(statServiceId, getStatisticsRequest.Release(), promise, callback); + auto actorId = ActorSystem + ->Register(requestHandler, TMailboxType::HTSwap, ActorSystem->AppData()->UserPoolId); + Y_UNUSED(actorId); + + auto res = promise.GetFuture().GetValueSync(); + if (!res.Issues().Empty()) { + TStringStream ss; + res.Issues().PrintTo(ss); + YQL_CLOG(DEBUG, ProviderKikimr) << "Can't load columns statistics for request: " << ss.Str(); + return IGraphTransformer::TStatus::Ok; + } + + for (auto&& [tableName, columnStatistics]: res.columnStatisticsByTableName) { + TypesCtx.ColumnStatisticsByTableName.insert( + {std::move(tableName), new TOptimizerStatistics::TColumnStatMap(std::move(columnStatistics))} + ); + } + + return IGraphTransformer::TStatus::Ok; +} + +bool TKqpColumnStatisticsRequester::BeforeLambdas(const TExprNode::TPtr& input) { + bool matched = true; + + if (TKqpTable::Match(input.Get())) { + KqpTableByExprNode[input.Get()] = input.Get(); + } else if (auto maybeStreamLookup = TExprBase(input).Maybe()) { + KqpTableByExprNode[input.Get()] = maybeStreamLookup.Cast().Table().Ptr(); + } else { + matched = false; + } + + return matched; +} + +bool TKqpColumnStatisticsRequester::BeforeLambdasUnmatched(const TExprNode::TPtr& input) { + for (const auto& node: input->Children()) { + if (KqpTableByExprNode.contains(node)) { + KqpTableByExprNode[input.Get()] = KqpTableByExprNode[node]; + return true; + } + } + + return true; +} + +bool TKqpColumnStatisticsRequester::AfterLambdas(const TExprNode::TPtr& input) { + bool matched = true; + + if ( + TCoFilterBase::Match(input.Get()) || + TCoFlatMapBase::Match(input.Get()) && IsPredicateFlatMap(TExprBase(input).Cast().Lambda().Body().Ref()) + ) { + 
std::shared_ptr dummyStats = nullptr; + auto computer = NDq::TPredicateSelectivityComputer(dummyStats, true); + + if (TCoFilterBase::Match(input.Get())) { + computer.Compute(TExprBase(input).Cast().Lambda().Body()); + } else if (TCoFlatMapBase::Match(input.Get())) { + computer.Compute(TExprBase(input).Cast().Lambda().Body()); + } else { + Y_ENSURE(false); + } + + auto columnStatsUsedMembers = computer.GetColumnStatsUsedMembers(); + for (const auto& item: columnStatsUsedMembers.Data) { + auto exprNode = TExprBase(item.Member).Ptr(); + if (!KqpTableByExprNode.contains(exprNode) || KqpTableByExprNode[exprNode] == nullptr) { + continue; + } + + auto table = TExprBase(KqpTableByExprNode[exprNode]).Cast().Path().StringValue(); + auto column = item.Member.Name().StringValue(); + size_t pointPos = column.find('.'); // table.column + if (pointPos != TString::npos) { + column = column.substr(pointPos + 1); + } + + ColumnsByTableName[table].insert(std::move(column)); + } + } else { + matched = false; + } + + return matched; +} + +bool TKqpColumnStatisticsRequester::AfterLambdasUnmatched(const TExprNode::TPtr& input) { + if (KqpTableByExprNode.contains(input.Get())) { + return true; + } + + for (const auto& node: input->Children()) { + if (KqpTableByExprNode.contains(node)) { + KqpTableByExprNode[input.Get()] = KqpTableByExprNode[node]; + return true; + } + } + + return true; +} + +TAutoPtr CreateKqpColumnStatisticsRequester( + const TKikimrConfiguration::TPtr& config, + TTypeAnnotationContext& typesCtx, + TKikimrTablesData& tables, + TString cluster, + TActorSystem* actorSystem +) { + return THolder(new TKqpColumnStatisticsRequester(config, typesCtx, tables, cluster, actorSystem)); +} + +} // end of NKikimr::NKqp diff --git a/ydb/core/kqp/opt/kqp_column_statistics_requester.h b/ydb/core/kqp/opt/kqp_column_statistics_requester.h new file mode 100644 index 000000000000..0f55c2ff06a5 --- /dev/null +++ b/ydb/core/kqp/opt/kqp_column_statistics_requester.h @@ -0,0 +1,74 @@ +#pragma 
once + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +using namespace NYql; +using namespace NYql::NNodes; + +/* + * This transformer collects column and table names from an AST. It propagates + * KqpTable node from the leaves to the root of the tree and searches members in filters. + * Then it requests column statistics for these attributes from the column statistics service + * and stores it into a TTypeAnnotationContext. + */ +class TKqpColumnStatisticsRequester : public TSyncTransformerBase { +public: + TKqpColumnStatisticsRequester( + const TKikimrConfiguration::TPtr& config, + TTypeAnnotationContext& typesCtx, + TKikimrTablesData& tables, + TString cluster, + TActorSystem* actorSystem + ) + : Config(config) + , TypesCtx(typesCtx) + , Tables(tables) + , Cluster(cluster) + , ActorSystem(actorSystem) + {} + + // Main method of the transformer + IGraphTransformer::TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final; + + void Rewind() override {} + + ~TKqpColumnStatisticsRequester() override = default; + +private: + bool BeforeLambdas(const TExprNode::TPtr& input); + + bool BeforeLambdasUnmatched(const TExprNode::TPtr& input); + + void PropagateTableToLambdaArgument(const TExprNode::TPtr& input); + + bool AfterLambdas(const TExprNode::TPtr& input); + + bool AfterLambdasUnmatched(const TExprNode::TPtr& input); + +private: + THashMap KqpTableByExprNode; + THashMap> ColumnsByTableName; + + const TKikimrConfiguration::TPtr& Config; + TTypeAnnotationContext& TypesCtx; + TKikimrTablesData& Tables; + TString Cluster; + TActorSystem* ActorSystem; +}; + +TAutoPtr CreateKqpColumnStatisticsRequester( + const TKikimrConfiguration::TPtr& config, + TTypeAnnotationContext& typesCtx, + TKikimrTablesData& tables, + TString cluster, + TActorSystem* actorSystem +); + +} // end of NKikimr::NKqp namespace diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp 
b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp index 4736895c9d9d..799e4b7f9c1a 100644 --- a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp @@ -24,7 +24,7 @@ namespace { return; } - if (IsConstantExpr(input)) { + if (IsConstantExpr(input) && !input->IsCallable("PgConst")) { TNodeOnNodeOwnedMap deepClones; auto inputClone = ctx.DeepCopy(*input, ctx, deepClones, false, true, true); @@ -60,7 +60,7 @@ IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode TExprNode::TPtr& output, TExprContext& ctx) { output = input; - if (!Config->HasOptEnableConstantFolding()) { + if (!Config->EnableConstantFolding) { return IGraphTransformer::TStatus::Ok; } diff --git a/ydb/core/kqp/opt/kqp_opt.cpp b/ydb/core/kqp/opt/kqp_opt.cpp index b056c9c4b955..8af6ac69ad4b 100644 --- a/ydb/core/kqp/opt/kqp_opt.cpp +++ b/ydb/core/kqp/opt/kqp_opt.cpp @@ -83,7 +83,7 @@ bool IsKqpEffectsStage(const TDqStageBase& stage) { } bool NeedSinks(const TKikimrTableDescription& table, const TKqpOptimizeContext& kqpCtx) { - return kqpCtx.IsGenericQuery() + return (kqpCtx.IsGenericQuery() || (kqpCtx.IsDataQuery() && table.Metadata->Kind != EKikimrTableKind::Olap)) && (table.Metadata->Kind != EKikimrTableKind::Olap || kqpCtx.Config->EnableOlapSink) && (table.Metadata->Kind != EKikimrTableKind::Datashard || kqpCtx.Config->EnableOltpSink); } @@ -110,6 +110,49 @@ TKqpTable BuildTableMeta(const TKikimrTableDescription& tableDesc, const TPositi return BuildTableMeta(*tableDesc.Metadata, pos, ctx); } +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields) +{ + auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase key, ui32 index) { + if (!key.Maybe()) { + return false; + } + + auto member = key.Cast(); + if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { + return false; + } + + 
auto column = TString(member.Name().Value()); + auto columnIndex = tableDesc.GetKeyColumnIndex(column); + if (!columnIndex || *columnIndex != index) { + return false; + } + + if (passthroughFields && !passthroughFields->contains(column)) { + return false; + } + + return true; + }; + + auto lambdaBody = keySelector.Body(); + if (auto maybeTuple = lambdaBody.Maybe()) { + auto tuple = maybeTuple.Cast(); + for (size_t i = 0; i < tuple.Size(); ++i) { + if (!checkKey(tuple.Item(i), i)) { + return false; + } + } + } else { + if (!checkKey(lambdaBody, 0)) { + return false; + } + } + + return true; +} + bool IsBuiltEffect(const TExprBase& effect) { // Stage with effect output if (effect.Maybe()) { diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h index b74b4b31fbff..1904aef5215f 100644 --- a/ydb/core/kqp/opt/kqp_opt.h +++ b/ydb/core/kqp/opt/kqp_opt.h @@ -4,17 +4,20 @@ #include #include #include +#include #include namespace NKikimr::NKqp::NOpt { struct TKqpOptimizeContext : public TSimpleRefCount { TKqpOptimizeContext(const TString& cluster, const NYql::TKikimrConfiguration::TPtr& config, - const TIntrusivePtr queryCtx, const TIntrusivePtr& tables) + const TIntrusivePtr queryCtx, const TIntrusivePtr& tables, + const TIntrusivePtr& userRequestContext) : Cluster(cluster) , Config(config) , QueryCtx(queryCtx) , Tables(tables) + , UserRequestContext(userRequestContext) { YQL_ENSURE(QueryCtx); YQL_ENSURE(Tables); @@ -24,8 +27,60 @@ struct TKqpOptimizeContext : public TSimpleRefCount { const NYql::TKikimrConfiguration::TPtr Config; const TIntrusivePtr QueryCtx; const TIntrusivePtr Tables; - int JoinsCount; - int EquiJoinsCount; + const TIntrusivePtr UserRequestContext; + int JoinsCount{}; + int EquiJoinsCount{}; + std::shared_ptr OverrideStatistics{}; + std::shared_ptr CardinalityHints{}; + std::shared_ptr JoinAlgoHints{}; + std::shared_ptr JoinOrderHints{}; + + std::shared_ptr GetOverrideStatistics() { + if (Config->OptOverrideStatistics.Get()) { + if 
(!OverrideStatistics) { + auto jsonValue = new NJson::TJsonValue(); + NJson::ReadJsonTree(*Config->OptOverrideStatistics.Get(), jsonValue, true); + OverrideStatistics = std::shared_ptr(jsonValue); + } + return OverrideStatistics; + + } else { + return std::shared_ptr(); + } + } + + NYql::TCardinalityHints GetCardinalityHints() { + if (Config->OptCardinalityHints.Get()) { + if (!CardinalityHints) { + CardinalityHints = std::make_shared(*Config->OptCardinalityHints.Get()); + } + return *CardinalityHints; + } else { + return NYql::TCardinalityHints(); + } + } + + NYql::TJoinAlgoHints GetJoinAlgoHints() { + if (Config->OptJoinAlgoHints.Get()) { + if (!JoinAlgoHints) { + JoinAlgoHints = std::make_shared(*Config->OptJoinAlgoHints.Get()); + } + return *JoinAlgoHints; + } else { + return NYql::TJoinAlgoHints(); + } + } + + NYql::TJoinOrderHints GetJoinOrderHints() { + if (Config->OptJoinOrderHints.Get()) { + if (!JoinOrderHints) { + JoinOrderHints = std::make_shared(*Config->OptJoinOrderHints.Get()); + } + return *JoinOrderHints; + } else { + return NYql::TJoinOrderHints(); + } + } bool IsDataQuery() const { return QueryCtx->Type == NYql::EKikimrQueryType::Dml; diff --git a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp index f30bf59da597..97cd3485bfc6 100644 --- a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp +++ b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp @@ -56,12 +56,12 @@ class TKqpBuildTxTransformer : public TSyncTransformerBase { TKqpBuildTxTransformer() : QueryType(EKikimrQueryType::Unspecified) , IsPrecompute(false) - , IsBlocks(false) {} + { + } - void Init(EKikimrQueryType queryType, bool isPrecompute, bool isBlocks) { + void Init(EKikimrQueryType queryType, bool isPrecompute) { QueryType = queryType; IsPrecompute = isPrecompute; - IsBlocks = isBlocks; } TStatus DoTransform(TExprNode::TPtr inputExpr, TExprNode::TPtr& outputExpr, TExprContext& ctx) final { @@ -267,23 +267,6 @@ class TKqpBuildTxTransformer : public TSyncTransformerBase { 
stages.emplace_back(collectStage); } - if (IsBlocks) { - const auto* tupleOutputType = resultStage.Ref().GetTypeAnn()->Cast(); - YQL_ENSURE(tupleOutputType->GetSize() == 1); - const auto* structExprType = tupleOutputType->GetItems()[0]->Cast()->GetItemType()->Cast(); - - TDqPhyStage beforeCollectStage = collectStage; - collectStage = ConvertResultStageFromBlocksToScalar(collectStage, structExprType, ctx); - if (collectStage.Ptr() != beforeCollectStage.Ptr()) { - for (auto& stage : stages) { - if (stage.Ptr() == beforeCollectStage.Ptr()) { - stage = collectStage; - break; - } - } - } - } - auto newResult = Build(ctx, results.Pos()) .Output() .Stage(collectStage) @@ -317,60 +300,6 @@ class TKqpBuildTxTransformer : public TSyncTransformerBase { .Done(); } - static TDqPhyStage ConvertResultStageFromBlocksToScalar(TDqPhyStage& stage, const TStructExprType* rowType, TExprContext& ctx) { - TCoLambda program(ctx.DeepCopyLambda(stage.Program().Ref())); - - TVector args; - args.reserve(rowType->GetSize()); - for (ui32 i = 0; i < rowType->GetSize(); ++i) { - args.push_back(TCoArgument(ctx.NewArgument(stage.Pos(), "arg"))); - } - - TVector structItems; - structItems.reserve(args.size()); - for (ui32 i = 0; i < args.size(); ++i) { - structItems.emplace_back( - Build(ctx, stage.Pos()) - .Name().Build(rowType->GetItems()[i]->GetName()) - .Value(args[i]) - .Done()); - } - - auto resultStream = Build(ctx, program.Body().Pos()) - .Input() - .Input() - .Input() - .Input(program.Body()) - .Build() - .Build() - .Lambda() - .Args(args) - .Body() - .Add(structItems) - .Build() - .Build() - .Build() - .Done(); - - - auto finalChannelSettings = TDqStageSettings::Parse(stage); - finalChannelSettings.WideChannels = false; - finalChannelSettings.OutputNarrowType = nullptr; - finalChannelSettings.BlockStatus = NYql::NDq::TDqStageSettings::EBlockStatus::None; - - auto output = Build(ctx, stage.Pos()) - .InitFrom(stage) - .Program() - .Args(program.Args()) - .Body(resultStream.Ptr()) - .Build() 
- .Settings(finalChannelSettings.BuildNode(ctx, stage.Pos())) - .Outputs(stage.Outputs()) - .Done().Ptr(); - - return TDqPhyStage(output); - } - static TMaybeNode ExtractParamsFromStage(const TDqPhyStage& stage, const TNodeOnNodeOwnedMap& stagesMap, TMap& bindingsMap, TExprContext& ctx) { @@ -530,7 +459,6 @@ class TKqpBuildTxTransformer : public TSyncTransformerBase { private: EKikimrQueryType QueryType; bool IsPrecompute; - bool IsBlocks; }; TVector PrecomputeInputs(const TDqStage& stage) { @@ -563,21 +491,19 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { TTypeAnnotationContext& typesCtx, TKikimrConfiguration::TPtr& config) : KqpCtx(kqpCtx) , BuildCtx(buildCtx) - , TypesCtx(typesCtx) , TypeAnnTransformer(std::move(typeAnnTransformer)) { BuildTxTransformer = new TKqpBuildTxTransformer(); - const bool enableSpillingGenericQuery = - kqpCtx->IsGenericQuery() && config->SpillingEnabled() && - config->EnableSpillingGenericQuery; + config->EnableSpillingGenericQuery &= (kqpCtx->IsGenericQuery() && config->SpillingEnabled()); DataTxTransformer = TTransformationPipeline(&typesCtx) .AddServiceTransformers() .Add(TExprLogTransformer::Sync("TxOpt", NYql::NLog::EComponent::ProviderKqp, NYql::NLog::ELevel::TRACE), "TxOpt") .Add(*TypeAnnTransformer, "TypeAnnotation") .AddPostTypeAnnotation(/* forSubgraph */ true) - .Add(CreateKqpBuildPhyStagesTransformer(enableSpillingGenericQuery, typesCtx, config->BlockChannelsMode), "BuildPhysicalStages") + .Add(CreateKqpBuildPhyStagesTransformer(config->EnableSpillingGenericQuery, typesCtx, config->BlockChannelsMode), "BuildPhysicalStages") + // TODO(ilezhankin): "BuildWideBlockChannels" transformer is required only for BLOCK_CHANNELS_FORCE mode. 
.Add(CreateKqpBuildWideBlockChannelsTransformer(typesCtx, config->BlockChannelsMode), "BuildWideBlockChannels") .Add(*BuildTxTransformer, "BuildPhysicalTx") .Add(CreateKqpTxPeepholeTransformer( @@ -610,7 +536,7 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { } if (!query.Results().Empty()) { - auto tx = BuildTx(query.Results().Ptr(), ctx, false, TypesCtx.BlockEngineMode == EBlockEngineMode::Force); + auto tx = BuildTx(query.Results().Ptr(), ctx, false); if (!tx) { return TStatus::Error; } @@ -632,7 +558,7 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { } if (!query.Effects().Empty()) { - auto tx = BuildTx(query.Effects().Ptr(), ctx, /* isPrecompute */ false, TypesCtx.BlockEngineMode == EBlockEngineMode::Force); + auto tx = BuildTx(query.Effects().Ptr(), ctx, /* isPrecompute */ false); if (!tx) { return TStatus::Error; } @@ -824,7 +750,7 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { .Add(phaseResults) .Done(); - auto tx = BuildTx(phaseResultsNode.Ptr(), ctx, /* isPrecompute */ true, TypesCtx.BlockEngineMode == EBlockEngineMode::Force); + auto tx = BuildTx(phaseResultsNode.Ptr(), ctx, /* isPrecompute */ true); if (!tx.IsValid()) { return TStatus::Error; @@ -850,14 +776,14 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { return TStatus(TStatus::Repeat, true); } - TMaybeNode BuildTx(const TExprNode::TPtr& result, TExprContext& ctx, bool isPrecompute, bool isBlocks) { + TMaybeNode BuildTx(const TExprNode::TPtr& result, TExprContext& ctx, bool isPrecompute) { YQL_CLOG(TRACE, ProviderKqp) << "[BuildTx] " << KqpExprToPrettyString(*result, ctx) - << ", isPrecompute: " << isPrecompute << ", isBlocks: " << isBlocks; + << ", isPrecompute: " << isPrecompute; auto& transformer = KqpCtx->IsScanQuery() ? 
*ScanTxTransformer : *DataTxTransformer; transformer.Rewind(); - BuildTxTransformer->Init(KqpCtx->QueryCtx->Type, isPrecompute, isBlocks); + BuildTxTransformer->Init(KqpCtx->QueryCtx->Type, isPrecompute); auto expr = result; while (true) { @@ -875,7 +801,6 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { private: TIntrusivePtr KqpCtx; TIntrusivePtr BuildCtx; - TTypeAnnotationContext& TypesCtx; TAutoPtr TypeAnnTransformer; TAutoPtr BuildTxTransformer; TAutoPtr DataTxTransformer; diff --git a/ydb/core/kqp/opt/kqp_opt_impl.h b/ydb/core/kqp/opt/kqp_opt_impl.h index 8370b0994b9b..e85efabe2927 100644 --- a/ydb/core/kqp/opt/kqp_opt_impl.h +++ b/ydb/core/kqp/opt/kqp_opt_impl.h @@ -62,4 +62,7 @@ TVector> BuildS bool IsBuiltEffect(const NYql::NNodes::TExprBase& effect); +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields = {}); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp b/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp index 8255e13914e3..bfd9839cdf62 100644 --- a/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp +++ b/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp @@ -222,6 +222,115 @@ TStatus KqpDuplicateResults(const TExprNode::TPtr& input, TExprNode::TPtr& outpu return TStatus::Ok; } +template +TVector CollectNodes(const TExprNode::TPtr& input) { + TVector result; + + VisitExpr(input, [&result](const TExprNode::TPtr& node) { + if (TExpr::Match(node.Get())) { + result.emplace_back(TExpr(node)); + } + return true; + }); + + return result; +} + +bool FindPrecomputedOutputs(TDqStageBase stage, const TParentsMap& parentsMap) { + auto outIt = parentsMap.find(stage.Raw()); + if (outIt == parentsMap.end()) { + return false; + } + + for (auto& output : outIt->second) { + if (TDqOutput::Match(output)) { + auto connIt = parentsMap.find(output); + if (connIt != parentsMap.end()) { + for (auto maybeConn : connIt->second) { + auto parentIt = 
parentsMap.find(maybeConn); + if (parentIt != parentsMap.end()) { + for (auto& parent : parentIt->second) { + if (TDqPrecompute::Match(parent) || TDqPhyPrecompute::Match(parent)) { + return true; + } + } + } + } + } + } + } + + return false; +} + + +TExprBase ReplicatePrecompute(TDqStageBase stage, TExprContext& ctx, const TParentsMap& parentsMap) { + for (size_t i = 0; i < stage.Inputs().Size(); ++i) { + auto input = stage.Inputs().Item(i); + if (auto maybeConn = stage.Inputs().Item(i).Maybe()) { + auto conn = maybeConn.Cast(); + if (conn.Maybe() || conn.Maybe()) { + { + auto sourceStage = conn.Output().Stage(); + if (!sourceStage.Program().Body().Maybe()) { + continue; + } + + if (!FindPrecomputedOutputs(sourceStage, parentsMap)) { + continue; + } + } + + auto arg = stage.Program().Args().Arg(i); + auto newArg = Build(ctx, stage.Program().Args().Arg(i).Pos()) + .Name("_replaced_arg") + .Done(); + + TVector newArgs; + TNodeOnNodeOwnedMap programReplaces; + for (size_t j = 0; j < stage.Program().Args().Size(); ++j) { + auto oldArg = stage.Program().Args().Arg(j); + newArgs.push_back(Build(ctx, stage.Program().Args().Arg(i).Pos()) + .Name("_replaced_arg_" + ToString(j)) + .Done()); + if (i == j) { + programReplaces[oldArg.Raw()] = Build(ctx, oldArg.Pos()).Input(newArgs.back()).Done().Ptr(); + } else { + programReplaces[oldArg.Raw()] = newArgs.back().Ptr(); + } + } + + return + Build(ctx, stage.Pos()) + .Inputs(ctx.ReplaceNode(stage.Inputs().Ptr(), input.Ref(), Build(ctx, input.Pos()).Connection(conn).Done().Ptr())) + .Outputs(stage.Outputs()) + .Settings(stage.Settings()) + .Program() + .Args(newArgs) + .Body(TExprBase(ctx.ReplaceNodes(stage.Program().Body().Ptr(), programReplaces))) + .Build() + .Done(); + } + } + } + return stage; +} + +NYql::IGraphTransformer::TStatus ReplicatePrecomputeRule(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + TParentsMap parents; + GatherParents(*input, parents, true); + auto stages = 
CollectNodes(input); + for (auto& stage : stages) { + auto applied = ReplicatePrecompute(stage, ctx, parents); + if (applied.Raw() != stage.Raw()) { + output = ctx.ReplaceNode(input.Get(), stage.Ref(), applied.Ptr()); + return TStatus::Repeat; + } + } + output = input; + return TStatus::Ok; +} + template NYql::IGraphTransformer::TStatus PerformGlobalRule(const TString& ruleName, const NYql::TExprNode::TPtr& input, NYql::TExprNode::TPtr& output, NYql::TExprContext& ctx, TFunctor func) @@ -251,6 +360,8 @@ TAutoPtr CreateKqpFinalizingOptTransformer(const TIntrusivePt [kqpCtx](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> TStatus { output = input; + PERFORM_GLOBAL_RULE("ReplicatePrecompute", input, output, ctx, ReplicatePrecomputeRule); + PERFORM_GLOBAL_RULE("ReplicateMultiUsedConnection", input, output, ctx, [](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { YQL_ENSURE(TKqlQuery::Match(input.Get())); diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp index 587c410fb413..6034aa593b21 100644 --- a/ydb/core/kqp/opt/kqp_query_plan.cpp +++ b/ydb/core/kqp/opt/kqp_query_plan.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -94,7 +95,7 @@ struct TSerializerCtx { const TIntrusivePtr tablesData, const TKikimrConfiguration::TPtr config, ui32 txCount, TVector> pureTxResults, - TTypeAnnotationContext& typeCtx, + TTypeAnnotationContext& typeCtx, TIntrusivePtr optCtx) : ExprCtx(exprCtx) , Cluster(cluster) @@ -348,7 +349,7 @@ class TxPlanSerializer { writer.WriteKey("Inputs"); writer.BeginList(); - + for (const auto& input : op.Inputs) { if (std::holds_alternative(input)) { @@ -461,6 +462,19 @@ class TxPlanSerializer { } void FillConnectionPlanNode(const TDqConnection& connection, TQueryPlanNode& planNode) { + TDqStageSettings settings = TDqStageSettings::Parse(connection.Output().Stage()); + auto 
GetNarrowColumnName = [&](const TString& wideColumnName) { + ui32 idx; + if (!TryFromString(wideColumnName, idx)) { + return wideColumnName; + } + + YQL_ENSURE(idx < settings.OutputNarrowType->GetSize(), + "Failed to lookup column name for index " << idx << " in type " << settings.OutputNarrowType->ToString()); + + return TString(settings.OutputNarrowType->GetItems()[idx]->GetName()); + }; + planNode.Type = EPlanNodeType::Connection; if (connection.Maybe()) { @@ -473,15 +487,23 @@ class TxPlanSerializer { planNode.TypeName = "HashShuffle"; auto& keyColumns = planNode.NodeInfo["KeyColumns"]; for (const auto& column : hashShuffle.Cast().KeyColumns()) { - keyColumns.AppendValue(TString(column.Value())); + if (settings.WideChannels) { + keyColumns.AppendValue(GetNarrowColumnName(TString(column.Value()))); + } else { + keyColumns.AppendValue(TString(column.Value())); + } } } else if (auto merge = connection.Maybe()) { planNode.TypeName = "Merge"; auto& sortColumns = planNode.NodeInfo["SortColumns"]; for (const auto& sortColumn : merge.Cast().SortColumns()) { TStringBuilder sortColumnDesc; - sortColumnDesc << sortColumn.Column().Value() << " (" - << sortColumn.SortDirection().Value() << ")"; + if (settings.WideChannels) { + sortColumnDesc << GetNarrowColumnName(TString(sortColumn.Column().Value())); + } else { + sortColumnDesc << sortColumn.Column().Value(); + } + sortColumnDesc << " (" << sortColumn.SortDirection().Value() << ")"; sortColumns.AppendValue(sortColumnDesc); } @@ -527,7 +549,7 @@ class TxPlanSerializer { readInfo.LookupBy.push_back(TString(keyColumn->GetName())); } - if (SerializerCtx.Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel)!=0) { + if (SerializerCtx.Config->CostBasedOptimizationLevel.Get().GetOrElse(SerializerCtx.Config->DefaultCostBasedOptimizationLevel)!=0) { if (auto stats = SerializerCtx.TypeCtx.GetStats(tableLookup.Raw())) { planNode.OptEstimates["E-Rows"] = TStringBuilder() << stats->Nrows; 
@@ -592,9 +614,7 @@ class TxPlanSerializer { } if (auto literal = key.Maybe()) { - TStringStream out; - NUuid::UuidBytesToString(literal.Cast().Literal().Value().Data(), out); - return out.Str(); + return NUuid::UuidBytesToString(literal.Cast().Literal().StringValue()); } if (auto literal = key.Maybe()) { @@ -799,7 +819,30 @@ class TxPlanSerializer { return path; } - void Visit(const TDqSource& source, TQueryPlanNode& stagePlanNode) { + std::shared_ptr FindWrapStats(TExprNode::TPtr node, const TExprNode* dataSourceNode) { + if (auto maybeWrapBase = TMaybeNode(node)) { + if (maybeWrapBase.Cast().DataSource().Raw() == dataSourceNode) { + return SerializerCtx.TypeCtx.GetStats(node.Get()); + } + } + for (const auto& child : node->Children()) { + if (child->IsLambda()) { + // support wide lambda as well + for (size_t bodyIndex = 1; bodyIndex < child->ChildrenSize(); ++bodyIndex) { + if (auto result = FindWrapStats(child->ChildPtr(bodyIndex), dataSourceNode)) { + return result; + } + } + } else { + if (auto result = FindWrapStats(child, dataSourceNode)) { + return result; + } + } + } + return nullptr; + } + + void Visit(const TDqSource& source, TQueryPlanNode& stagePlanNode, const TCoLambda& Lambda) { // YDB sources if (auto settings = source.Settings().Maybe(); settings.IsValid()) { Visit(settings.Cast(), stagePlanNode); @@ -829,7 +872,15 @@ class TxPlanSerializer { op.Properties["Name"] = "Read from external data source"; } - if (auto stats = SerializerCtx.TypeCtx.GetStats(dataSource.Raw())) { + // Actual stats must be binded with TDqSourceWrapBase + auto stats = FindWrapStats(Lambda.Body().Ptr(), dataSource.Raw()); + + if (!stats) { + // Fallback to TCoDataSource + stats = SerializerCtx.TypeCtx.GetStats(dataSource.Raw()); + } + + if (stats) { op.Properties["E-Rows"] = TStringBuilder() << stats->Nrows; op.Properties["E-Cost"] = TStringBuilder() << stats->Cost; op.Properties["E-Size"] = TStringBuilder() << stats->ByteSize; @@ -903,7 +954,7 @@ class TxPlanSerializer { 
for (const auto& input : expr.Cast().Inputs()) { if (auto source = input.Maybe()) { auto& inputSourceNode = AddPlanNode(stagePlanNode); - Visit(source.Cast(), inputSourceNode); + Visit(source.Cast(), inputSourceNode, expr.Cast().Program()); inputIds.emplace_back(&inputSourceNode); } else { auto inputCn = input.Cast(); @@ -961,7 +1012,8 @@ class TxPlanSerializer { if (auto maybeRead = TMaybeNode(node)) { operatorId = Visit(maybeRead.Cast(), planNode); - } else if (auto maybeReadRanges = TMaybeNode(node)) { + } else if (TMaybeNode(node) && !TMaybeNode(node)) { + auto maybeReadRanges = TMaybeNode(node); operatorId = Visit(maybeReadRanges.Cast(), planNode); } else if (auto maybeLookup = TMaybeNode(node)) { operatorId = Visit(maybeLookup.Cast(), planNode); @@ -1057,6 +1109,27 @@ class TxPlanSerializer { auto mapLambdaInputs = Visit(map.Lambda().Body().Ptr(), planNode); inputIds.insert(inputIds.end(), mapLambdaInputs.begin(), mapLambdaInputs.end()); + } else if (TMaybeNode(node)) { + auto olapTable = TExprBase(node).Cast(); + + auto pred = [](const TExprNode::TPtr& n) -> bool { + if (auto maybeFilter = TMaybeNode(n)) { return true; } return false; + }; + if (auto maybeOlapFilter = FindNode(olapTable.Process().Body().Ptr(), pred)) { + auto olapFilter = TExprBase(maybeOlapFilter).Cast(); + + TOperator op; + op.Properties["Name"] = "Filter"; + + op.Properties["Predicate"] = OlapStr(olapFilter.Condition().Ptr()); + + AddOptimizerEstimates(op, olapFilter); + + operatorId = AddOperator(planNode, "Filter", std::move(op)); + inputIds.push_back(Visit(olapTable, planNode)); + } else { + operatorId = Visit(olapTable, planNode); + } } else { for (const auto& child : node->Children()) { if(!child->IsLambda()) { @@ -1081,6 +1154,52 @@ class TxPlanSerializer { return inputIds; } + TString OlapStr(const TExprNode::TPtr& node) { + TVector s; + + if (TMaybeNode(node)) { + s.emplace_back("Not"); + } else if (auto maybeList = TMaybeNode(node)) { + auto listPtr = maybeList.Cast().Ptr(); + 
size_t listSize = listPtr->Children().size(); + if (listSize == 3) { + THashMap strComp = { + {"eq", "=="}, + {"neq", "!="}, + {"lt", "<"}, + {"lte", "<="}, + {"gt", ">"}, + {"gte", ">="} + }; + TString compSign = TString(listPtr->Child(0)->Content()); + if (strComp.contains(compSign)) { + TString attr = TString(listPtr->Child(1)->Content()); + TString value; + if (listPtr->Child(2)->ChildrenSize() >= 1) { + value = TString(listPtr->Child(2)->Child(0)->Content()); + } + + return Sprintf("%s %s %s", attr.c_str(), strComp[compSign].c_str(), value.c_str()); + } + } + } + + for (const auto& child: node->Children()) { + auto childStr = OlapStr(child); + if (!childStr.empty()) { + s.push_back(std::move(childStr)); + } + } + + TString delim = " "; + if (TMaybeNode(node)) { + delim = " And "; + } else if (TMaybeNode(node)) { + delim = " Or "; + } + return JoinStrings(s, delim); + } + TVector> Visit(const TCoMap& map, TQueryPlanNode& planNode) { auto mapInputs = Visit(map.Input().Ptr(), planNode); @@ -1378,7 +1497,7 @@ class TxPlanSerializer { TOperator op; op.Properties["Name"] = name; - + return AddOperator(planNode, name, std::move(op)); } @@ -1449,7 +1568,7 @@ class TxPlanSerializer { } void AddOptimizerEstimates(TOperator& op, const TExprBase& expr) { - if (SerializerCtx.Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel)==0) { + if (SerializerCtx.Config->CostBasedOptimizationLevel.Get().GetOrElse(SerializerCtx.Config->DefaultCostBasedOptimizationLevel)==0) { return; } @@ -1730,8 +1849,8 @@ class TxPlanSerializer { template void AddReadTableSettings( - TOperator& op, - const TReadTableSettings& readTableSettings, + TOperator& op, + const TReadTableSettings& readTableSettings, TTableRead& readInfo ) { auto settings = NYql::TKqpReadTableSettings::Parse(readTableSettings); @@ -1753,9 +1872,9 @@ class TxPlanSerializer { } if (settings.SequentialInFlight) { - op.Properties["Scan"] = "Sequential"; + op.Properties["Scan"] = 
"Sequential"; } else { - op.Properties["Scan"] = "Parallel"; + op.Properties["Scan"] = "Parallel"; } } @@ -1947,6 +2066,9 @@ TVector RemoveRedundantNodes(NJson::TJsonValue& plan, const T } } + if (!planMap.contains("Node Type")) { + return {}; + } const auto typeName = planMap.at("Node Type").GetStringSafe(); if (redundantNodes.contains(typeName) || typeName.find("Precompute") != TString::npos) { return children; @@ -1955,167 +2077,235 @@ TVector RemoveRedundantNodes(NJson::TJsonValue& plan, const T return {plan}; } -NJson::TJsonValue ReconstructQueryPlanRec(const NJson::TJsonValue& plan, - int operatorIndex, - const THashMap& planIndex, - const THashMap& precomputes, - int& nodeCounter) { - - int currentNodeId = nodeCounter++; - - NJson::TJsonValue result; - result["PlanNodeId"] = currentNodeId; - - if (plan.GetMapSafe().contains("PlanNodeType")) { - result["PlanNodeType"] = plan.GetMapSafe().at("PlanNodeType").GetStringSafe(); - } +struct TQueryPlanReconstructor { + TQueryPlanReconstructor( + const THashMap& planIndex, + const THashMap& precomputes + ) + : PlanIndex(planIndex) + , Precomputes(precomputes) + , NodeIDCounter(0) + , Budget(10'000) + {} - if (plan.GetMapSafe().contains("Stats") && operatorIndex==0) { - result["Stats"] = plan.GetMapSafe().at("Stats"); - } + NJson::TJsonValue Reconstruct( + const NJson::TJsonValue& plan, + int operatorIndex + ) { + int currentNodeId = NodeIDCounter++; - if (!plan.GetMapSafe().contains("Operators")) { - NJson::TJsonValue planInputs; + NJson::TJsonValue result; + result["PlanNodeId"] = currentNodeId; - result["Node Type"] = plan.GetMapSafe().at("Node Type").GetStringSafe(); + if (--Budget <= 0) { + YQL_CLOG(DEBUG, ProviderKqp) << "Can't build the plan - recursion depth has been exceeded!"; + return result; + } - if (plan.GetMapSafe().contains("CTE Name")) { - auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); - if (precomputes.contains(precompute)) { - 
planInputs.AppendValue(ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter)); - } + if (plan.GetMapSafe().contains("PlanNodeType")) { + result["PlanNodeType"] = plan.GetMapSafe().at("PlanNodeType").GetStringSafe(); } - if (!plan.GetMapSafe().contains("Plans")) { - result["Plans"] = planInputs; - return result; + if (plan.GetMapSafe().contains("Stats") && operatorIndex==0) { + result["Stats"] = plan.GetMapSafe().at("Stats"); } - if (plan.GetMapSafe().at("Node Type").GetStringSafe() == "TableLookup") { + if (plan.GetMapSafe().at("Node Type") == "TableLookupJoin" && plan.GetMapSafe().contains("Table")) { + result["Node Type"] = "LookupJoin"; NJson::TJsonValue newOps; NJson::TJsonValue op; - op["Name"] = "TableLookup"; - op["Columns"] = plan.GetMapSafe().at("Columns"); + op["Name"] = "LookupJoin"; op["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns"); - op["Table"] = plan.GetMapSafe().at("Table"); + + newOps.AppendValue(std::move(op)); + result["Operators"] = std::move(newOps); + + NJson::TJsonValue newPlans; + + NJson::TJsonValue lookupPlan; + lookupPlan["Node Type"] = "TableLookup"; + lookupPlan["PlanNodeType"] = "TableLookup"; + + NJson::TJsonValue lookupOps; + NJson::TJsonValue lookupOp; + + lookupOp["Name"] = "TableLookup"; + lookupOp["Columns"] = plan.GetMapSafe().at("Columns"); + lookupOp["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns"); + lookupOp["Table"] = plan.GetMapSafe().at("Table"); if (plan.GetMapSafe().contains("E-Cost")) { - op["E-Cost"] = plan.GetMapSafe().at("E-Cost"); - } + lookupOp["E-Cost"] = plan.GetMapSafe().at("E-Cost"); + } if (plan.GetMapSafe().contains("E-Rows")) { - op["E-Rows"] = plan.GetMapSafe().at("E-Rows"); + lookupOp["E-Rows"] = plan.GetMapSafe().at("E-Rows"); } if (plan.GetMapSafe().contains("E-Size")) { - op["E-Size"] = plan.GetMapSafe().at("E-Size"); + lookupOp["E-Size"] = plan.GetMapSafe().at("E-Size"); } - newOps.AppendValue(op); + 
lookupOps.AppendValue(std::move(lookupOp)); + lookupPlan["Operators"] = std::move(lookupOps); + + newPlans.AppendValue(Reconstruct(plan.GetMapSafe().at("Plans").GetArraySafe()[0], 0)); + + newPlans.AppendValue(std::move(lookupPlan)); + + result["Plans"] = std::move(newPlans); - result["Operators"] = newOps; return result; } - for (auto p : plan.GetMapSafe().at("Plans").GetArraySafe()) { - if (!p.GetMapSafe().contains("Operators") && p.GetMapSafe().contains("CTE Name")) { - auto precompute = p.GetMapSafe().at("CTE Name").GetStringSafe(); - if (precomputes.contains(precompute)) { - planInputs.AppendValue(ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter)); + if (!plan.GetMapSafe().contains("Operators")) { + NJson::TJsonValue planInputs; + + result["Node Type"] = plan.GetMapSafe().at("Node Type").GetStringSafe(); + + if (plan.GetMapSafe().contains("CTE Name")) { + auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); + if (Precomputes.contains(precompute)) { + planInputs.AppendValue(Reconstruct(Precomputes.at(precompute), 0)); } - } else if (p.GetMapSafe().at("Node Type").GetStringSafe().find("Precompute") == TString::npos) { - planInputs.AppendValue(ReconstructQueryPlanRec(p, 0, planIndex, precomputes, nodeCounter)); } - } - result["Plans"] = planInputs; - return result; - } - if (plan.GetMapSafe().contains("CTE Name") && plan.GetMapSafe().at("Node Type").GetStringSafe() == "ConstantExpr") { - auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); - if (!precomputes.contains(precompute)) { - result["Node Type"] = plan.GetMapSafe().at("Node Type"); + if (!plan.GetMapSafe().contains("Plans")) { + result["Plans"] = std::move(planInputs); + return result; + } + + if (plan.GetMapSafe().at("Node Type").GetStringSafe() == "TableLookup") { + NJson::TJsonValue newOps; + NJson::TJsonValue op; + + op["Name"] = "TableLookup"; + op["Columns"] = plan.GetMapSafe().at("Columns"); + op["LookupKeyColumns"] = 
plan.GetMapSafe().at("LookupKeyColumns"); + op["Table"] = plan.GetMapSafe().at("Table"); + + if (plan.GetMapSafe().contains("E-Cost")) { + op["E-Cost"] = plan.GetMapSafe().at("E-Cost"); + } + if (plan.GetMapSafe().contains("E-Rows")) { + op["E-Rows"] = plan.GetMapSafe().at("E-Rows"); + } + if (plan.GetMapSafe().contains("E-Size")) { + op["E-Size"] = plan.GetMapSafe().at("E-Size"); + } + + newOps.AppendValue(std::move(op)); + + result["Operators"] = std::move(newOps); + return result; + } + + for (auto p : plan.GetMapSafe().at("Plans").GetArraySafe()) { + if (!p.GetMapSafe().contains("Operators") && p.GetMapSafe().contains("CTE Name")) { + auto precompute = p.GetMapSafe().at("CTE Name").GetStringSafe(); + if (Precomputes.contains(precompute)) { + planInputs.AppendValue(Reconstruct(Precomputes.at(precompute), 0)); + } + } else if (p.GetMapSafe().at("Node Type").GetStringSafe().find("Precompute") == TString::npos) { + planInputs.AppendValue(Reconstruct(p, 0)); + } + } + result["Plans"] = planInputs; return result; } - return ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter); - } + if (plan.GetMapSafe().contains("CTE Name") && plan.GetMapSafe().at("Node Type").GetStringSafe() == "ConstantExpr") { + auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); + if (!Precomputes.contains(precompute)) { + result["Node Type"] = plan.GetMapSafe().at("Node Type"); + return result; + } - auto ops = plan.GetMapSafe().at("Operators").GetArraySafe(); - auto op = ops[operatorIndex]; + return Reconstruct(Precomputes.at(precompute), 0); + } - TVector planInputs; + auto ops = plan.GetMapSafe().at("Operators").GetArraySafe(); + auto op = ops[operatorIndex]; - auto opName = op.GetMapSafe().at("Name").GetStringSafe(); + TVector planInputs; - THashSet processedExternalOperators; - THashSet processedInternalOperators; - for (auto opInput : op.GetMapSafe().at("Inputs").GetArraySafe()) { + auto opName = 
op.GetMapSafe().at("Name").GetStringSafe(); - if (opInput.GetMapSafe().contains("ExternalPlanNodeId")) { - auto inputPlanKey = opInput.GetMapSafe().at("ExternalPlanNodeId").GetIntegerSafe(); + THashSet processedExternalOperators; + THashSet processedInternalOperators; + for (auto opInput : op.GetMapSafe().at("Inputs").GetArraySafe()) { - if (processedExternalOperators.contains(inputPlanKey)) { - continue; - } - processedExternalOperators.insert(inputPlanKey); + if (opInput.GetMapSafe().contains("ExternalPlanNodeId")) { + auto inputPlanKey = opInput.GetMapSafe().at("ExternalPlanNodeId").GetIntegerSafe(); - auto inputPlan = planIndex.at(inputPlanKey); - planInputs.push_back( ReconstructQueryPlanRec(inputPlan, 0, planIndex, precomputes, nodeCounter)); - } else if (opInput.GetMapSafe().contains("InternalOperatorId")) { - auto inputPlanId = opInput.GetMapSafe().at("InternalOperatorId").GetIntegerSafe(); + if (processedExternalOperators.contains(inputPlanKey)) { + continue; + } + processedExternalOperators.insert(inputPlanKey); - if (processedInternalOperators.contains(inputPlanId)) { - continue; - } - processedInternalOperators.insert(inputPlanId); + auto inputPlan = PlanIndex.at(inputPlanKey); + planInputs.push_back( Reconstruct(inputPlan, 0) ); + } else if (opInput.GetMapSafe().contains("InternalOperatorId")) { + auto inputPlanId = opInput.GetMapSafe().at("InternalOperatorId").GetIntegerSafe(); + + if (processedInternalOperators.contains(inputPlanId)) { + continue; + } + processedInternalOperators.insert(inputPlanId); - planInputs.push_back( ReconstructQueryPlanRec(plan, inputPlanId, planIndex, precomputes, nodeCounter)); + planInputs.push_back( Reconstruct(plan, inputPlanId) ); + } } - } - if (op.GetMapSafe().contains("Inputs")) { - op.GetMapSafe().erase("Inputs"); - } + if (op.GetMapSafe().contains("Inputs")) { + op.GetMapSafe().erase("Inputs"); + } - if (op.GetMapSafe().contains("Input") - || op.GetMapSafe().contains("ToFlow") - || 
op.GetMapSafe().contains("Member") - || op.GetMapSafe().contains("AssumeSorted") - || op.GetMapSafe().contains("Iterator")) { + if (op.GetMapSafe().contains("Input") + || op.GetMapSafe().contains("ToFlow") + || op.GetMapSafe().contains("Member") + || op.GetMapSafe().contains("AssumeSorted") + || op.GetMapSafe().contains("Iterator")) { - TString maybePrecompute = ""; - if (op.GetMapSafe().contains("Input")) { - maybePrecompute = op.GetMapSafe().at("Input").GetStringSafe(); - } else if (op.GetMapSafe().contains("ToFlow")) { - maybePrecompute = op.GetMapSafe().at("ToFlow").GetStringSafe(); - } else if (op.GetMapSafe().contains("Member")) { - maybePrecompute = op.GetMapSafe().at("Member").GetStringSafe(); - } else if (op.GetMapSafe().contains("AssumeSorted")) { - maybePrecompute = op.GetMapSafe().at("AssumeSorted").GetStringSafe(); - } else if (op.GetMapSafe().contains("Iterator")) { - maybePrecompute = op.GetMapSafe().at("Iterator").GetStringSafe(); - } + TString maybePrecompute = ""; + if (op.GetMapSafe().contains("Input")) { + maybePrecompute = op.GetMapSafe().at("Input").GetStringSafe(); + } else if (op.GetMapSafe().contains("ToFlow")) { + maybePrecompute = op.GetMapSafe().at("ToFlow").GetStringSafe(); + } else if (op.GetMapSafe().contains("Member")) { + maybePrecompute = op.GetMapSafe().at("Member").GetStringSafe(); + } else if (op.GetMapSafe().contains("AssumeSorted")) { + maybePrecompute = op.GetMapSafe().at("AssumeSorted").GetStringSafe(); + } else if (op.GetMapSafe().contains("Iterator")) { + maybePrecompute = op.GetMapSafe().at("Iterator").GetStringSafe(); + } - if (precomputes.contains(maybePrecompute) && planInputs.empty()) { - planInputs.push_back(ReconstructQueryPlanRec(precomputes.at(maybePrecompute), 0, planIndex, precomputes, nodeCounter)); + if (Precomputes.contains(maybePrecompute) && planInputs.empty()) { + planInputs.push_back(Reconstruct(Precomputes.at(maybePrecompute), 0)); + } } - } - result["Node Type"] = opName; - NJson::TJsonValue newOps; - 
newOps.AppendValue(op); - result["Operators"] = newOps; + result["Node Type"] = std::move(opName); + NJson::TJsonValue newOps; + newOps.AppendValue(std::move(op)); + result["Operators"] = std::move(newOps); - if (planInputs.size()){ - NJson::TJsonValue plans; - for( auto i : planInputs) { - plans.AppendValue(i); + if (!planInputs.empty()){ + NJson::TJsonValue plans; + for(auto&& i : planInputs) { + plans.AppendValue(std::move(i)); + } + result["Plans"] = std::move(plans); } - result["Plans"] = plans; + + return result; } - return result; -} +private: + const THashMap& PlanIndex; + const THashMap& Precomputes; + ui32 NodeIDCounter; + i32 Budget; // Prevent bugs with inf recursion +}; double ComputeCpuTimes(NJson::TJsonValue& plan) { double currCpuTime = 0; @@ -2151,7 +2341,7 @@ double ComputeCpuTimes(NJson::TJsonValue& plan) { } void ComputeTotalRows(NJson::TJsonValue& plan) { - + if (plan.GetMapSafe().contains("Plans")) { for (auto& p : plan.GetMapSafe().at("Plans").GetArraySafe()) { ComputeTotalRows(p); @@ -2203,7 +2393,7 @@ NJson::TJsonValue SimplifyQueryPlan(NJson::TJsonValue& plan) { "ToFlow", "Member", "AssumeSorted" - }; + }; THashMap planIndex; THashMap precomputes; @@ -2211,8 +2401,7 @@ NJson::TJsonValue SimplifyQueryPlan(NJson::TJsonValue& plan) { BuildPlanIndex(plan, planIndex, precomputes); - int nodeCounter = 0; - plan = ReconstructQueryPlanRec(plan, 0, planIndex, precomputes, nodeCounter); + plan = TQueryPlanReconstructor(planIndex, precomputes).Reconstruct(plan, 0); RemoveRedundantNodes(plan, redundantNodes); ComputeCpuTimes(plan); @@ -2239,13 +2428,13 @@ TString AddSimplifiedPlan(const TString& planText, TIntrusivePtrJoinsCount; optimizerStats["EquiJoinsCount"] = optCtx->EquiJoinsCount; simplifiedPlan["OptimizerStats"] = optimizerStats; - } + } planJson["SimplifiedPlan"] = simplifiedPlan; return planJson.GetStringRobust(); } -TString SerializeTxPlans(const TVector& txPlans, TIntrusivePtr optCtx, const TString commonPlanInfo = "") { +TString 
SerializeTxPlans(const TVector& txPlans, TIntrusivePtr optCtx, const TString commonPlanInfo = "", const TString& queryStats = "") { NJsonWriter::TBuf writer; writer.SetIndentSpaces(2); @@ -2268,6 +2457,15 @@ TString SerializeTxPlans(const TVector& txPlans, TIntrusivePtrUserRequestContext && optCtx->UserRequestContext->PoolId) { + NJsonWriter::TBuf writer; + writer.BeginObject(); + writer.WriteKey("ResourcePoolId").WriteString(optCtx->UserRequestContext->PoolId); + writer.EndObject(); + + queryStats = writer.Str(); + } + NJsonWriter::TBuf writer; writer.SetIndentSpaces(2); WriteCommonTablesInfo(writer, serializerCtx.Tables); - queryProto.SetQueryPlan(SerializeTxPlans(txPlans, optCtx, writer.Str())); + queryProto.SetQueryPlan(SerializeTxPlans(txPlans, optCtx, writer.Str(), queryStats)); } void FillAggrStat(NJson::TJsonValue& node, const NYql::NDqProto::TDqStatsAggr& aggr, const TString& name) { @@ -2373,6 +2581,13 @@ void FillAggrStat(NJson::TJsonValue& node, const NYql::NDqProto::TDqStatsAggr& a aggrStat["Max"] = max; aggrStat["Sum"] = sum; aggrStat["Count"] = aggr.GetCnt(); + if (aggr.GetHistory().size()) { + auto& aggrHistory = aggrStat.InsertValue("History", NJson::JSON_ARRAY); + for (auto& h : aggr.GetHistory()) { + aggrHistory.AppendValue(h.GetTimeMs()); + aggrHistory.AppendValue(h.GetValue()); + } + } } } @@ -2546,10 +2761,15 @@ TString AddExecStatsToTxPlan(const TString& txPlanJson, const NYql::NDqProto::TD stats["UseLlvm"] = "undefined"; } + stats["PhysicalStageId"] = (*stat)->GetStageId(); stats["Tasks"] = (*stat)->GetTotalTasksCount(); stats["StageDurationUs"] = (*stat)->GetStageDurationUs(); + if ((*stat)->GetBaseTimeMs()) { + stats["BaseTimeMs"] = (*stat)->GetBaseTimeMs(); + } + if ((*stat)->HasDurationUs()) { FillAggrStat(stats, (*stat)->GetDurationUs(), "DurationUs"); } @@ -2601,6 +2821,18 @@ TString AddExecStatsToTxPlan(const TString& txPlanJson, const NYql::NDqProto::TD if ((*stat)->HasMaxMemoryUsage()) { FillAggrStat(stats, 
(*stat)->GetMaxMemoryUsage(), "MaxMemoryUsage"); } + if ((*stat)->HasSpillingComputeBytes()) { + FillAggrStat(stats, (*stat)->GetSpillingComputeBytes(), "SpillingComputeBytes"); + } + if ((*stat)->HasSpillingChannelBytes()) { + FillAggrStat(stats, (*stat)->GetSpillingChannelBytes(), "SpillingChannelBytes"); + } + if ((*stat)->HasSpillingComputeTimeUs()) { + FillAggrStat(stats, (*stat)->GetSpillingComputeTimeUs(), "SpillingComputeTimeUs"); + } + if ((*stat)->HasSpillingChannelTimeUs()) { + FillAggrStat(stats, (*stat)->GetSpillingChannelTimeUs(), "SpillingChannelTimeUs"); + } if (!(*stat)->GetIngress().empty()) { auto& ingressStats = stats.InsertValue("Ingress", NJson::JSON_ARRAY); @@ -2700,14 +2932,37 @@ TString AddExecStatsToTxPlan(const TString& txPlanJson, const NYql::NDqProto::TD return AddExecStatsToTxPlan(txPlanJson, stats, TIntrusivePtr()); } -TString SerializeAnalyzePlan(const NKqpProto::TKqpStatsQuery& queryStats) { +TString SerializeAnalyzePlan(const NKqpProto::TKqpStatsQuery& queryStats, const TString& poolId) { TVector txPlans; for (const auto& execStats: queryStats.GetExecutions()) { for (const auto& txPlan: execStats.GetTxPlansWithStats()) { txPlans.push_back(txPlan); } } - return SerializeTxPlans(txPlans, TIntrusivePtr()); + + NJsonWriter::TBuf writer; + writer.BeginObject(); + + if (queryStats.HasCompilation()) { + const auto& compilation = queryStats.GetCompilation(); + + writer.WriteKey("Compilation"); + writer.BeginObject(); + writer.WriteKey("FromCache").WriteBool(compilation.GetFromCache()); + writer.WriteKey("DurationUs").WriteLongLong(compilation.GetDurationUs()); + writer.WriteKey("CpuTimeUs").WriteLongLong(compilation.GetCpuTimeUs()); + writer.EndObject(); + } + + writer.WriteKey("ProcessCpuTimeUs").WriteLongLong(queryStats.GetWorkerCpuTimeUs()); + writer.WriteKey("TotalDurationUs").WriteLongLong(queryStats.GetDurationUs()); + if (poolId) { + writer.WriteKey("QueuedTimeUs").WriteLongLong(queryStats.GetQueuedTimeUs()); + 
writer.WriteKey("ResourcePoolId").WriteString(poolId); + } + writer.EndObject(); + + return SerializeTxPlans(txPlans, TIntrusivePtr(), "", writer.Str()); } TString SerializeScriptPlan(const TVector& queryPlans) { diff --git a/ydb/core/kqp/opt/kqp_query_plan.h b/ydb/core/kqp/opt/kqp_query_plan.h index 8adbf2b20866..7f720dc4c6ca 100644 --- a/ydb/core/kqp/opt/kqp_query_plan.h +++ b/ydb/core/kqp/opt/kqp_query_plan.h @@ -44,7 +44,7 @@ void PhyQuerySetTxPlans(NKqpProto::TKqpPhyQuery& queryProto, const NYql::NNodes: */ TString AddExecStatsToTxPlan(const TString& txPlan, const NYql::NDqProto::TDqExecutionStats& stats); -TString SerializeAnalyzePlan(const NKqpProto::TKqpStatsQuery& queryStats); +TString SerializeAnalyzePlan(const NKqpProto::TKqpStatsQuery& queryStats, const TString& poolId = ""); TString SerializeScriptPlan(const TVector& queryPlans); diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 2b0a32aeadd0..83c556a35878 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -22,40 +23,67 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo const TKqpOptimizeContext& kqpCtx) { auto inputNode = TExprBase(input); - double nRows = 0; - int nAttrs = 0; + std::shared_ptr inputStats; - const TExprNode* path; + int nAttrs = 0; + bool readRange = false; if (auto readTable = inputNode.Maybe()) { - path = readTable.Cast().Table().Path().Raw(); + inputStats = typeCtx->GetStats(readTable.Cast().Table().Raw()); nAttrs = readTable.Cast().Columns().Size(); + + auto range = readTable.Cast().Range(); + auto rangeFrom = range.From().Maybe(); + auto rangeTo = range.To().Maybe(); + if (rangeFrom && rangeTo) { + readRange = true; + } } else if (auto readRanges = inputNode.Maybe()) { - path = readRanges.Cast().Table().Path().Raw(); + inputStats = 
typeCtx->GetStats(readRanges.Cast().Table().Raw()); nAttrs = readRanges.Cast().Columns().Size(); } else { Y_ENSURE(false, "Invalid node type for InferStatisticsForReadTable"); } - const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path->Content()); - int totalAttrs = tableData.Metadata->Columns.size(); - nRows = tableData.Metadata->RecordsCount; + if (!inputStats) { + return; + } - double byteSize = tableData.Metadata->DataSize * (nAttrs / (double)totalAttrs); + auto keyColumns = inputStats->KeyColumns; + if (auto indexRead = inputNode.Maybe()) { + const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexRead.Cast().Table().Path().Value()); + const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().StringValue()); - auto keyColumns = TIntrusivePtr(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames)); - auto stats = std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns); - if (kqpCtx.Config->OverrideStatistics.Get()) { - stats = OverrideStatistics(*stats, path->Content(), *kqpCtx.Config->OverrideStatistics.Get()); + keyColumns = TIntrusivePtr( + new TOptimizerStatistics::TKeyColumns(indexMeta->KeyColumnNames)); } - if (stats->ColumnStatistics) { - for (const auto& [columnName, metaData]: tableData.Metadata->Columns) { - stats->ColumnStatistics->Data[columnName].Type = metaData.Type; - } + /** + * We need index statistics to calculate this in the future + * Right now we use very small estimates to make sure CBO picks Lookup Joins + * I.e. 
there can be a chain of lookup joins in OLTP scenario and we want to make + * sure the cardinality doesn't blow up and lookup joins are still being picked + */ + double inputRows = inputStats->Nrows; + double nRows = inputRows; + if (readRange) { + nRows = 1; } - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols; + double sizePerRow = inputStats->ByteSize / (inputRows==0?1:inputRows); + double byteSize = nRows * sizePerRow * (nAttrs / (double)inputStats->Ncols); + + auto stats = std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + 0.0, + keyColumns, + inputStats->ColumnStatistics, + inputStats->StorageType); + + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table" << stats->ToString(); typeCtx->SetStats(input.Get(), stats); } @@ -64,24 +92,49 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo * Infer statistics for KQP table */ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, - const TKqpOptimizeContext& kqpCtx) { + TKqpOptimizeContext& kqpCtx) { auto inputNode = TExprBase(input); auto readTable = inputNode.Cast(); auto path = readTable.Path(); const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value()); + if (!tableData.Metadata->StatsLoaded && !kqpCtx.Config->OptOverrideStatistics.Get()) { + return; + } + double nRows = tableData.Metadata->RecordsCount; double byteSize = tableData.Metadata->DataSize; int nAttrs = tableData.Metadata->Columns.size(); auto keyColumns = TIntrusivePtr(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames)); auto stats = std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns); - if (kqpCtx.Config->OverrideStatistics.Get()) { - stats = OverrideStatistics(*stats, path.Value(), *kqpCtx.Config->OverrideStatistics.Get()); + if 
(typeCtx->ColumnStatisticsByTableName.contains(path.StringValue())) { + stats->ColumnStatistics = typeCtx->ColumnStatisticsByTableName[path.StringValue()]; + } + if (kqpCtx.Config->OptOverrideStatistics.Get()) { + stats = OverrideStatistics(*stats, path.Value(), kqpCtx.GetOverrideStatistics()); + } + if (stats->ColumnStatistics) { + for (const auto& [columnName, metaData]: tableData.Metadata->Columns) { + stats->ColumnStatistics->Data[columnName].Type = metaData.Type; + } } - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", nKeyColumns: " << stats->KeyColumns->Data.size(); + EStorageType storageType = EStorageType::NA; + switch (tableData.Metadata->Kind) { + case EKikimrTableKind::Datashard: + storageType = EStorageType::RowStorage; + break; + case EKikimrTableKind::Olap: + storageType = EStorageType::ColumnStorage; + break; + default: + break; + } + stats->StorageType = storageType; + + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ": " << stats->ToString(); typeCtx->SetStats(input.Get(), stats); } @@ -101,9 +154,23 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation int nAttrs = streamLookup.Columns().Size(); auto inputStats = typeCtx->GetStats(streamLookup.Table().Raw()); + if (!inputStats) { + return; + } auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols); - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns)); + auto res = std::make_shared( + EStatisticsType::BaseTable, + inputStats->Nrows, + nAttrs, + byteSize, + 0, + inputStats->KeyColumns, + inputStats->ColumnStatistics, + inputStats->StorageType); + + typeCtx->SetStats(input.Get(), res); + } /** @@ -121,6 +188,9 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation double byteSize = 0; auto inputStats = 
typeCtx->GetStats(lookupTable.Table().Raw()); + if (!inputStats) { + return; + } if (lookupTable.LookupKeys().Maybe()) { if (inputStats) { @@ -134,7 +204,15 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation byteSize = 10; } - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + 0, + inputStats->KeyColumns, + inputStats->ColumnStatistics, + inputStats->StorageType)); } /** @@ -142,7 +220,9 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation * We look into range expression to check if its a point lookup or a full scan * We currently don't try to figure out whether this is a small range vs full scan */ -void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { +void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, + const TKqpOptimizeContext& kqpCtx) { + auto inputNode = TExprBase(input); auto sourceSettings = inputNode.Cast(); @@ -151,7 +231,8 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn return; } - double nRows = inputStats->Nrows; + double inputRows = inputStats->Nrows; + double nRows = inputRows; // Check if we have a range expression, in that case just assign a single row to this read // We don't currently check the size of an index lookup @@ -164,11 +245,30 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn } } + auto keyColumns = inputStats->KeyColumns; + if (auto indexRead = inputNode.Maybe()) { + const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexRead.Cast().Table().Path().Value()); + const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().StringValue()); + + keyColumns = 
TIntrusivePtr( + new TOptimizerStatistics::TKeyColumns(indexMeta->KeyColumnNames)); + } + int nAttrs = sourceSettings.Columns().Size(); + + double sizePerRow = inputStats->ByteSize / (inputRows==0?1:inputRows); + double byteSize = nRows * sizePerRow * (nAttrs / (double)inputStats->Ncols); double cost = inputStats->Cost; - double byteSize = inputStats->ByteSize * (nAttrs / (double)inputStats->Ncols); - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + cost, + keyColumns, + inputStats->ColumnStatistics, + inputStats->StorageType)); } /** @@ -179,6 +279,35 @@ void InferStatisticsForIndexLookup(const TExprNode::TPtr& input, TTypeAnnotation typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, 5, 5, 20, 0.0)); } +void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { + auto indexRanges = TKqlReadTableIndexRanges(input); + + auto inputStats = typeCtx->GetStats(indexRanges.Table().Raw()); + if (!inputStats) { + return; + } + + TVector indexColumns; + for (auto c : indexRanges.Columns()) { + indexColumns.push_back(c.StringValue()); + } + + auto indexColumnsPtr = TIntrusivePtr(new TOptimizerStatistics::TKeyColumns(indexColumns)); + auto stats = std::make_shared( + inputStats->Type, + inputStats->Nrows, + inputStats->Ncols, + inputStats->ByteSize, + inputStats->Cost, + indexColumnsPtr, + inputStats->ColumnStatistics, + inputStats->StorageType); + + typeCtx->SetStats(input.Get(), stats); + + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for index: " << stats->ToString(); +} + /*** * Infer statistics for result binding of a stage */ @@ -201,35 +330,330 @@ void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotati std::from_chars(bindingNoStr.data(), bindingNoStr.data() + 
bindingNoStr.size(), bindingNo); std::from_chars(resultNoStr.data(), resultNoStr.data() + resultNoStr.size(), resultNo); - typeCtx->SetStats(param.Name().Raw(), txStats[bindingNo][resultNo]); - typeCtx->SetStats(inputNode.Raw(), txStats[bindingNo][resultNo]); + auto resStats = txStats[bindingNo][resultNo]; + typeCtx->SetStats(param.Name().Raw(), resStats); + typeCtx->SetStats(inputNode.Raw(), resStats); + } + } +} + +class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer { +public: + TKqpOlapPredicateSelectivityComputer(const std::shared_ptr& stats) + : TPredicateSelectivityComputer(stats) + {} + + double Compute(const NNodes::TExprBase& input) { + std::optional resSelectivity; + + if (auto andNode = input.Maybe()) { + double tmpSelectivity = 1.0; + for (size_t i = 0; i < andNode.Cast().ArgCount(); i++) { + tmpSelectivity *= Compute(andNode.Cast().Arg(i)); + } + resSelectivity = tmpSelectivity; + } else if (auto orNode = input.Maybe()) { + double tmpSelectivity = 0.0; + for (size_t i = 0; i < orNode.Cast().ArgCount(); i++) { + tmpSelectivity += Compute(orNode.Cast().Arg(i)); + } + resSelectivity = tmpSelectivity; + } else if (auto notNode = input.Maybe()) { + resSelectivity = 1 - Compute(notNode.Cast().Value()); + } else if (input.Maybe() && input.Ptr()->ChildrenSize() >= 1) { + auto listPtr = input.Maybe().Cast().Ptr()->Child(1); + size_t listSize = listPtr->ChildrenSize(); + + if (listSize == 3) { + TString compSign = TString(listPtr->Child(0)->Content()); + TString attr = TString(listPtr->Child(1)->Content()); + + TExprContext dummyCtx; + TPositionHandle dummyPos; + + auto rowArg = + Build(dummyCtx, dummyPos) + .Name("row") + .Done(); + + auto member = + Build(dummyCtx, dummyPos) + .Struct(rowArg) + .Name().Build(attr) + .Done(); + + auto value = TExprBase(listPtr->ChildPtr(2)); + if (OlapCompSigns.contains(compSign)) { + resSelectivity = this->ComputeComparisonSelectivity(member, value); + } else if (compSign == "eq") { + 
resSelectivity = this->ComputeEqualitySelectivity(member, value); + } else if (compSign == "neq") { + resSelectivity = 1 - this->ComputeEqualitySelectivity(member, value); + } + } + } + + if (!resSelectivity.has_value()) { + auto dumped = input.Raw()->Dump(); + YQL_CLOG(TRACE, ProviderKqp) << "ComputePredicateSelectivity NOT FOUND : " << dumped; + return 1.0; } + + return std::min(1.0, resSelectivity.value()); + } + +private: + THashSet OlapCompSigns = { + {"lt"}, + {"lte"}, + {"gt"}, + {"gte"} + }; +}; + +void InferStatisticsForOlapFilter(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { + auto inputNode = TExprBase(input); + auto filter = inputNode.Cast(); + auto filterInput = filter.Input(); + auto inputStats = typeCtx->GetStats(filterInput.Raw()); + + if (!inputStats) { + return; + } + + double selectivity = TKqpOlapPredicateSelectivityComputer(inputStats).Compute(filter.Condition()); + + auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns ); + outputStats.Labels = inputStats->Labels; + outputStats.Selectivity *= selectivity; + + typeCtx->SetStats(input.Get(), std::make_shared(std::move(outputStats)) ); +} + +void InferStatisticsForOlapRead(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { + auto inputNode = TExprBase(input); + auto olapRead = inputNode.Cast(); + + auto process = olapRead.Process(); + auto lambdaStats = typeCtx->GetStats(process.Body().Raw()); + if (lambdaStats) { + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for OLAP table: " << lambdaStats->ToString(); + typeCtx->SetStats(input.Get(), lambdaStats); } } +double EstimateRowSize(const TStructExprType& rowType, const TString& format, const TString& compression, bool decoded) { + double result = 0.0; + for (auto item : rowType.GetItems()) { + auto itemType = item->GetItemType(); + if (itemType->GetKind() == ETypeAnnotationKind::Data) 
{ + switch(itemType->Cast()->GetSlot()) { + case EDataSlot::Bool: + result += decoded ? 1.0 : 0.2; + break; + case EDataSlot::Int8: + [[fallthrough]]; + case EDataSlot::Uint8: + result += decoded ? 1.0 : 0.72; + break; + case EDataSlot::Int16: + [[fallthrough]]; + case EDataSlot::Uint16: + result += decoded ? 2.0 : 1.44; + break; + case EDataSlot::Int32: + [[fallthrough]]; + case EDataSlot::Uint32: + result += decoded ? 4.0 : 2.88; + break; + case EDataSlot::Int64: + [[fallthrough]]; + case EDataSlot::Uint64: + [[fallthrough]]; + case EDataSlot::Double: + result += decoded ? 8.0 : 3.88; + break; + case EDataSlot::Float: + result += decoded ? 4.0 : 2.88; + break; + case EDataSlot::String: + [[fallthrough]]; + case EDataSlot::Utf8: + result += decoded ? 28.0 : 8.0; + break; + case EDataSlot::Yson: + [[fallthrough]]; + case EDataSlot::Json: + result += decoded ? 56.0 : 16.0; + break; + case EDataSlot::Uuid: + break; + case EDataSlot::Date: + result += decoded ? 2.0 : 1.51; + break; + case EDataSlot::Datetime: + [[fallthrough]]; + case EDataSlot::Timestamp: + result += decoded ? 8.0 : 6.04; + break; + case EDataSlot::Interval: + break; + case EDataSlot::TzDate: + break; + case EDataSlot::TzDatetime: + break; + case EDataSlot::TzTimestamp: + break; + case EDataSlot::Decimal: + result += decoded ? 16.0 : 7.76; + break; + case EDataSlot::DyNumber: + break; + case EDataSlot::JsonDocument: + break; + case EDataSlot::Date32: + result += decoded ? 4.0 : 2.88; + break; + case EDataSlot::Datetime64: + [[fallthrough]]; + case EDataSlot::Timestamp64: + case EDataSlot::Interval64: + result += decoded ? 
8.0 : 3.88; + break; + case EDataSlot::TzDate32: + break; + case EDataSlot::TzDatetime64: + break; + case EDataSlot::TzTimestamp64: + break; + } + } + } + + if (result == 0.0) { + result = 1000.0; + } + + if (format != "parquet" && !decoded) { + double compressionRatio = 1.0; + if (format == "csv_with_names" || format == "tsv_with_names") { + result *= 5.0; + compressionRatio = 4.5; // gzip + } else if (format != "raw") { // json's + result *= 12.0; + compressionRatio = 14.0; // gzip + } + if (compression) { + if (compression == "gzip") { + // 1.00 + } else if (compression == "zstd") { + compressionRatio *= 1.05; + } else if (compression == "lz4") { + compressionRatio *= 1.43; + } else if (compression == "brotli") { + compressionRatio *= 1.20; + } else if (compression == "bzip2") { + compressionRatio *= 1.24; + } else if (compression == "xz") { + compressionRatio *= 1.45; + } + result /= compressionRatio; + } + } + + return result; +} + void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, - const TKqpOptimizeContext& kqpCtx) { + TKqpOptimizeContext& kqpCtx) { auto inputNode = TExprBase(input); if (auto wrapBase = inputNode.Maybe()) { if (auto maybeS3DataSource = wrapBase.Cast().DataSource().Maybe()) { auto s3DataSource = maybeS3DataSource.Cast(); if (s3DataSource.Name()) { - auto path = s3DataSource.Name().Cast().StringValue(); - if (kqpCtx.Config->OverrideStatistics.Get() && path) { - auto stats = std::make_shared(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr()); - stats = OverrideStatistics(*stats, path, *kqpCtx.Config->OverrideStatistics.Get()); - if (stats->ByteSize == 0.0) { + auto stats = typeCtx->GetStats(s3DataSource.Raw()); + if (!stats) { + stats = std::make_shared(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr()); + } + if (!stats->Specific) { + stats->Specific = std::make_shared(); + } + + const TS3ProviderStatistics* specific = dynamic_cast((stats->Specific.get())); + + if 
(!specific->OverrideApplied && kqpCtx.Config->OptOverrideStatistics.Get()) { + auto path = s3DataSource.Name().Cast().StringValue(); + auto dbStats = kqpCtx.GetOverrideStatistics()->GetMapSafe(); + if (!dbStats.contains(path)) { auto n = path.find_last_of('/'); if (n != path.npos) { - stats = OverrideStatistics(*stats, path.substr(n + 1), *kqpCtx.Config->OverrideStatistics.Get()); + path = path.substr(n + 1); } } - if (stats->ByteSize != 0.0) { - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for s3 data source " << path; - typeCtx->SetStats(input.Get(), stats); + if (dbStats.contains(path)) { + YQL_CLOG(TRACE, CoreDq) << "Override statistics for s3 data source " << path; + stats = OverrideStatistics(*stats, path, kqpCtx.GetOverrideStatistics()); + auto newSpecific = std::make_shared(*specific); + newSpecific->OverrideApplied = true; + stats->Specific = newSpecific; + specific = newSpecific.get(); typeCtx->SetStats(s3DataSource.Raw(), stats); } } + + auto dataSourceStats = stats; + + auto rowType = wrapBase.Cast().RowType().Ref().GetTypeAnn()->Cast()->GetType()->Cast(); + if (specific->FullRawRowAvgSize == 0.0) { + auto newSpecific = std::make_shared(*specific); + stats = std::make_shared(stats->Type, stats->Nrows, stats->Ncols, stats->ByteSize, stats->Cost, stats->KeyColumns, stats->ColumnStatistics, stats->StorageType, newSpecific); + newSpecific->FullRawRowAvgSize = EstimateRowSize(*rowType, newSpecific->Format, newSpecific->Compression, false); + newSpecific->FullDecodedRowAvgSize = EstimateRowSize(*rowType, newSpecific->Format, newSpecific->Compression, true); + specific = newSpecific.get(); + typeCtx->SetStats(s3DataSource.Raw(), stats); + } + + auto wrapStats = typeCtx->GetStats(input.Get()); + if (!wrapStats) { + typeCtx->SetStats(input.Get(), stats); + } else { + stats = wrapStats; + } + + if (stats->Ncols == 0 || stats->Ncols > static_cast(rowType->GetSize()) || stats->Nrows == 0 || stats->ByteSize == 0.0 || stats->Cost == 0.0) { + auto newSpecific = 
std::make_shared(*specific); + stats = std::make_shared(stats->Type, stats->Nrows, stats->Ncols, stats->ByteSize, stats->Cost, stats->KeyColumns, stats->ColumnStatistics, stats->StorageType, newSpecific); + + if (stats->Nrows == 0 && newSpecific->FullRawRowAvgSize) { + stats->Nrows = newSpecific->RawByteSize / newSpecific->FullRawRowAvgSize; + } + if (stats->Ncols == 0 || stats->Ncols > static_cast(rowType->GetSize())) { + stats->Ncols = rowType->GetSize(); + newSpecific->PrunedRawRowAvgSize = EstimateRowSize(*rowType, newSpecific->Format, newSpecific->Compression, false); + newSpecific->PrunedDecodedRowAvgSize = EstimateRowSize(*rowType, newSpecific->Format, newSpecific->Compression, true); + stats->ByteSize = 0.0; + } + if (stats->ByteSize == 0.0) { + stats->ByteSize = stats->Nrows * newSpecific->PrunedDecodedRowAvgSize; + } + double rowSize = 0.0; + if (stats->Cost == 0.0) { + if (newSpecific->Format == "parquet") { + rowSize = newSpecific->PrunedRawRowAvgSize; + } else { + rowSize = newSpecific->FullRawRowAvgSize; + } + stats->Cost = rowSize * stats->Nrows; + if (newSpecific->Compression) { + stats->Cost *= 1.5; + } + { + auto specific = const_cast(dynamic_cast((dataSourceStats->Specific.get()))); + specific->Costs[TStructExprType::MakeHash(rowType->GetItems())] = stats->Cost; + } + } + typeCtx->SetStats(input.Get(), stats); + } } } } @@ -273,7 +697,10 @@ bool TKqpStatisticsTransformer::BeforeLambdasSpecific(const TExprNode::TPtr& inp Y_UNUSED(ctx); bool matched = true; // KQP Matchers - if(TKqlReadTableBase::Match(input.Get()) || TKqlReadTableRangesBase::Match(input.Get())){ + if(TKqlReadTableIndexRanges::Match(input.Get())) { + InferStatisticsForReadTableIndexRanges(input, TypeCtx); + } + else if(TKqlReadTableBase::Match(input.Get()) || TKqlReadTableRangesBase::Match(input.Get())){ InferStatisticsForReadTable(input, TypeCtx, KqpCtx); } else if(TKqlLookupTableBase::Match(input.Get())) { @@ -286,7 +713,7 @@ bool 
TKqpStatisticsTransformer::BeforeLambdasSpecific(const TExprNode::TPtr& inp InferStatisticsForKqpTable(input, TypeCtx, KqpCtx); } else if (TKqpReadRangesSourceSettings::Match(input.Get())) { - InferStatisticsForRowsSourceSettings(input, TypeCtx); + InferStatisticsForRowsSourceSettings(input, TypeCtx, KqpCtx); } else if (TKqpCnStreamLookup::Match(input.Get())) { InferStatisticsForSteamLookup(input, TypeCtx); @@ -298,6 +725,9 @@ bool TKqpStatisticsTransformer::BeforeLambdasSpecific(const TExprNode::TPtr& inp } else if(TDqSourceWrapBase::Match(input.Get())) { InferStatisticsForDqSourceWrap(input, TypeCtx, KqpCtx); + } + else if (TKqpOlapFilter::Match(input.Get())) { + InferStatisticsForOlapFilter(input, TypeCtx); } else { matched = false; diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.h b/ydb/core/kqp/opt/kqp_statistics_transformer.h index 3c54c7ee768f..e74646bc0883 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.h +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.h @@ -29,13 +29,13 @@ using namespace NOpt; class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase { const TKikimrConfiguration::TPtr& Config; - const TKqpOptimizeContext& KqpCtx; + TKqpOptimizeContext& KqpCtx; TVector>> TxStats; public: TKqpStatisticsTransformer(const TIntrusivePtr& kqpCtx, TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) : - TDqStatisticsTransformerBase(&typeCtx, pctx), + TDqStatisticsTransformerBase(&typeCtx, pctx, kqpCtx->GetCardinalityHints()), Config(config), KqpCtx(*kqpCtx) {} diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp index 9f3c13f2dcb2..9b7947845cd6 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp @@ -113,6 +113,7 @@ bool IsLookupJoinApplicable(std::shared_ptr left, const TVector& rightJoinKeys, TKqpProviderContext& ctx) { + Y_UNUSED(joinConditions); Y_UNUSED(left); 
Y_UNUSED(leftJoinKeys); @@ -125,21 +126,13 @@ bool IsLookupJoinApplicable(std::shared_ptr left, if (rightStats->Type != EStatisticsType::BaseTable) { return false; } - if (joinConditions.size() > rightStats->KeyColumns->Data.size()) { - return false; - } - for (auto [leftCol, rightCol] : joinConditions) { - // Fix for clang14, somehow structured binding does not create a variable in clang14 - auto r = rightCol; - if (find_if(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), - [&r] (const TString& s) { - return r.AttributeName == s; - } ) == rightStats->KeyColumns->Data.end()) { + for (auto rightCol : rightJoinKeys) { + if (std::find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol) == rightStats->KeyColumns->Data.end()) { return false; } } - + return IsLookupJoinApplicableDetailed(std::static_pointer_cast(right), rightJoinKeys, ctx); } @@ -155,13 +148,22 @@ bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr= 2) && (left->Stats->Nrows > 1000)) { + if ((OptLevel != 3) && (left->Stats->Nrows > 1000)) { return false; } return IsLookupJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, *this); + case EJoinAlgoType::LookupJoinReverse: + if (joinKind != EJoinKind::LeftSemi) { + return false; + } + if ((OptLevel != 3) && (right->Stats->Nrows > 1000)) { + return false; + } + return IsLookupJoinApplicable(right, left, joinConditions, rightJoinKeys, leftJoinKeys, *this); + case EJoinAlgoType::MapJoin: - return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 5e8; + return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 1e8; case EJoinAlgoType::GraceJoin: return true; default: @@ -174,14 +176,21 @@ double TKqpProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStat switch(joinAlgo) { case EJoinAlgoType::LookupJoin: - if (OptLevel==1) { + if (OptLevel == 3) { return -1; } return 
leftStats.Nrows + outputRows; + + case EJoinAlgoType::LookupJoinReverse: + if (OptLevel == 3) { + return -1; + } + return rightStats.Nrows + outputRows; + case EJoinAlgoType::MapJoin: - return leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows; + return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows); case EJoinAlgoType::GraceJoin: - return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; + return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows); default: Y_ENSURE(false, "Illegal join type encountered"); return 0; diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 0a63f6b6cf83..99ad9ce69e98 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -61,6 +62,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoNarrowFlatMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoNarrowMultiMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoWideMap::Match, HNDL(DqReadWideWrapFieldSubset)); + AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize)); AddHandler(1, &TCoFlatMap::Match, HNDL(LatePushExtractedPredicateToReadTable)); AddHandler(1, &TCoTop::Match, HNDL(RewriteTopSortOverIndexRead)); @@ -139,7 +141,8 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { true, // defaultWatermarksMode true); // syncActor } else { - output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), KqpCtx.Config->HasOptUseFinalizeByKey()); + NDq::TSpillingSettings spillingSettings(KqpCtx.Config->GetEnabledSpillingNodes()); + output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), KqpCtx.Config->HasOptUseFinalizeByKey(), 
spillingSettings.IsAggregationSpillingEnabled()); } if (output) { DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Cast().Ptr(), ctx); @@ -167,27 +170,33 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { TMaybeNode OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) { auto maxDPccpDPTableSize = Config->MaxDPccpDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPccpDPTableSize); - auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel); + auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel); auto providerCtx = TKqpProviderContext(KqpCtx, optLevel); auto opt = std::unique_ptr(MakeNativeOptimizerNew(providerCtx, maxDPccpDPTableSize)); TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel, *opt, [](auto& rels, auto label, auto node, auto stat) { rels.emplace_back(std::make_shared(TString(label), stat, node)); }, - KqpCtx.EquiJoinsCount); + KqpCtx.EquiJoinsCount, + TOptimizerHints{ + .CardinalityHints = KqpCtx.GetCardinalityHints(), + .JoinAlgoHints = KqpCtx.GetJoinAlgoHints(), + .JoinOrderHints = KqpCtx.GetJoinOrderHints() + } + ); DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx); return output; } TMaybeNode RewriteEquiJoin(TExprBase node, TExprContext& ctx) { - bool useCBO = Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel) == 3; + bool useCBO = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel) >= 2; TExprBase output = DqRewriteEquiJoin(node, KqpCtx.Config->GetHashJoinMode(), useCBO, ctx, TypesCtx, KqpCtx.JoinsCount); DumpAppliedRule("RewriteEquiJoin", node.Ptr(), output.Ptr(), ctx); return output; } TMaybeNode JoinToIndexLookup(TExprBase node, TExprContext& ctx) { - bool useCBO = 
Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel) == 3; + bool useCBO = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel) >= 2; TExprBase output = KqpJoinToIndexLookup(node, ctx, KqpCtx, useCBO); DumpAppliedRule("JoinToIndexLookup", node.Ptr(), output.Ptr(), ctx); return output; @@ -311,6 +320,14 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { return output; } + TMaybeNode MatchRecognize(TExprBase node, TExprContext& ctx) { + auto output = ExpandMatchRecognize(node.Ptr(), ctx, TypesCtx); + if (output) { + DumpAppliedRule("MatchRecognize", node.Ptr(), output, ctx); + } + return output; + } + TMaybeNode DqReadWrapByProvider(TExprBase node, TExprContext& ctx) { auto output = NDq::DqReadWrapByProvider(node, ctx, TypesCtx); if (output) { diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp index a872addcf58a..157c371c167e 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp @@ -412,44 +412,67 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const } TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { - if (!kqpCtx.IsScanQuery()) { + if (!node.Maybe()) { return node; } - if (auto maybeStreamLookupIndex = node.Maybe()) { - auto streamLookupIndex = maybeStreamLookupIndex.Cast(); + auto streamLookupIndex = node.Maybe().Cast(); - const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, streamLookupIndex.Table().Path()); - const auto& [indexMeta, _] = tableDesc.Metadata->GetIndexMetadata(streamLookupIndex.Index().StringValue()); + const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, streamLookupIndex.Table().Path()); + const auto& [indexMeta, _] = 
tableDesc.Metadata->GetIndexMetadata(streamLookupIndex.Index().StringValue()); - const bool needDataRead = CheckIndexCovering(streamLookupIndex, indexMeta); - if (!needDataRead) { - return Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) - .LookupKeys(streamLookupIndex.LookupKeys()) - .Columns(streamLookupIndex.Columns()) - .LookupStrategy().Build(TKqpStreamLookupStrategyName) - .Done(); - } - - auto keyColumnsList = BuildKeyColumnsList(tableDesc, streamLookupIndex.Pos(), ctx); - - TExprBase lookupIndexTable = Build(ctx, node.Pos()) + const bool needDataRead = CheckIndexCovering(streamLookupIndex, indexMeta); + if (!needDataRead) { + return Build(ctx, node.Pos()) .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) .LookupKeys(streamLookupIndex.LookupKeys()) - .Columns(keyColumnsList) - .LookupStrategy().Build(TKqpStreamLookupStrategyName) + .Columns(streamLookupIndex.Columns()) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) .Done(); + } - return Build(ctx, node.Pos()) - .Table(streamLookupIndex.Table()) - .LookupKeys(lookupIndexTable.Ptr()) - .Columns(streamLookupIndex.Columns()) - .LookupStrategy().Build(TKqpStreamLookupStrategyName) + auto keyColumnsList = BuildKeyColumnsList(tableDesc, streamLookupIndex.Pos(), ctx); + + TExprBase lookupIndexTable = Build(ctx, node.Pos()) + .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .LookupKeys(streamLookupIndex.LookupKeys()) + .Columns(keyColumnsList) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) + .Done(); + + TMaybeNode lookupKeys; + YQL_ENSURE(streamLookupIndex.LookupStrategy().Maybe()); + TString lookupStrategy = streamLookupIndex.LookupStrategy().Maybe().Cast().StringValue(); + if (lookupStrategy == TKqpStreamLookupJoinStrategyName || lookupStrategy == TKqpStreamLookupSemiJoinStrategyName) { + // Result type of lookupIndexTable: list>>, + // expected input type for main table stream join: list, left_row>>, + // so we should transform list>> to list, 
left_row>> + lookupKeys = Build(ctx, node.Pos()) + .Input(lookupIndexTable) + .Lambda() + .Args({"tuple"}) + .Body() + .Add() + .Tuple("tuple") + .Index().Value("1").Build() + .Build() + .Add() + .Tuple("tuple") + .Index().Value("0").Build() + .Build() + .Build() + .Build() .Done(); + } else { + lookupKeys = lookupIndexTable; } - return node; + return Build(ctx, node.Pos()) + .Table(streamLookupIndex.Table()) + .LookupKeys(lookupKeys.Cast()) + .Columns(streamLookupIndex.Columns()) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) + .Done(); } /// Can push flat map node to read from table using only columns available in table description diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp index ac26425830f7..6c32c107ad83 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp @@ -198,6 +198,7 @@ TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos, .Columns(columns) .Index() .Build(indexName) + .LookupStrategy().Build(TKqpStreamLookupStrategyName) .Done(); } @@ -336,6 +337,7 @@ bool IsParameterToListOfStructsRepack(const TExprBase& expr) { TMaybeNode BuildKqpStreamIndexLookupJoin( const TDqJoin& join, TExprBase leftInput, + const TString& indexName, const TPrefixLookup& rightLookup, const TKqpMatchReadResult& rightReadMatch, TExprContext& ctx) @@ -395,19 +397,34 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( } } - TExprBase lookupJoin = Build(ctx, join.Pos()) - .Table(rightLookup.MainTable) - .LookupKeys(leftInput) - .Columns(lookupColumns.Cast()) - .LookupStrategy().Build(TKqpStreamLookupJoinStrategyName) - .Done(); + auto strategy = join.JoinType().Value() == "LeftSemi" + ? 
TKqpStreamLookupSemiJoinStrategyName + : TKqpStreamLookupJoinStrategyName; + + TMaybeNode lookupJoin; + if (indexName) { + lookupJoin = Build(ctx, join.Pos()) + .Table(rightLookup.MainTable) + .LookupKeys(leftInput) + .Columns(lookupColumns.Cast()) + .Index().Build(indexName) + .LookupStrategy().Build(strategy) + .Done(); + } else { + lookupJoin = Build(ctx, join.Pos()) + .Table(rightLookup.MainTable) + .LookupKeys(leftInput) + .Columns(lookupColumns.Cast()) + .LookupStrategy().Build(strategy) + .Done(); + } // Stream lookup join output: stream>> // so we should apply filters to second element of tuple for each row if (extraRightFilter.IsValid()) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -429,7 +446,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.ExtractMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -451,7 +468,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.FilterNullMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -473,7 +490,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.SkipNullMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -495,7 +512,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.FlatMap) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -516,7 +533,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( } return Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .LeftLabel().Build(leftLabel) .RightLabel().Build(rightLabel) .JoinType(join.JoinType()) @@ -597,8 +614,7 @@ TMaybeNode KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } const bool 
useStreamIndexLookupJoin = (kqpCtx.IsDataQuery() || kqpCtx.IsGenericQuery()) - && kqpCtx.Config->EnableKqpDataQueryStreamIdxLookupJoin - && !indexName; + && kqpCtx.Config->EnableKqpDataQueryStreamIdxLookupJoin; auto leftRowArg = Build(ctx, join.Pos()) .Name("leftRowArg") @@ -833,7 +849,7 @@ TMaybeNode KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext .Build() .Done(); - return BuildKqpStreamIndexLookupJoin(join, leftInput, *prefixLookup, *rightReadMatch, ctx); + return BuildKqpStreamIndexLookupJoin(join, leftInput, indexName, *prefixLookup, *rightReadMatch, ctx); } auto leftDataDeduplicated = DeduplicateByMembers(leftData, filter, deduplicateLeftColumns, ctx, join.Pos()); @@ -921,15 +937,28 @@ TMaybeNode KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext TExprBase KqpJoinToIndexLookup(const TExprBase& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, bool useCBO) { - if ((!useCBO && kqpCtx.IsScanQuery() && !kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin) || !node.Maybe()) { + if (!node.Maybe()) { return node; } + auto join = node.Cast(); + auto algo = FromString(join.JoinAlgo().StringValue()); + + if (algo == EJoinAlgoType::Undefined) { + useCBO = false; + } - if (useCBO && FromString(join.JoinAlgo().StringValue()) != EJoinAlgoType::LookupJoin) { + if (!useCBO && kqpCtx.IsScanQuery() && !kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin) { return node; } + if (useCBO){ + + if (algo != EJoinAlgoType::LookupJoin && algo != EJoinAlgoType::LookupJoinReverse) { + return node; + } + } + DBG("-- Join: " << KqpExprToPrettyString(join, ctx)); // SqlIn support (preferred lookup direction) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index bcc8a8cf7150..99fd7c51cbe3 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -177,6 +177,34 @@ TMaybeNode 
TryBuildTrivialReadTable(TCoFlatMap& flatmap, TKqlReadTabl .Done(); } +TMaybeNode ExtractTopSortKeySelector(TExprBase node, const NYql::TParentsMap& parentsMap) { + auto it = parentsMap.find(node.Raw()); + if (it != parentsMap.end()) { + if (it->second.size() != 1) { + return {}; + } + for (auto* node : it->second) { + if (TCoTopSort::Match(node)) { + TCoTopSort topSort(node); + return topSort.KeySelectorLambda(); + } + } + } + return {}; +} + +bool IsIdLambda(TExprBase body) { + if (auto cond = body.Maybe()) { + if (auto boolLit = cond.Cast().Predicate().Maybe()) { + return boolLit.Literal().Cast().Value() == "true" && cond.Value().Maybe(); + } + } + if (body.Maybe()) { + return true; + } + return false; +} + } // namespace TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -269,7 +297,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx YQL_ENSURE(prepareSuccess); if (!indexName.IsValid() && !readSettings.ForcePrimary && kqpCtx.Config->IndexAutoChooserMode != NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_DISABLED) { - using TIndexComparisonKey = std::tuple; + using TIndexComparisonKey = std::tuple; auto calcNeedsJoin = [&] (const TKikimrTableMetadataPtr& keyTable) -> bool { bool needsJoin = false; for (auto&& column : read.Columns()) { @@ -280,8 +308,16 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx return needsJoin; }; - auto calcKey = [&](NYql::IPredicateRangeExtractor::TBuildResult buildResult, size_t descriptionKeyColumns, bool needsJoin) -> TIndexComparisonKey { + auto keySelector = ExtractTopSortKeySelector(flatmap, parentsMap); + + auto calcKey = [&]( + NYql::IPredicateRangeExtractor::TBuildResult buildResult, + size_t descriptionKeyColumns, + bool needsJoin, + const NYql::TKikimrTableDescription & tableDesc) -> TIndexComparisonKey + { return std::make_tuple( + keySelector.IsValid() && 
IsSortKeyPrimary(keySelector.Cast(), tableDesc) && IsIdLambda(TCoLambda(buildResult.PrunedLambda).Body()), buildResult.PointPrefixLen >= descriptionKeyColumns, buildResult.PointPrefixLen >= descriptionKeyColumns ? 0 : buildResult.PointPrefixLen, buildResult.UsedPrefixLen >= descriptionKeyColumns, @@ -293,7 +329,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx auto primaryBuildResult = extractor->BuildComputeNode(mainTableDesc.Metadata->KeyColumnNames, ctx, typesCtx); if (primaryBuildResult.PointPrefixLen < mainTableDesc.Metadata->KeyColumnNames.size()) { - auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false); + auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false, mainTableDesc); for (auto& index : mainTableDesc.Metadata->Indexes) { if (index.Type != TIndexDescription::EType::GlobalAsync) { auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(index.Name)).first->Name); @@ -307,7 +343,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx continue; } - auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin); + auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin, tableDesc); if (key > maxKey) { maxKey = key; chosenIndex = index.Name; @@ -387,6 +423,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx .LookupKeys(keys) .Index(indexName.Cast()) .LookupKeys(keys) + .LookupStrategy().Build(TKqpStreamLookupStrategyName) .Done(); } } else { diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp index 808af2b3262e..1471d3f631df 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include 
+#include #include #include @@ -153,7 +155,7 @@ class TKqpPeepholeTransformer : public TOptimizeTransformerBase { struct TKqpPeepholePipelineConfigurator : IPipelineConfigurator { TKqpPeepholePipelineConfigurator( - TKikimrConfiguration::TPtr config, + TKikimrConfiguration::TPtr config, TSet disabledOpts ) : Config(config) @@ -213,6 +215,96 @@ struct TKqpPeepholePipelineFinalConfigurator : IPipelineConfigurator { const TKikimrConfiguration::TPtr Config; }; +// Sort stages in topological order by their inputs, so that we optimize the ones without inputs first. +TVector TopSortStages(const TDqPhyStageList& stages) { + TVector topSortedStages; + topSortedStages.reserve(stages.Size()); + std::function topSort; + THashSet visitedStages; + + // Assume there is no cycles. + topSort = [&](const TDqPhyStage& stage) { + if (visitedStages.contains(stage.Ref().UniqueId())) { + return; + } + + for (const auto& input : stage.Inputs()) { + if (auto connection = input.Maybe()) { + // NOTE: somehow `Output()` is actually an input. 
+ if (auto phyStage = connection.Cast().Output().Stage().Maybe()) { + topSort(phyStage.Cast()); + } + } + } + + visitedStages.insert(stage.Ref().UniqueId()); + topSortedStages.push_back(stage); + }; + + for (const auto& stage : stages) { + topSort(stage); + } + + return topSortedStages; +} + +// TODO: copy-paste from https://github.com/ydb-platform/ydb/blob/122f053354c5df4fc559bf06fe0302f92d813032/ydb/library/yql/dq/opt/dq_opt_build.cpp#L444 +bool IsCompatibleWithBlocks(TPositionHandle pos, const TStructExprType& type, TExprContext& ctx, TTypeAnnotationContext& typesCtx) { + TVector types; + for (auto& item : type.GetItems()) { + types.emplace_back(item->GetItemType()); + } + + auto resolveStatus = typesCtx.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), types, ctx); + YQL_ENSURE(resolveStatus != IArrowResolver::ERROR); + return resolveStatus == IArrowResolver::OK; +} + +// TODO: composite copy-paste from https://github.com/ydb-platform/ydb/blob/122f053354c5df4fc559bf06fe0302f92d813032/ydb/library/yql/dq/opt/dq_opt_build.cpp#L388 +bool CanPropagateWideBlockThroughChannel( + const TDqOutput& output, + const THashMap& programs, + const TDqStageSettings& stageSettings, + TExprContext& ctx, + TTypeAnnotationContext& typesCtx) +{ + const auto& program = programs.at(output.Stage().Ref().UniqueId()); + + ui32 index = FromString(output.Index().Value()); + if (index != 0) { + // stage has multiple outputs + return false; + } + + if (!stageSettings.WideChannels) { + return false; + } + + YQL_ENSURE(stageSettings.OutputNarrowType); + + if (!IsCompatibleWithBlocks(program.Pos(), *stageSettings.OutputNarrowType, ctx, typesCtx)) { + return false; + } + + // Ensure that stage has blocks on top level (i.e. 
FromFlow(WideFromBlocks(...))) + if (!program.Lambda().Body().Maybe() || + !program.Lambda().Body().Cast().Input().Maybe()) + { + return false; + } + + auto typeAnnotation = program.Lambda().Ref().GetTypeAnn(); + + YQL_ENSURE(typeAnnotation, "Program for stage " << output.Stage().Ref().UniqueId() << " doesn't have type annotation"); + + if (IsWideBlockType(*typeAnnotation->Cast()->GetItemType())) { + // output is already wide block + return false; + } + + return true; +} + TStatus PeepHoleOptimize(const TExprBase& program, TExprNode::TPtr& newProgram, TExprContext& ctx, IGraphTransformer& typeAnnTransformer, TTypeAnnotationContext& typesCtx, TKikimrConfiguration::TPtr config, bool allowNonDeterministicFunctions, bool withFinalStageRules, TSet disabledOpts) @@ -244,30 +336,112 @@ TMaybeNode PeepholeOptimize(const TKqpPhysicalTx& tx, TExprConte IGraphTransformer& typeAnnTransformer, TTypeAnnotationContext& typesCtx, THashSet& optimizedStages, TKikimrConfiguration::TPtr config, bool withFinalStageRules, TSet disabledOpts) { - TVector stages; - stages.reserve(tx.Stages().Size()); - TNodeOnNodeOwnedMap stagesMap; - TVector bindings(tx.ParamBindings().begin(), tx.ParamBindings().end()); + THashMap programs; THashMap nonDetParamBindings; - for (const auto& stage : tx.Stages()) { + const auto topSortedStages = TopSortStages(tx.Stages()); + for (const auto& stage : topSortedStages) { YQL_ENSURE(!optimizedStages.contains(stage.Ref().UniqueId())); + TCoLambda lambda = stage.Program(); + TVector newArgs; + newArgs.reserve(stage.Inputs().Size()); + + // Propagate "WideFromBlock" through connections. + // TODO(ilezhankin): this peephole optimization should be implemented instead as + // the original whole-graph transformer |CreateDqBuildWideBlockChannelsTransformer|. 
+ if (config->BlockChannelsMode == NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_AUTO) { + TNodeOnNodeOwnedMap argsMap; + + YQL_ENSURE(stage.Inputs().Size() == stage.Program().Args().Size()); + + for (size_t i = 0; i < stage.Inputs().Size(); ++i) { + auto oldArg = stage.Program().Args().Arg(i); + auto newArg = TCoArgument(ctx.NewArgument(oldArg.Pos(), oldArg.Name())); + newArg.MutableRef().SetTypeAnn(oldArg.Ref().GetTypeAnn()); + newArgs.emplace_back(newArg); + + if (auto connection = stage.Inputs().Item(i).Maybe(); connection && + CanPropagateWideBlockThroughChannel(connection.Cast().Output(), programs, TDqStageSettings::Parse(stage), ctx, typesCtx)) + { + TExprNode::TPtr newArgNode = ctx.Builder(oldArg.Pos()) + .Callable("FromFlow") + .Callable(0, "WideFromBlocks") + .Callable(0, "ToFlow") + .Add(0, newArg.Ptr()) + .Seal() + .Seal() + .Seal() + .Build(); + argsMap.emplace(oldArg.Raw(), newArgNode); + + auto stageUid = connection.Cast().Output().Stage().Ref().UniqueId(); + + // Update input program with: FromFlow(WideFromBlocks($1)) → FromFlow($1) + if (const auto& inputProgram = programs.at(stageUid); inputProgram.Lambda().Body().Maybe() && + inputProgram.Lambda().Body().Cast().Input().Maybe()) + { + auto newBody = Build(ctx, inputProgram.Lambda().Body().Cast().Pos()) + .Input(inputProgram.Lambda().Body().Cast().Input().Cast().Input()) + .Done(); + + auto newInputProgram = Build(ctx, inputProgram.Pos()) + .Lambda() + .Args(inputProgram.Lambda().Args()) + .Body(newBody) + .Build() + .ArgsType(inputProgram.ArgsType()) + .Done(); + + // Run the peephole optimization on new program again to update type annotations. + // TODO(ilezhankin): refactor to run only the update of type annotations - not the whole optimization. 
+ bool allowNonDeterministicFunctions = !newInputProgram.Lambda().Body().Maybe(); + TExprNode::TPtr newInputProgramNode; + + auto status = PeepHoleOptimize(newInputProgram, newInputProgramNode, ctx, typeAnnTransformer, typesCtx, config, + allowNonDeterministicFunctions, withFinalStageRules, disabledOpts); + if (status != TStatus::Ok) { + ctx.AddError(TIssue(ctx.GetPosition(stage.Pos()), "Peephole optimization failed for KQP transaction")); + return {}; + } + + programs.at(stageUid) = TKqpProgram(newInputProgramNode); + } + + // Update the type annotation for an argument with return value of the input program. + newArg.MutableRef().SetTypeAnn(programs.at(stageUid).Lambda().Body().Ref().GetTypeAnn()); + } else { + argsMap.emplace(oldArg.Raw(), newArg.Ptr()); + } + } + + // Rebuild lambda with new arguments. + lambda = Build(ctx, lambda.Pos()) + .Args(newArgs) + .Body(ctx.ReplaceNodes(stage.Program().Body().Ptr(), argsMap)) + .Done(); + } else { + for (size_t i = 0; i < stage.Inputs().Size(); ++i) { + auto oldArg = stage.Program().Args().Arg(i); + auto newArg = TCoArgument(ctx.NewArgument(oldArg.Pos(), oldArg.Name())); + newArg.MutableRef().SetTypeAnn(oldArg.Ref().GetTypeAnn()); + newArgs.emplace_back(newArg); + } + } + TVector argTypes; - for (const auto& arg : stage.Program().Args()) { + for (const auto& arg : newArgs) { YQL_ENSURE(arg.Ref().GetTypeAnn()); argTypes.push_back(arg.Ref().GetTypeAnn()); } // TODO: get rid of TKqpProgram-callable (YQL-10078) - TNodeOnNodeOwnedMap tmp; auto program = Build(ctx, stage.Pos()) - //.Lambda(ctx.DeepCopy(stage.Program().Ref(), ctx, tmp, true /* internStrings */, false /* copyTypes */)) - .Lambda(stage.Program()) + .Lambda(lambda) .ArgsType(ExpandType(stage.Pos(), *ctx.MakeType(argTypes), ctx)) .Done(); - bool allowNonDeterministicFunctions = !stage.Program().Body().Maybe(); + const bool allowNonDeterministicFunctions = !program.Lambda().Body().Maybe(); TExprNode::TPtr newProgram; auto status = PeepHoleOptimize(program, 
newProgram, ctx, typeAnnTransformer, typesCtx, config, @@ -287,26 +461,34 @@ TMaybeNode PeepholeOptimize(const TKqpPhysicalTx& tx, TExprConte } } - auto newStage = Build(ctx, stage.Pos()) - .Inputs(ctx.ReplaceNodes(stage.Inputs().Ptr(), stagesMap)) - .Program(ctx.DeepCopyLambda(TKqpProgram(newProgram).Lambda().Ref())) - .Settings(stage.Settings()) - .Outputs(stage.Outputs()) - .Done(); - - stages.emplace_back(newStage); - stagesMap.emplace(stage.Raw(), newStage.Ptr()); - optimizedStages.emplace(stage.Ref().UniqueId()); + YQL_ENSURE(programs.emplace(stage.Ref().UniqueId(), TKqpProgram(newProgram)).second); } + TVector bindings(tx.ParamBindings().begin(), tx.ParamBindings().end()); + for (const auto& [_, binding] : nonDetParamBindings) { bindings.emplace_back(std::move(binding)); } + TVector newStages; + TNodeOnNodeOwnedMap stagesMap; + + // Rebuild stages only after all new programs are ready. + for (const auto& stage : topSortedStages) { + auto newStage = Build(ctx, stage.Pos()) + .InitFrom(stage) + .Inputs(ctx.ReplaceNodes(stage.Inputs().Ptr(), stagesMap)) + .Program(ctx.DeepCopyLambda(programs.at(stage.Ref().UniqueId()).Lambda().Ref())) + .Done(); + + newStages.emplace_back(newStage); + stagesMap.emplace(stage.Raw(), newStage.Ptr()); + } + return Build(ctx, tx.Pos()) .Stages() - .Add(stages) + .Add(newStages) .Build() .Results(ctx.ReplaceNodes(tx.Results().Ptr(), stagesMap)) .ParamBindings().Add(bindings).Build() @@ -318,7 +500,7 @@ class TKqpTxPeepholeTransformer : public TSyncTransformerBase { public: TKqpTxPeepholeTransformer( IGraphTransformer* typeAnnTransformer, - TTypeAnnotationContext& typesCtx, + TTypeAnnotationContext& typesCtx, TKikimrConfiguration::TPtr config, bool withFinalStageRules, TSet disabledOpts @@ -444,8 +626,8 @@ class TKqpTxsPeepholeTransformer : public TSyncTransformerBase { TAutoPtr CreateKqpTxPeepholeTransformer( NYql::IGraphTransformer* typeAnnTransformer, - TTypeAnnotationContext& typesCtx, - const TKikimrConfiguration::TPtr& config, 
+ TTypeAnnotationContext& typesCtx, + const TKikimrConfiguration::TPtr& config, bool withFinalStageRules, TSet disabledOpts ) @@ -455,7 +637,7 @@ TAutoPtr CreateKqpTxPeepholeTransformer( TAutoPtr CreateKqpTxsPeepholeTransformer( TAutoPtr typeAnnTransformer, - TTypeAnnotationContext& typesCtx, + TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config ) { diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp index d4e97c698697..4ae0aaf7e3b4 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp @@ -204,7 +204,7 @@ TExprBase KqpRewriteReturningUpsert(TExprBase node, TExprContext& ctx, const TKq return node; } - if (!upsert.Input().Maybe() && !upsert.Input().Maybe()) { + if (upsert.Input().Maybe() || upsert.Input().Maybe()) { return node; } @@ -226,7 +226,7 @@ TExprBase KqpRewriteReturningDelete(TExprBase node, TExprContext& ctx, const TKq return node; } - if (!del.Input().Maybe() && !del.Input().Maybe()) { + if (del.Input().Maybe() || del.Input().Maybe()) { return node; } diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp index 39550e770404..ad89fb36ff67 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp @@ -91,7 +91,7 @@ TExprBase KqpBuildUpdateStages(TExprBase node, TExprContext& ctx, const TKqpOpti const auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, update.Table().Path()); - const bool isSink = NeedSinks(table, kqpCtx) && table.Metadata->Kind == EKikimrTableKind::Olap; + const bool isSink = NeedSinks(table, kqpCtx); const bool needPrecompute = !isSink; if (needPrecompute) { diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index a28771a8f9da..3f7ab21bc3c9 100644 --- 
a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -88,8 +88,6 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoTake::Match, HNDL(PropagatePrecomuteTake)); AddHandler(0, &TCoFlatMap::Match, HNDL(PropagatePrecomuteFlatmap)); - AddHandler(0, &TDqCnHashShuffle::Match, HNDL(BuildHashShuffleByKeyStage)); - AddHandler(0, &TCoAggregateCombine::Match, HNDL(ExpandAggregatePhase)); AddHandler(0, &TCoAggregateCombineState::Match, HNDL(ExpandAggregatePhase)); AddHandler(0, &TCoAggregateMergeState::Match, HNDL(ExpandAggregatePhase)); @@ -253,15 +251,9 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { return output; } - TMaybeNode BuildHashShuffleByKeyStage(TExprBase node, TExprContext& ctx) { - auto output = DqBuildHashShuffleByKeyStage(node, ctx, {}); - DumpAppliedRule("BuildHashShuffleByKeyStage", node.Ptr(), output.Ptr(), ctx); - return TExprBase(output); - } - - TMaybeNode ExpandAggregatePhase(TExprBase node, TExprContext& ctx) { - auto output = ExpandAggregatePeepholeImpl(node.Ptr(), ctx, TypesCtx, KqpCtx.Config->HasOptUseFinalizeByKey(), false); + NDq::TSpillingSettings spillingSettings(KqpCtx.Config->GetEnabledSpillingNodes()); + auto output = ExpandAggregatePeepholeImpl(node.Ptr(), ctx, TypesCtx, KqpCtx.Config->HasOptUseFinalizeByKey(), false, spillingSettings.IsAggregationSpillingEnabled()); DumpAppliedRule("ExpandAggregatePhase", node.Ptr(), output, ctx); return TExprBase(output); } @@ -428,9 +420,9 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { { // TODO: Allow push to left stage for data queries. // It is now possible as we don't use datashard transactions for reads in data queries. 
- bool pushLeftStage = !KqpCtx.IsDataQuery() && AllowFuseJoinInputs(node); + bool pushLeftStage = (KqpCtx.IsScanQuery() || KqpCtx.Config->EnableKqpDataQueryStreamLookup) && AllowFuseJoinInputs(node); TExprBase output = DqBuildJoin(node, ctx, optCtx, *getParents(), IsGlobal, - pushLeftStage, KqpCtx.Config->GetHashJoinMode() + pushLeftStage, KqpCtx.Config->GetHashJoinMode(), false ); DumpAppliedRule("BuildJoin", node.Ptr(), output.Ptr(), ctx); return output; diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp index 18a439af0c21..298680c604bb 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp @@ -170,49 +170,6 @@ NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, .Done(); } -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields) -{ - auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase key, ui32 index) { - if (!key.Maybe()) { - return false; - } - - auto member = key.Cast(); - if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { - return false; - } - - auto column = TString(member.Name().Value()); - auto columnIndex = tableDesc.GetKeyColumnIndex(column); - if (!columnIndex || *columnIndex != index) { - return false; - } - - if (passthroughFields && !passthroughFields->contains(column)) { - return false; - } - - return true; - }; - - auto lambdaBody = keySelector.Body(); - if (auto maybeTuple = lambdaBody.Maybe()) { - auto tuple = maybeTuple.Cast(); - for (size_t i = 0; i < tuple.Size(); ++i) { - if (!checkKey(tuple.Item(i), i)) { - return false; - } - } - } else { - if (!checkKey(lambdaBody, 0)) { - return false; - } - } - - return true; -} - ESortDirection GetSortDirection(const NYql::NNodes::TExprBase& sortDirections) { auto getDirection = [] (TExprBase expr) -> 
ESortDirection { if (!expr.Maybe()) { diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h index 0e22dbac8e4f..846bf2d5cd69 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h @@ -28,9 +28,6 @@ NYql::NNodes::TDqStage ReplaceStageArg(NYql::NNodes::TDqStage stage, size_t inpu NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, size_t inputIndex, NYql::NNodes::TKqpReadRangesSourceSettings settings, NYql::TExprContext& ctx); -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields = {}); - enum ESortDirection : ui32 { None = 0, Forward = 1, diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp index 5092bbeec38d..ffe2ffae4c53 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp @@ -299,11 +299,6 @@ TExprBase KqpPushOlapAggregate(TExprBase node, TExprContext& ctx, const TKqpOpti return node; } - // temporary for keys grouping push down not useful - if (!aggCombine.Keys().Empty()) { - return node; - } - auto read = maybeRead.Cast(); auto aggs = Build(ctx, node.Pos()); diff --git a/ydb/core/kqp/opt/physical/predicate_collector.cpp b/ydb/core/kqp/opt/physical/predicate_collector.cpp index cfb680616c97..031cc196d84d 100644 --- a/ydb/core/kqp/opt/physical/predicate_collector.cpp +++ b/ydb/core/kqp/opt/physical/predicate_collector.cpp @@ -143,7 +143,14 @@ bool AbstractTreeCanBePushed(const TExprBase& expr, const TExprNode* ) { bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg) { if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { - if (node.Maybe() || node.Maybe() || node.Maybe()) { + if (node.Maybe() || node.Maybe()) { + return true; + } + // Temporary fix for 
https://github.com/ydb-platform/ydb/issues/7967 + else if (auto ifPred = node.Maybe()) { + if (ifPred.ThenValue().Maybe() || ifPred.ElseValue().Maybe()) { + return false; + } return true; } } diff --git a/ydb/core/kqp/opt/ya.make b/ydb/core/kqp/opt/ya.make index 6f07040d1180..c893b604701e 100644 --- a/ydb/core/kqp/opt/ya.make +++ b/ydb/core/kqp/opt/ya.make @@ -13,6 +13,7 @@ SRCS( kqp_query_blocks_transformer.cpp kqp_query_plan.cpp kqp_statistics_transformer.cpp + kqp_column_statistics_requester.cpp kqp_constant_folding_transformer.cpp ) @@ -25,9 +26,10 @@ PEERDIR( ydb/library/yql/dq/opt ydb/library/yql/dq/type_ann ydb/library/yql/providers/s3/expr_nodes + ydb/library/yql/providers/s3/statistics ydb/library/yql/utils/plan ydb/core/kqp/provider - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/provider/read_attributes_utils.cpp b/ydb/core/kqp/provider/read_attributes_utils.cpp index c261c2c494e5..9ae143169ab6 100644 --- a/ydb/core/kqp/provider/read_attributes_utils.cpp +++ b/ydb/core/kqp/provider/read_attributes_utils.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace NYql { @@ -23,7 +24,27 @@ class TGatheringAttributesVisitor : public IAstAttributesVisitor { CurrentSource->second.try_emplace(key, value); }; - void VisitNonAttribute(TExprNode::TPtr) override {} + void VisitNonAttribute(TExprNode::TPtr node) override { + if (!CurrentSource) { + return; + } + + auto nodeChildren = node->Children(); + if (nodeChildren.size() > 2 && nodeChildren[0]->IsAtom()) { + TCoAtom attrName{nodeChildren[0]}; + if (attrName.StringValue() == "partitionedby") { + NJson::TJsonArray values; + + for (size_t i = 1; i < nodeChildren.size(); ++i) { + Y_ABORT_UNLESS(nodeChildren[i]->IsAtom()); + TCoAtom attrValue{nodeChildren[i]}; + values.AppendValue(attrValue.StringValue()); + } + + CurrentSource->second.try_emplace(attrName.StringValue(), NJson::WriteJson(values)); + } + } + } public: THashMap, THashMap> 
Result; @@ -100,9 +121,11 @@ class TAttributesReplacingVisitor : public IAstAttributesVisitor { auto nodeChildren = node->Children(); if (!nodeChildren.empty() && nodeChildren[0]->IsAtom()) { TCoAtom attrName{nodeChildren[0]}; - if (attrName.StringValue().equal("userschema")) { + if (attrName.StringValue() == "userschema") { node = BuildSchemaFromMetadata(Read->Pos(), Ctx, Metadata->Columns); ReplacedUserchema = true; + } else if (attrName.StringValue() == "partitionedby") { + NewAttributes.erase("partitionedby"); } } Children.push_back(std::move(node)); @@ -203,7 +226,19 @@ static Ydb::Type CreateYdbType(const NKikimr::NScheme::TTypeInfo& typeInfo, bool auto& item = notNull ? ydbType : *ydbType.mutable_optional_type()->mutable_item(); - item.set_type_id((Ydb::Type::PrimitiveTypeId)typeInfo.GetTypeId()); + // + // DECIMAL is PrimitiveType with (22,9) defaults in Scheme + // and separate (non-primitive) type everywhere else + // + // NKikimr::NScheme::NTypeIds::Decimal is omitted in public API intentionally + // + if (typeInfo.GetTypeId() == NKikimr::NScheme::NTypeIds::Decimal) { + auto* decimal = item.mutable_decimal_type(); + decimal->set_precision(NKikimr::NScheme::DECIMAL_PRECISION); + decimal->set_scale(NKikimr::NScheme::DECIMAL_SCALE); + } else { + item.set_type_id((Ydb::Type::PrimitiveTypeId)typeInfo.GetTypeId()); + } } return ydbType; } diff --git a/ydb/core/kqp/provider/rewrite_io_utils.cpp b/ydb/core/kqp/provider/rewrite_io_utils.cpp index 633149305a44..067ab4b583a9 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.cpp +++ b/ydb/core/kqp/provider/rewrite_io_utils.cpp @@ -1,6 +1,7 @@ #include "rewrite_io_utils.h" #include +#include #include #include #include @@ -15,23 +16,18 @@ using namespace NNodes; constexpr const char* QueryGraphNodeSignature = "SavedQueryGraph"; -NSQLTranslation::TTranslationSettings CreateViewTranslationSettings(const TString& cluster) { - NSQLTranslation::TTranslationSettings settings; - - settings.DefaultCluster = cluster; - 
settings.ClusterMapping[cluster] = TString(NYql::KikimrProviderName); - settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; - - return settings; -} - TExprNode::TPtr CompileViewQuery( - const TString& query, TExprContext& ctx, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ) { + auto translationSettings = settingsBuilder.Build(ctx); + translationSettings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; + NSQLTranslation::Deserialize(viewData.CapturedContext, translationSettings); + TAstParseResult queryAst; - queryAst = NSQLTranslation::SqlToYql(query, CreateViewTranslationSettings(cluster)); + queryAst = NSQLTranslation::SqlToYql(viewData.QueryText, translationSettings); ctx.IssueManager.AddIssues(queryAst.Issues); if (!queryAst.IsOk()) { @@ -39,7 +35,7 @@ TExprNode::TPtr CompileViewQuery( } TExprNode::TPtr queryGraph; - if (!CompileExpr(*queryAst.Root, queryGraph, ctx, nullptr, nullptr)) { + if (!CompileExpr(*queryAst.Root, queryGraph, ctx, moduleResolver.get(), nullptr)) { return nullptr; } @@ -122,8 +118,9 @@ TExprNode::TPtr FindTopLevelRead(const TExprNode::TPtr& queryGraph) { TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, - const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ) { YQL_PROFILE_FUNC(DEBUG); @@ -132,7 +129,7 @@ TExprNode::TPtr RewriteReadFromView( TExprNode::TPtr queryGraph = FindSavedQueryGraph(readNode.Ptr()); if (!queryGraph) { - queryGraph = CompileViewQuery(query, ctx, cluster); + queryGraph = CompileViewQuery(ctx, settingsBuilder, moduleResolver, viewData); if (!queryGraph) { ctx.AddError(TIssue(ctx.GetPosition(readNode.Pos()), "The query stored in the view cannot be compiled.")); @@ -156,4 +153,4 @@ TExprNode::TPtr RewriteReadFromView( return 
Build(ctx, node->Pos()).Input(topLevelRead).Done().Ptr(); } -} \ No newline at end of file +} diff --git a/ydb/core/kqp/provider/rewrite_io_utils.h b/ydb/core/kqp/provider/rewrite_io_utils.h index d793cc0fcd1a..dd3dff66dd06 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.h +++ b/ydb/core/kqp/provider/rewrite_io_utils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include namespace NYql { @@ -9,8 +11,9 @@ TExprNode::TPtr FindTopLevelRead(const TExprNode::TPtr& queryGraph); TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, - const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ); -} \ No newline at end of file +} diff --git a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp index a4d88aff12d2..e36f8ce93841 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp @@ -79,6 +79,16 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { return TStatus::Ok; } + TStatus HandleAnalyze(TKiAnalyzeTable node, TExprContext& ctx) override { + Y_UNUSED(ctx); + + auto cluster = node.DataSink().Cluster(); + auto table = node.Table(); + + SessionCtx->Tables().GetOrAddTable(TString(cluster), SessionCtx->GetDatabase(), TString(table)); + return TStatus::Ok; + } + TStatus HandleCreateTopic(TKiCreateTopic node, TExprContext& ctx) override { Y_UNUSED(ctx); Y_UNUSED(node); @@ -176,9 +186,9 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } TStatus HandleDropObject(TKiDropObject node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "DropObject is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(node); + Y_UNUSED(ctx); + return TStatus::Ok; } TStatus 
HandleCreateGroup(TKiCreateGroup node, TExprContext& ctx) override { @@ -275,6 +285,9 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } SessionCtx->Tables().GetOrAddTable(TString(cluster), SessionCtx->GetDatabase(), key.GetTablePath()); return TStatus::Ok; + } else if (mode == "analyze") { + SessionCtx->Tables().GetOrAddTable(TString(cluster), SessionCtx->GetDatabase(), key.GetTablePath()); + return TStatus::Ok; } else { ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() << "Unsupported Kikimr table write mode: " << settings.Mode.Cast().Value())); @@ -549,6 +562,10 @@ class TKikimrDataSink : public TDataProviderBase return true; } + if (node.IsCallable(TKiAnalyzeTable::CallableName())) { + return true; + } + if (auto maybeRight = TMaybeNode(&node).Tuple().Maybe()) { if (maybeRight.Input().Maybe()) { return true; @@ -864,11 +881,16 @@ class TKikimrDataSink : public TDataProviderBase return false; } - if (tableDesc.Metadata->Kind == EKikimrTableKind::Olap && mode != "replace" && mode != "drop" && mode != "drop_if_exists" && mode != "insert_abort" && mode != "update" && mode != "upsert" && mode != "delete" && mode != "update_on" && mode != "delete_on") { + if (tableDesc.Metadata->Kind == EKikimrTableKind::Olap && mode != "replace" && mode != "drop" && mode != "drop_if_exists" && mode != "insert_abort" && mode != "update" && mode != "upsert" && mode != "delete" && mode != "update_on" && mode != "delete_on" && mode != "analyze") { ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "Write mode '" << static_cast(mode) << "' is not supported for olap tables.")); return true; } + if (tableDesc.Metadata->Kind == EKikimrTableKind::Datashard && mode == "analyze") { + ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << static_cast(mode) << " is not supported for oltp tables.")); + return true; + } + return false; } @@ -997,6 +1019,20 @@ class TKikimrDataSink : public TDataProviderBase .Done() 
.Ptr(); } + } else if (mode == "analyze") { + auto columns = Build(ctx, node->Pos()); + + for (const auto& column: settings.Columns.Cast().Ptr()->Children()) { + columns.Add(column); + } + + return Build(ctx, node->Pos()) + .World(node->Child(0)) + .DataSink(node->Child(1)) + .Table().Build(key.GetTablePath()) + .Columns(columns.Done()) + .Done() + .Ptr(); } else { return Build(ctx, node->Pos()) .World(node->Child(0)) @@ -1132,7 +1168,8 @@ class TKikimrDataSink : public TDataProviderBase YQL_ENSURE(settings.Mode); auto mode = settings.Mode.Cast(); - if (mode == "create") { + if (mode == "create" || mode == "create_if_not_exists") { + bool existingOk = mode == "create_if_not_exists"; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) @@ -1140,9 +1177,11 @@ class TKikimrDataSink : public TDataProviderBase .TopicSettings(settings.TopicSettings.Cast()) .Consumers(settings.Consumers.Cast()) .Settings(settings.Other) + .ExistingOk().Value(existingOk).Build() .Done() .Ptr(); - } else if (mode == "alter") { + } else if (mode == "alter" || mode == "alter_if_exists") { + bool missingOk = mode == "alter_if_exists"; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) @@ -1152,14 +1191,17 @@ class TKikimrDataSink : public TDataProviderBase .AlterConsumers(settings.AlterConsumers.Cast()) .DropConsumers(settings.DropConsumers.Cast()) .Settings(settings.Other) + .MissingOk().Value(missingOk).Build() .Done() .Ptr(); - } else if (mode == "drop") { + } else if (mode == "drop" || mode == "drop_if_exists") { + bool missingOk = (mode == "drop_if_exists"); return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) .Topic().Build(key.GetTopicPath()) .Settings(settings.Other) + .MissingOk().Value(missingOk).Build() .Done() .Ptr(); } else { @@ -1571,6 +1613,10 @@ IGraphTransformer::TStatus TKiSinkVisitorTransformer::DoTransform(TExprNode::TPt return HandleAlterSequence(node.Cast(), ctx); } + if (auto node = 
callable.Maybe()) { + return HandleAnalyze(node.Cast(), ctx); + } + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << "(Kikimr DataSink) Unsupported function: " << callable.CallableName())); return TStatus::Error; diff --git a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp index d289a9e7f2e1..1c3a02efc504 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp @@ -3,6 +3,7 @@ #include "yql_kikimr_provider_impl.h" #include +#include #include #include @@ -472,12 +473,14 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) : FunctionRegistry(functionRegistry) , Types(types) , Gateway(gateway) , SessionCtx(sessionCtx) , ExternalSourceFactory(externalSourceFactory) + , GUCSettings(gucSettings) , ConfigurationTransformer(new TKikimrConfigurationTransformer(sessionCtx, types)) , IntentDeterminationTransformer(new TKiSourceIntentDeterminationTransformer(sessionCtx)) , LoadTableMetadataTransformer(CreateKiSourceLoadTableMetadataTransformer(gateway, sessionCtx, types, externalSourceFactory, isInternalCall)) @@ -760,6 +763,7 @@ class TKikimrDataSource : public TDataProviderBase { } ctx.Step + .Repeat(TExprStep::ExpandApplyForLambdas) .Repeat(TExprStep::ExprEval) .Repeat(TExprStep::DiscoveryIO) .Repeat(TExprStep::Epochs) @@ -767,8 +771,17 @@ class TKikimrDataSource : public TDataProviderBase { .Repeat(TExprStep::LoadTablesMetadata) .Repeat(TExprStep::RewriteIO); - const auto& query = tableDesc.Metadata->ViewPersistedData.QueryText; - return RewriteReadFromView(node, ctx, query, cluster); + const auto& viewData = tableDesc.Metadata->ViewPersistedData; + + NKqp::TKqpTranslationSettingsBuilder settingsBuilder( + SessionCtx->Query().Type, + 
SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), + cluster, + viewData.QueryText, + SessionCtx->Config().BindingsMode, + GUCSettings + ); + return RewriteReadFromView(node, ctx, settingsBuilder, Types.Modules, viewData); } } @@ -881,6 +894,7 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr Gateway; TIntrusivePtr SessionCtx; NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory; + TGUCSettings::TPtr GUCSettings; TAutoPtr ConfigurationTransformer; TAutoPtr IntentDeterminationTransformer; @@ -920,9 +934,10 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) { - return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall); + return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall, gucSettings); } TAutoPtr CreateKiSourceLoadTableMetadataTransformer(TIntrusivePtr gateway, diff --git a/ydb/core/kqp/provider/yql_kikimr_exec.cpp b/ydb/core/kqp/provider/yql_kikimr_exec.cpp index 8c1d07ed998a..6a10ccdbf721 100644 --- a/ydb/core/kqp/provider/yql_kikimr_exec.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_exec.cpp @@ -280,6 +280,18 @@ namespace { }; } + TAnalyzeSettings ParseAnalyzeSettings(const TKiAnalyzeTable& analyze) { + TVector columns; + for (const auto& column: analyze.Columns()) { + columns.push_back(TString(column.Ptr()->Content())); + } + + return TAnalyzeSettings{ + .TablePath = TString(analyze.Table()), + .Columns = std::move(columns) + }; + } + TAlterColumnTableSettings ParseAlterColumnTableSettings(TKiAlterTable alter) { return TAlterColumnTableSettings{ .Table = TString(alter.Table()) @@ -425,8 +437,8 @@ namespace { request->mutable_partitioning_settings()->set_min_active_partitions( 
FromString(setting.Value().Cast().Literal().Cast().Value()) ); - } else if (name == "setPartitionsLimit") { - request->mutable_partitioning_settings()->set_partition_count_limit( + } else if (name == "setMaxPartitions") { + request->mutable_partitioning_settings()->set_max_active_partitions( FromString(setting.Value().Cast().Literal().Cast().Value()) ); } else if (name == "setRetentionPeriod") { @@ -456,8 +468,29 @@ namespace { ); auto* protoCodecs = request->mutable_supported_codecs(); for (auto codec : codecs) { - protoCodecs->add_codecs(codec); + protoCodecs->add_codecs(codec); } + } else if (name == "setAutoPartitioningStabilizationWindow") { + auto microValue = FromString(setting.Value().Cast().Literal().Value()); + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->mutable_stabilization_window()->set_seconds( + static_cast(microValue / 1'000'000) + ); + } else if (name == "setAutoPartitioningUpUtilizationPercent") { + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->set_up_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningDownUtilizationPercent") { + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->set_down_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningStrategy") { + Ydb::Topic::AutoPartitioningStrategy strategy; + auto result = GetTopicAutoPartitioningStrategyFromString( + TString(setting.Value().Cast().Literal().Cast().Value()), + strategy + ); + YQL_ENSURE(result); + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->set_strategy(strategy); } } } @@ -470,7 +503,7 @@ namespace { request->mutable_alter_partitioning_settings()->set_set_min_active_partitions( 
FromString(setting.Value().Cast().Literal().Cast().Value()) ); - } else if (name == "setPartitionsLimit") { + } else if (name == "setMaxPartitions") { request->mutable_alter_partitioning_settings()->set_set_partition_count_limit( FromString(setting.Value().Cast().Literal().Cast().Value()) ); @@ -503,6 +536,27 @@ namespace { for (auto codec : codecs) { protoCodecs->add_codecs(codec); } + } else if (name == "setAutoPartitioningStabilizationWindow") { + auto microValue = FromString(setting.Value().Cast().Literal().Value()); + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->mutable_set_stabilization_window()->set_seconds( + static_cast(microValue / 1'000'000) + ); + } else if (name == "setAutoPartitioningUpUtilizationPercent") { + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->set_set_up_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningDownUtilizationPercent") { + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->set_set_down_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningStrategy") { + Ydb::Topic::AutoPartitioningStrategy strategy; + auto result = GetTopicAutoPartitioningStrategyFromString( + TString(setting.Value().Cast().Literal().Cast().Value()), + strategy + ); + YQL_ENSURE(result); + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->set_set_strategy(strategy); } } } @@ -761,10 +815,11 @@ class TKiSourceCallableExecutionTransformer : public TAsyncCallbackTransformer fakeReads; auto paramsType = NDq::CollectParameters(programLambda, ctx); + NDq::TSpillingSettings spillingSettings{SessionCtx->Config().GetEnabledSpillingNodes()}; 
lambda = NDq::BuildProgram( programLambda, *paramsType, compiler, SessionCtx->Query().QueryData->GetAllocState()->TypeEnv, *SessionCtx->Query().QueryData->GetAllocState()->HolderFactory.GetFunctionRegistry(), - ctx, fakeReads); + ctx, fakeReads, spillingSettings); NKikimr::NMiniKQL::TProgramBuilder programBuilder(SessionCtx->Query().QueryData->GetAllocState()->TypeEnv, *SessionCtx->Query().QueryData->GetAllocState()->HolderFactory.GetFunctionRegistry()); @@ -938,16 +993,12 @@ class TKiSourceCallableExecutionTransformer : public TAsyncCallbackTransformer columnHints(NCommon::GetResOrPullColumnHints(node)); auto protoValue = &resultValue; YQL_ENSURE(resultValue.GetArena()); - if (IsRawKikimrResult(resultValue)) { - protoValue = KikimrResultToProto(resultValue, columnHints, fillSettings, resultValue.GetArena()); - } - YQL_ENSURE(fillSettings.Format == IDataProvider::EResultFormat::Custom); YQL_ENSURE(fillSettings.FormatDetails == KikimrMkqlProtoFormat); @@ -1829,38 +1880,6 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { - auto requireStatus = RequireChild(*input, 0); - if (requireStatus.Level != TStatus::Ok) { - return SyncStatus(requireStatus); - } - auto cluster = TString(maybeCreate.Cast().DataSink().Cluster()); - TString topicName = TString(maybeCreate.Cast().Topic()); - Ydb::Topic::CreateTopicRequest createReq; - createReq.set_path(topicName); - for (const auto& consumer : maybeCreate.Cast().Consumers()) { - auto error = AddConsumerToTopicRequest(createReq.add_consumers(), consumer); - if (!error.empty()) { - ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << error << input->Content())); - return SyncError(); - } - } - AddTopicSettingsToRequest(&createReq,maybeCreate.Cast().TopicSettings()); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - // DEBUG - // Cerr << "Create topic request proto: " << createReq.DebugString() << Endl; - auto future = prepareOnly ? 
CreateDummySuccess() : ( - Gateway->CreateTopic(cluster, std::move(createReq)) - ); - - return WrapFuture(future, - [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { - Y_UNUSED(res); - auto resultNode = ctx.NewWorld(input->Pos()); - return resultNode; - }, "Executing CREATE TOPIC"); - } - if (auto maybeCreateSequence = TMaybeNode(input)) { auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { @@ -1922,6 +1941,35 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { + auto requireStatus = RequireChild(*input, 0); + if (requireStatus.Level != TStatus::Ok) { + return SyncStatus(requireStatus); + } + auto cluster = TString(maybeCreate.Cast().DataSink().Cluster()); + TString topicName = TString(maybeCreate.Cast().Topic()); + Ydb::Topic::CreateTopicRequest createReq; + createReq.set_path(topicName); + for (const auto& consumer : maybeCreate.Cast().Consumers()) { + auto error = AddConsumerToTopicRequest(createReq.add_consumers(), consumer); + if (!error.empty()) { + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << error << input->Content())); + return SyncError(); + } + } + AddTopicSettingsToRequest(&createReq,maybeCreate.Cast().TopicSettings()); + bool existingOk = (maybeCreate.ExistingOk().Cast().Value() == "1"); + + auto future = Gateway->CreateTopic(cluster, std::move(createReq), existingOk); + + return WrapFuture(future, + [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { + Y_UNUSED(res); + auto resultNode = ctx.NewWorld(input->Pos()); + return resultNode; + }, "Executing CREATE TOPIC"); + } + if (auto maybeAlter = TMaybeNode(input)) { auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { @@ -1949,13 +1997,9 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer().StringValue(); alterReq.add_drop_consumers(name); } + bool 
missingOk = (maybeAlter.MissingOk().Cast().Value() == "1"); AddAlterTopicSettingsToRequest(&alterReq, maybeAlter.Cast().TopicSettings()); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - // DEBUG - // Cerr << "Alter topic request proto:\n" << alterReq.DebugString() << Endl; - auto future = prepareOnly ? CreateDummySuccess() : ( - Gateway->AlterTopic(cluster, std::move(alterReq)) - ); + auto future = Gateway->AlterTopic(cluster, std::move(alterReq), missingOk); return WrapFuture(future, [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { @@ -1966,21 +2010,15 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { - if (!EnsureNotPrepare("DROP TOPIC", input->Pos(), SessionCtx->Query(), ctx)) { - return SyncError(); - } - auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { return SyncStatus(requireStatus); } auto cluster = TString(maybeDrop.Cast().DataSink().Cluster()); TString topicName = TString(maybeDrop.Cast().Topic()); + bool missingOk = (maybeDrop.MissingOk().Cast().Value() == "1"); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - auto future = prepareOnly ? 
CreateDummySuccess() : ( - Gateway->DropTopic(cluster, topicName) - ); + auto future = Gateway->DropTopic(cluster, topicName, missingOk); return WrapFuture(future, [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { @@ -2281,6 +2319,26 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { + if (!SessionCtx->Config().FeatureFlags.GetEnableColumnStatistics()) { + ctx.AddError(TIssue("ANALYZE command is not supported because `EnableColumnStatistics` feature flag is off")); + return SyncError(); + } + + auto cluster = TString(maybeAnalyze.Cast().DataSink().Cluster()); + + TAnalyzeSettings analyzeSettings = ParseAnalyzeSettings(maybeAnalyze.Cast()); + + auto future = Gateway->Analyze(cluster, analyzeSettings); + + return WrapFuture(future, + [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { + Y_UNUSED(res); + auto resultNode = ctx.NewWorld(input->Pos()); + return resultNode; + }, "Executing ANALYZE"); + } + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << "(Kikimr DataSink) Failed to execute node: " << input->Content())); return SyncError(); diff --git a/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json b/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json index 4d8f0946cacb..741ffcd780d4 100644 --- a/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json +++ b/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json @@ -171,7 +171,8 @@ {"Index": 2, "Name": "Topic", "Type": "TCoAtom"}, {"Index": 3, "Name": "Consumers", "Type": "TCoTopicConsumerList"}, {"Index": 4, "Name": "TopicSettings", "Type": "TCoNameValueTupleList"}, - {"Index": 5, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 5, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 6, "Name": "ExistingOk", "Type": "TCoAtom"} ] }, { @@ -186,7 +187,9 @@ {"Index": 4, "Name": "AlterConsumers", "Type": "TCoTopicConsumerList"}, {"Index": 5, "Name": 
"DropConsumers", "Type": "TCoAtomList"}, {"Index": 6, "Name": "TopicSettings", "Type": "TCoNameValueTupleList"}, - {"Index": 7, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 7, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 8, "Name": "MissingOk", "Type": "TCoAtom"} + ] }, { @@ -197,7 +200,9 @@ {"Index": 0, "Name": "World", "Type": "TExprBase"}, {"Index": 1, "Name": "DataSink", "Type": "TKiDataSink"}, {"Index": 2, "Name": "Topic", "Type": "TCoAtom"}, - {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 4, "Name": "MissingOk", "Type": "TCoAtom"} + ] }, { @@ -493,6 +498,17 @@ {"Index": 2, "Name": "Replication", "Type": "TCoAtom"}, {"Index": 3, "Name": "Cascade", "Type": "TCoAtom"} ] + }, + { + "Name": "TKiAnalyzeTable", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "KiAnalyze!"}, + "Children": [ + {"Index": 0, "Name": "World", "Type": "TExprBase"}, + {"Index": 1, "Name": "DataSink", "Type": "TKiDataSink"}, + {"Index": 2, "Name": "Table", "Type": "TCoAtom"}, + {"Index": 3, "Name": "Columns", "Type": "TCoAtomList"} + ] } ] } diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index c0bf3edf6d61..8b23ef1d6715 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -365,6 +365,26 @@ static std::shared_ptr> GetCodecsMapping() return codecsMapping; } +static std::shared_ptr> GetAutoPartitioningStrategiesMapping() { + static std::shared_ptr> strategiesMapping; + if (!strategiesMapping) { + strategiesMapping = MakeEnumMapping( + Ydb::Topic::AutoPartitioningStrategy_descriptor(), "auto_partitioning_strategy_"); + + const TString prefix = "scale_"; + for (const auto& [key, value] : *strategiesMapping) { + if (key.StartsWith(prefix)) { + TString newKey = key; + newKey.erase(0, prefix.length()); + + 
Y_ABORT_UNLESS(strategiesMapping->find(newKey) == strategiesMapping->end()); + (*strategiesMapping)[newKey] = value; + } + } + } + return strategiesMapping; +} + static std::shared_ptr> GetMeteringModesMapping() { static std::shared_ptr> metModesMapping; if (metModesMapping == nullptr) { @@ -387,6 +407,18 @@ bool GetTopicMeteringModeFromString(const TString& meteringMode, Ydb::Topic::Met } } +bool GetTopicAutoPartitioningStrategyFromString(const TString& strategy, Ydb::Topic::AutoPartitioningStrategy& result) { + auto mapping = GetAutoPartitioningStrategiesMapping(); + auto normStrategy = to_lower(strategy); + auto iter = mapping->find(normStrategy); + if (iter.IsEnd()) { + return false; + } else { + result = iter->second; + return true; + } +} + TVector GetTopicCodecsFromString(const TStringBuf& codecsStr) { const TVector codecsList = StringSplitter(codecsStr).Split(',').SkipEmpty(); TVector result; diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index a5c666a23ab1..152504e142c0 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -367,6 +369,7 @@ ETableType GetTableTypeFromString(const TStringBuf& tableType); bool GetTopicMeteringModeFromString(const TString& meteringMode, Ydb::Topic::MeteringMode& result); TVector GetTopicCodecsFromString(const TStringBuf& codecsStr); +bool GetTopicAutoPartitioningStrategyFromString(const TString& strategy, Ydb::Topic::AutoPartitioningStrategy& result); enum class EStoreType : ui32 { @@ -404,6 +407,7 @@ enum EMetaSerializationType : ui64 { struct TViewPersistedData { TString QueryText; + NYql::NProto::TTranslationSettings CapturedContext; }; struct TKikimrTableMetadata : public TThrRefBase { @@ -424,6 +428,7 @@ struct TKikimrTableMetadata : public TThrRefBase { ui64 DataSize = 0; 
ui64 MemorySize = 0; ui32 ShardsCount = 0; + bool StatsLoaded = false; TInstant LastAccessTime; TInstant LastUpdateTime; @@ -460,6 +465,7 @@ struct TKikimrTableMetadata : public TThrRefBase { , Kind(static_cast(message->GetKind())) , RecordsCount(message->GetRecordsCount()) , DataSize(message->GetDataSize()) + , StatsLoaded(message->GetStatsLoaded()) , KeyColumnNames(message->GetKeyColunmNames().begin(), message->GetKeyColunmNames().end()) { @@ -525,8 +531,10 @@ struct TKikimrTableMetadata : public TThrRefBase { PathId.ToMessage(message->MutablePathId()); message->SetSchemaVersion(SchemaVersion); message->SetKind(static_cast(Kind)); + message->SetStatsLoaded(StatsLoaded); message->SetRecordsCount(RecordsCount); message->SetDataSize(DataSize); + message->SetStatsLoaded(StatsLoaded); for(auto& [key, value] : Attributes) { message->AddAttributes()->SetKey(key); message->AddAttributes()->SetValue(value); @@ -664,6 +672,13 @@ struct TCreateExternalTableSettings { TVector> SourceTypeParameters; }; +struct TAlterTopicSettings { + Ydb::Topic::AlterTopicRequest Request; + TString Name; + TString WorkDir; + bool MissingOk; +}; + struct TSequenceSettings { TMaybe MinValue; TMaybe MaxValue; @@ -773,6 +788,11 @@ struct TDropReplicationSettings { bool Cascade = false; }; +struct TAnalyzeSettings { + TString TablePath; + TVector Columns; +}; + struct TKikimrListPathItem { TKikimrListPathItem(TString name, bool isDirectory) { Name = name; @@ -842,7 +862,7 @@ class IKikimrGateway : public TThrRefBase { struct TQueryResult : public TGenericResult { TString SessionId; - TVector Results; + TVector Results; TMaybe Profile; // TODO: Deprecate. 
NKqpProto::TKqpStatsQuery QueryStats; std::unique_ptr PreparingQuery; @@ -937,11 +957,13 @@ class IKikimrGateway : public TThrRefBase { virtual NThreading::TFuture DropTable(const TString& cluster, const TDropTableSettings& settings) = 0; - virtual NThreading::TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) = 0; + virtual NThreading::TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) = 0; + + virtual NThreading::TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) = 0; - virtual NThreading::TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) = 0; + virtual NThreading::TFuture AlterTopicPrepared(TAlterTopicSettings&& settings) = 0; - virtual NThreading::TFuture DropTopic(const TString& cluster, const TString& topic) = 0; + virtual NThreading::TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) = 0; virtual NThreading::TFuture CreateReplication(const TString& cluster, const TCreateReplicationSettings& settings) = 0; @@ -999,6 +1021,8 @@ class IKikimrGateway : public TThrRefBase { virtual NThreading::TFuture DropExternalTable(const TString& cluster, const TDropExternalTableSettings& settings, bool missingOk) = 0; + virtual NThreading::TFuture Analyze(const TString& cluster, const TAnalyzeSettings& settings) = 0; + virtual TVector GetCollectedSchemeData() = 0; virtual NThreading::TFuture ExecuteLiteral(const TString& program, const NKikimrMiniKQL::TType& resultType, NKikimr::NKqp::TTxAllocatorState::TPtr txAlloc) = 0; diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway_ut.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway_ut.cpp index b16d43a67527..a5422321bcf8 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway_ut.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway_ut.cpp @@ -74,7 +74,7 @@ TIntrusivePtr GetIcGateway(Tests::TServer& server) { counters->TxProxyMon = new 
NTxProxy::TTxProxyMon(server.GetRuntime()->GetAppData(0).Counters); std::shared_ptr loader = std::make_shared(TestCluster, server.GetRuntime()->GetAnyNodeActorSystem(),TIntrusivePtr(nullptr), false); - return CreateKikimrIcGateway(TestCluster, NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY, "/Root", std::move(loader), server.GetRuntime()->GetAnyNodeActorSystem(), + return CreateKikimrIcGateway(TestCluster, NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY, "/Root", "/Root", std::move(loader), server.GetRuntime()->GetAnyNodeActorSystem(), server.GetRuntime()->GetNodeId(0), counters, server.GetSettings().AppConfig->GetQueryServiceConfig()); } @@ -232,7 +232,7 @@ void TestCreateResourcePool(TTestActorRuntime& runtime, TIntrusivePtr gateway, const TString& poolId) { TDropObjectSettings settings("RESOURCE_POOL", poolId, {}); - TestDropObjectCommon(runtime, gateway, settings, TStringBuilder() << "/Root/.resource_pools/" << poolId); + TestDropObjectCommon(runtime, gateway, settings, TStringBuilder() << "/Root/.metadata/workload_manager/pools/" << poolId); } TKikimrRunner GetKikimrRunnerWithResourcePools() { @@ -422,6 +422,7 @@ Y_UNIT_TEST_SUITE(KikimrIcGateway) { LOCATION="my-bucket", AUTH_METHOD="BASIC", LOGIN="mylogin", + DATABASE_NAME="postgres", PASSWORD_SECRET_NAME=")" << secretId << R"(" );)"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); @@ -458,6 +459,7 @@ Y_UNIT_TEST_SUITE(KikimrIcGateway) { SERVICE_ACCOUNT_ID="mysa", SERVICE_ACCOUNT_SECRET_NAME=")" << secretSaId << R"(", LOGIN="mylogin", + DATABASE_NAME="postgres", PASSWORD_SECRET_NAME=")" << secretPasswordId << R"(" );)"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); diff --git a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp index 5220fba92a72..5c4d7633ba55 100644 --- a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp @@ -247,7 +247,7 @@ struct TKiExploreTxResults { } } - void 
AddResult(const TExprBase& result) { + void PrepareForResult() { if (QueryBlocks.empty()) { AddQueryBlock(); } @@ -255,6 +255,10 @@ struct TKiExploreTxResults { if (!ConcurrentResults && QueryBlocks.back().Results.size() > 0) { AddQueryBlock(); } + } + + void AddResult(const TExprBase& result) { + PrepareForResult(); auto& curBlock = QueryBlocks.back(); curBlock.Results.push_back(result); @@ -422,6 +426,10 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T const auto& tableData = tablesData->ExistingTable(cluster, table); YQL_ENSURE(tableData.Metadata); + if (!write.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + if (tableOp == TYdbOperation::UpdateOn) { auto inputColumnsSetting = GetSetting(write.Settings().Ref(), "input_columns"); YQL_ENSURE(inputColumnsSetting); @@ -445,7 +453,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(write.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(write.ReturningColumns()).Build() + .Build() .Done()); } @@ -480,6 +490,11 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T for (const auto& item : updateStructType->GetItems()) { updateColumns.emplace(item->GetName()); } + + if (!update.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + txRes.AddUpdateOpToQueryBlock(node, tableData.Metadata, updateColumns); if (!update.ReturningColumns().Empty()) { txRes.AddResult( @@ -491,7 +506,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(update.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(update.ReturningColumns()).Build() + .Build() .Done()); } @@ -513,6 +530,10 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T YQL_ENSURE(tablesData); const auto& tableData = 
tablesData->ExistingTable(cluster, table); YQL_ENSURE(tableData.Metadata); + if (!del.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + txRes.AddWriteOpToQueryBlock(node, tableData.Metadata, tableOp & KikimrReadOps()); if (!del.ReturningColumns().Empty()) { txRes.AddResult( @@ -524,7 +545,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(del.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(del.ReturningColumns()).Build() + .Build() .Done()); } diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.cpp b/ydb/core/kqp/provider/yql_kikimr_provider.cpp index ac310064077c..42fd3b749594 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_provider.cpp @@ -73,6 +73,7 @@ struct TKikimrData { DataSinkNames.insert(TKiCreateSequence::CallableName()); DataSinkNames.insert(TKiDropSequence::CallableName()); DataSinkNames.insert(TKiAlterSequence::CallableName()); + DataSinkNames.insert(TKiAnalyzeTable::CallableName()); CommitModes.insert(CommitModeFlush); CommitModes.insert(CommitModeRollback); @@ -964,3 +965,36 @@ TCoNameValueTupleList TKiExecDataQuerySettings::BuildNode(TExprContext& ctx, TPo } } // namespace NYql + +namespace NSQLTranslation { + +void Serialize(const TTranslationSettings& settings, NYql::NProto::TTranslationSettings& serializedSettings) { + serializedSettings.SetPathPrefix(settings.PathPrefix); + serializedSettings.SetSyntaxVersion(settings.SyntaxVersion); + serializedSettings.SetAnsiLexer(settings.AnsiLexer); + serializedSettings.SetPgParser(settings.PgParser); + + auto* pragmas = serializedSettings.MutablePragmas(); + pragmas->Clear(); + pragmas->Add(settings.Flags.begin(), settings.Flags.end()); +} + +void Deserialize(const NYql::NProto::TTranslationSettings& serializedSettings, TTranslationSettings& settings) { + #define DeserializeSetting(settingName) \ + if 
(serializedSettings.Has##settingName()) { \ + settings.settingName = serializedSettings.Get##settingName(); \ + } + + DeserializeSetting(PathPrefix); + DeserializeSetting(SyntaxVersion); + DeserializeSetting(AnsiLexer); + DeserializeSetting(PgParser); + + #undef DeserializeSetting + + // overwrite existing pragmas + settings.Flags.clear(); + settings.Flags.insert(serializedSettings.GetPragmas().begin(), serializedSettings.GetPragmas().end()); +} + +} diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.h b/ydb/core/kqp/provider/yql_kikimr_provider.h index 48634bf9c7e8..246fd20b3e99 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.h +++ b/ydb/core/kqp/provider/yql_kikimr_provider.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -124,6 +125,8 @@ struct TKikimrQueryContext : TThrRefBase { // we do not want add extra life time for query context here std::shared_ptr RpcCtx; + NSQLTranslation::TTranslationSettings TranslationSettings; + void Reset() { PrepareOnly = false; SuppressDdlChecks = false; @@ -142,6 +145,7 @@ struct TKikimrQueryContext : TThrRefBase { RlPath.Clear(); RpcCtx.reset(); + TranslationSettings = NSQLTranslation::TTranslationSettings(); } }; @@ -240,6 +244,7 @@ enum class TYdbOperation : ui32 { CreateReplication = 1 << 24, AlterReplication = 1 << 25, DropReplication = 1 << 26, + Analyze = 1 << 27, }; Y_DECLARE_FLAGS(TYdbOperations, TYdbOperation); @@ -323,7 +328,6 @@ class TKikimrTransactionContextBase : public TThrRefBase { for (const auto& info : tableInfos) { tableInfoMap.emplace(info.GetTableName(), &info); - TKikimrPathId pathId(info.GetTableId().GetOwnerId(), info.GetTableId().GetTableId()); TableByIdMap.emplace(pathId, info.GetTableName()); } @@ -461,12 +465,14 @@ class TKikimrSessionContext : public TThrRefBase { TIntrusivePtr timeProvider, TIntrusivePtr randomProvider, const TIntrusiveConstPtr& userToken, - TIntrusivePtr txCtx = nullptr) + TIntrusivePtr txCtx = nullptr, + const TIntrusivePtr& 
userRequestContext = nullptr) : Configuration(config) , TablesData(MakeIntrusive()) , QueryCtx(MakeIntrusive(functionRegistry, timeProvider, randomProvider)) , TxCtx(txCtx) , UserToken(userToken) + , UserRequestContext(userRequestContext) {} TKikimrSessionContext(const TKikimrSessionContext&) = delete; @@ -506,6 +512,10 @@ class TKikimrSessionContext : public TThrRefBase { return Database; } + TString GetDatabaseId() const { + return DatabaseId; + } + const TString& GetSessionId() const { return SessionId; } @@ -518,6 +528,10 @@ class TKikimrSessionContext : public TThrRefBase { Database = database; } + void SetDatabaseId(const TString& databaseId) { + DatabaseId = databaseId; + } + void SetSessionId(const TString& sessionId) { SessionId = sessionId; } @@ -548,10 +562,15 @@ class TKikimrSessionContext : public TThrRefBase { return UserToken; } + const TIntrusivePtr& GetUserRequestContext() const { + return UserRequestContext; + } + private: TString UserName; TString Cluster; TString Database; + TString DatabaseId; TString SessionId; TKikimrConfiguration::TPtr Configuration; TIntrusivePtr TablesData; @@ -559,6 +578,7 @@ class TKikimrSessionContext : public TThrRefBase { TIntrusivePtr TxCtx; NKikimr::NKqp::TKqpTempTablesState::TConstPtr TempTablesState; TIntrusiveConstPtr UserToken; + TIntrusivePtr UserRequestContext; }; TIntrusivePtr CreateKikimrDataSource( @@ -567,7 +587,8 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NKikimr::NExternalSource::IExternalSourceFactory::TPtr& sourceFactory, - bool isInternalCall); + bool isInternalCall, + TGUCSettings::TPtr gucSettings); TIntrusivePtr CreateKikimrDataSink( const NKikimr::NMiniKQL::IFunctionRegistry& functionRegistry, @@ -578,3 +599,10 @@ TIntrusivePtr CreateKikimrDataSink( TIntrusivePtr queryExecutor); } // namespace NYql + +namespace NSQLTranslation { + +void Serialize(const TTranslationSettings& settings, NYql::NProto::TTranslationSettings& serializedSettings); +void 
Deserialize(const NYql::NProto::TTranslationSettings& serializedSettings, TTranslationSettings& settings); + +} diff --git a/ydb/core/kqp/provider/yql_kikimr_provider_impl.h b/ydb/core/kqp/provider/yql_kikimr_provider_impl.h index f267f3f5a86e..a7d95b99f129 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider_impl.h +++ b/ydb/core/kqp/provider/yql_kikimr_provider_impl.h @@ -74,6 +74,8 @@ class TKiSinkVisitorTransformer : public TSyncTransformerBase { virtual TStatus HandleModifyPermissions(NNodes::TKiModifyPermissions node, TExprContext& ctx) = 0; virtual TStatus HandleReturningList(NNodes::TKiReturningList node, TExprContext& ctx) = 0; + + virtual TStatus HandleAnalyze(NNodes::TKiAnalyzeTable node, TExprContext& ctx) = 0; }; class TKikimrKey { diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp index c3db44432dc8..66f689c8aba8 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace NYql { @@ -23,6 +25,22 @@ EOptionalFlag GetOptionalFlagValue(const TMaybe& flag) { return EOptionalFlag::Disabled; } + +ui64 ParseEnableSpillingNodes(const TString &v) { + ui64 res = 0; + TVector vec; + StringSplitter(v).SplitBySet(",;| ").AddTo(&vec); + for (auto& s: vec) { + if (s.empty()) { + throw yexception() << "Empty value item"; + } + + auto value = FromString(s); + res |= ui64(value); + } + return res; +} + static inline bool GetFlagValue(const TMaybe& flag) { return flag ? 
flag.GetRef() : false; } @@ -64,12 +82,18 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, OptEnablePredicateExtract); REGISTER_SETTING(*this, OptEnableOlapPushdown); REGISTER_SETTING(*this, OptEnableOlapProvideComputeSharding); - REGISTER_SETTING(*this, OverrideStatistics); + REGISTER_SETTING(*this, OptOverrideStatistics); + REGISTER_SETTING(*this, OptCardinalityHints); + REGISTER_SETTING(*this, OptJoinAlgoHints); + REGISTER_SETTING(*this, OptJoinOrderHints); + REGISTER_SETTING(*this, OverridePlanner); + REGISTER_SETTING(*this, UseGraceJoinCoreForMap); REGISTER_SETTING(*this, OptUseFinalizeByKey); REGISTER_SETTING(*this, CostBasedOptimizationLevel); - REGISTER_SETTING(*this, OptEnableConstantFolding); + REGISTER_SETTING(*this, EnableSpillingNodes) + .Parser([](const TString& v) { return ParseEnableSpillingNodes(v); }); REGISTER_SETTING(*this, MaxDPccpDPTableSize); @@ -123,14 +147,9 @@ bool TKikimrSettings::HasOptEnableOlapProvideComputeSharding() const { } bool TKikimrSettings::HasOptUseFinalizeByKey() const { - return GetOptionalFlagValue(OptUseFinalizeByKey.Get()) != EOptionalFlag::Disabled; -} - -bool TKikimrSettings::HasOptEnableConstantFolding() const { - return GetOptionalFlagValue(OptEnableConstantFolding.Get()) == EOptionalFlag::Enabled; + return GetFlagValue(OptUseFinalizeByKey.Get().GetOrElse(true)) != EOptionalFlag::Disabled; } - EOptionalFlag TKikimrSettings::GetOptPredicateExtract() const { return GetOptionalFlagValue(OptEnablePredicateExtract.Get()); } @@ -152,4 +171,12 @@ TKikimrSettings::TConstPtr TKikimrConfiguration::Snapshot() const { return std::make_shared(*this); } +void TKikimrConfiguration::SetDefaultEnabledSpillingNodes(const TString& node) { + DefaultEnableSpillingNodes = ParseEnableSpillingNodes(node); +} + +ui64 TKikimrConfiguration::GetEnabledSpillingNodes() const { + return EnableSpillingNodes.Get().GetOrElse(DefaultEnableSpillingNodes); +} + } diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h 
b/ydb/core/kqp/provider/yql_kikimr_settings.h index d53fd5ef403a..fedfec239136 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -50,7 +50,14 @@ struct TKikimrSettings { NCommon::TConfSetting UseLlvm; NCommon::TConfSetting EnableLlvm; NCommon::TConfSetting HashJoinMode; - NCommon::TConfSetting OverrideStatistics; + NCommon::TConfSetting EnableSpillingNodes; + NCommon::TConfSetting OverridePlanner; + NCommon::TConfSetting UseGraceJoinCoreForMap; + + NCommon::TConfSetting OptOverrideStatistics; + NCommon::TConfSetting OptCardinalityHints; + NCommon::TConfSetting OptJoinAlgoHints; + NCommon::TConfSetting OptJoinOrderHints; /* Disable optimizer rules */ NCommon::TConfSetting OptDisableTopSort; @@ -61,7 +68,6 @@ struct TKikimrSettings { NCommon::TConfSetting OptEnableOlapProvideComputeSharding; NCommon::TConfSetting OptUseFinalizeByKey; NCommon::TConfSetting CostBasedOptimizationLevel; - NCommon::TConfSetting OptEnableConstantFolding; NCommon::TConfSetting MaxDPccpDPTableSize; @@ -83,8 +89,6 @@ struct TKikimrSettings { bool HasOptEnableOlapPushdown() const; bool HasOptEnableOlapProvideComputeSharding() const; bool HasOptUseFinalizeByKey() const; - bool HasOptEnableConstantFolding() const; - EOptionalFlag GetOptPredicateExtract() const; EOptionalFlag GetUseLlvm() const; @@ -169,6 +173,12 @@ struct TKikimrConfiguration : public TKikimrSettings, public NCommon::TSettingDi bool EnableOltpSink = false; NKikimrConfig::TTableServiceConfig_EBlockChannelsMode BlockChannelsMode; bool EnableSpillingGenericQuery = false; + ui32 DefaultCostBasedOptimizationLevel = 4; + bool EnableConstantFolding = true; + ui64 DefaultEnableSpillingNodes = 0; + + void SetDefaultEnabledSpillingNodes(const TString& node); + ui64 GetEnabledSpillingNodes() const; }; } diff --git a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp index a5746e0be2c0..593e8f24a714 100644 --- 
a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp @@ -1455,7 +1455,7 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over } static bool CheckTopicSettings(const TCoNameValueTupleList& settings, TExprContext& ctx) { - ui32 minParts = 0, partsLimit = 0; + ui32 minParts = 0, maxPartitions = 0; TPosition errorPos; for (const auto& setting : settings) { auto name = setting.Name().Value(); @@ -1477,11 +1477,11 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over ); minParts = value; errorPos = ctx.GetPosition(setting.Value().Ref().Pos()); - } else if (name == "setPartitionsLimit") { + } else if (name == "setMaxPartitions") { ui32 value = FromString( setting.Value().Cast().Literal().template Cast().Value() ); - partsLimit = value; + maxPartitions = value; errorPos = ctx.GetPosition(setting.Value().Ref().Pos()); } else if (name.StartsWith("reset")) { ctx.AddError(TIssue( @@ -1490,10 +1490,10 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over ); return false; } - if (minParts && partsLimit && partsLimit < minParts) { + if (minParts && maxPartitions && maxPartitions < minParts) { ctx.AddError(TIssue( errorPos, - TStringBuilder() << "partitions_limit cannot be less than min_partitions") + TStringBuilder() << "max_partitions cannot be less than min_partitions") ); return false; } @@ -2020,6 +2020,16 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over return false; } + virtual TStatus HandleAnalyze(NNodes::TKiAnalyzeTable node, TExprContext& ctx) override { + auto table = SessionCtx->Tables().EnsureTableExists(TString(node.DataSink().Cluster()), TString(node.Table().Value()), node.Pos(), ctx); + if (!table) { + return TStatus::Error; + } + + node.Ptr()->SetTypeAnn(node.World().Ref().GetTypeAnn()); + return TStatus::Ok; + } + private: TIntrusivePtr Gateway; TIntrusivePtr SessionCtx; diff --git 
a/ydb/core/kqp/proxy_service/kqp_proxy_databases_cache.cpp b/ydb/core/kqp/proxy_service/kqp_proxy_databases_cache.cpp new file mode 100644 index 000000000000..4b949031032c --- /dev/null +++ b/ydb/core/kqp/proxy_service/kqp_proxy_databases_cache.cpp @@ -0,0 +1,238 @@ +#include "kqp_proxy_service_impl.h" + +#include + +#include + + +namespace NKikimr::NKqp { + +namespace { + + +struct TEvPrivate { + // Event ids + enum EEv : ui32 { + EvSubscribeOnDatabase = EventSpaceBegin(TEvents::ES_PRIVATE), + EvPingDatabaseSubscription, + + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + + struct TEvSubscribeOnDatabase : public TEventLocal { + explicit TEvSubscribeOnDatabase(const TString& database) + : Database(database) + {} + + const TString Database; + }; + + struct TEvPingDatabaseSubscription : public TEventLocal { + explicit TEvPingDatabaseSubscription(const TString& database) + : Database(database) + {} + + const TString Database; + }; +}; + +class TDatabaseSubscriberActor : public TActor { + struct TDatabaseState { + TString Database; + TString DatabaseId = ""; + bool Serverless = false; + + bool FetchRequestIsRunning = true; + TInstant LastUpdateTime = TInstant::Now(); + ui32 WatchKey = 0; + }; + + using TBase = TActor; + +public: + TDatabaseSubscriberActor(TDuration idleTimeout) + : TBase(&TDatabaseSubscriberActor::StateFunc) + , IdleTimeout(idleTimeout) + , DatabaseStates(std::numeric_limits::max()) + {} + + void Registered(TActorSystem* sys, const TActorId& owner) { + TBase::Registered(sys, owner); + Owner = owner; + } + + void Handle(TEvPrivate::TEvSubscribeOnDatabase::TPtr& ev) { + const TString& database = ev->Get()->Database; + auto databaseStateIt = DatabaseStates.Find(database); + + if (databaseStateIt == DatabaseStates.End()) { + DatabaseStates.Insert({database, TDatabaseState{.Database = database}}); + Register(NWorkload::CreateDatabaseFetcherActor(SelfId(), database)); + 
StartIdleCheck(); + return; + } + + databaseStateIt->LastUpdateTime = TInstant::Now(); + if (databaseStateIt->DatabaseId) { + SendSubscriberInfo(*databaseStateIt, Ydb::StatusIds::SUCCESS); + } + } + + void Handle(TEvPrivate::TEvPingDatabaseSubscription::TPtr& ev) { + auto databaseStateIt = DatabaseStates.Find(ev->Get()->Database); + if (databaseStateIt != DatabaseStates.End()) { + databaseStateIt->LastUpdateTime = TInstant::Now(); + } + } + + void Handle(NWorkload::TEvFetchDatabaseResponse::TPtr& ev) { + auto databaseStateIt = DatabaseStates.Find(ev->Get()->Database); + if (databaseStateIt == DatabaseStates.End()) { + return; + } + + databaseStateIt->FetchRequestIsRunning = false; + databaseStateIt->LastUpdateTime = TInstant::Now(); + databaseStateIt->DatabaseId = ev->Get()->DatabaseId; + databaseStateIt->Serverless = ev->Get()->Serverless; + SendSubscriberInfo(*databaseStateIt, ev->Get()->Status, ev->Get()->Issues); + + if (ev->Get()->Status == Ydb::StatusIds::SUCCESS) { + FreeWatchKey++; + databaseStateIt->WatchKey = FreeWatchKey; + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchPathId(ev->Get()->PathId, FreeWatchKey)); + } + } + + void Handle(TEvTxProxySchemeCache::TEvWatchNotifyDeleted::TPtr& ev) { + auto databaseStateIt = DatabaseStates.Find(ev->Get()->Path); + if (databaseStateIt == DatabaseStates.End()) { + return; + } + + UnsubscribeFromSchemeCache(*databaseStateIt); + SendSubscriberInfo(*databaseStateIt, Ydb::StatusIds::NOT_FOUND, {NYql::TIssue{"Database was dropped"}}); + DatabaseStates.Erase(databaseStateIt); + } + + void HandlePoison() { + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchRemove(0)); + TBase::PassAway(); + } + + void HandleWakeup() { + IdleCheckStarted = false; + const auto minimalTime = TInstant::Now() - IdleTimeout; + while (!DatabaseStates.Empty()) { + auto oldestIt = DatabaseStates.FindOldest(); + if (oldestIt->LastUpdateTime > minimalTime) { + break; + } + + UnsubscribeFromSchemeCache(*oldestIt); + 
SendSubscriberInfo(*oldestIt, Ydb::StatusIds::ABORTED, {NYql::TIssue{"Database subscription was dropped by idle timeout"}}); + DatabaseStates.Erase(oldestIt); + } + + if (!DatabaseStates.Empty()) { + StartIdleCheck(); + } + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvPrivate::TEvSubscribeOnDatabase, Handle); + hFunc(TEvPrivate::TEvPingDatabaseSubscription, Handle); + hFunc(NWorkload::TEvFetchDatabaseResponse, Handle); + sFunc(TEvents::TEvPoison, HandlePoison); + sFunc(TEvents::TEvWakeup, HandleWakeup); + + hFunc(TEvTxProxySchemeCache::TEvWatchNotifyDeleted, Handle); + IgnoreFunc(TEvTxProxySchemeCache::TEvWatchNotifyUpdated); + ) + +private: + void UnsubscribeFromSchemeCache(TDatabaseState& databaseState) const { + if (databaseState.WatchKey) { + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchRemove(databaseState.WatchKey)); + databaseState.WatchKey = 0; + } + } + + void SendSubscriberInfo(const TDatabaseState& databaseState, Ydb::StatusIds::StatusCode status, NYql::TIssues issues = {}) { + if (status == Ydb::StatusIds::SUCCESS || status == Ydb::StatusIds::UNSUPPORTED) { + Send(Owner, new TEvKqp::TEvUpdateDatabaseInfo(databaseState.Database, databaseState.DatabaseId, databaseState.Serverless)); + } else { + NYql::TIssue rootIssue(TStringBuilder() << "Failed to describe database " << databaseState.Database); + for (const auto& issue : issues) { + rootIssue.AddSubIssue(MakeIntrusive(issue)); + } + Send(Owner, new TEvKqp::TEvUpdateDatabaseInfo(databaseState.Database, status, {rootIssue})); + } + } + + void StartIdleCheck() { + if (!IdleCheckStarted) { + IdleCheckStarted = true; + Schedule(IdleTimeout, new TEvents::TEvWakeup()); + } + } + +private: + const TDuration IdleTimeout; + TActorId Owner; + bool IdleCheckStarted = false; + + TLRUCache DatabaseStates; + ui32 FreeWatchKey = 0; +}; + +} // anonymous namespace + +TDatabasesCache::TDatabasesCache(TDuration idleTimeout) + : IdleTimeout(idleTimeout) +{} + +void 
TDatabasesCache::UpdateDatabaseInfo(TEvKqp::TEvUpdateDatabaseInfo::TPtr& event, TActorContext actorContext) { + auto it = DatabasesCache.find(event->Get()->Database); + if (it == DatabasesCache.end()) { + return; + } + it->second.DatabaseId = event->Get()->DatabaseId; + + const bool success = event->Get()->Status == Ydb::StatusIds::SUCCESS; + for (auto& delayedEvent : it->second.DelayedEvents) { + if (success) { + actorContext.Send(std::move(delayedEvent.Event)); + } else { + actorContext.Send(actorContext.SelfID, new TEvKqp::TEvDelayedRequestError(std::move(delayedEvent.Event), event->Get()->Status, event->Get()->Issues), 0, delayedEvent.RequestType); + } + } + it->second.DelayedEvents.clear(); + + if (!success) { + DatabasesCache.erase(it); + } +} + +void TDatabasesCache::SubscribeOnDatabase(const TString& database, TActorContext actorContext) { + if (!SubscriberActor) { + SubscriberActor = actorContext.Register(new TDatabaseSubscriberActor(IdleTimeout)); + } + actorContext.Send(SubscriberActor, new TEvPrivate::TEvSubscribeOnDatabase(database)); +} + +void TDatabasesCache::PingDatabaseSubscription(const TString& database, TActorContext actorContext) const { + if (SubscriberActor) { + actorContext.Send(SubscriberActor, new TEvPrivate::TEvPingDatabaseSubscription(database)); + } +} + +void TDatabasesCache::StopSubscriberActor(TActorContext actorContext) const { + if (SubscriberActor) { + actorContext.Send(SubscriberActor, new TEvents::TEvPoison()); + } +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp b/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp index 23be28bcb264..b21490755b6e 100644 --- a/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp +++ b/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,7 @@ #include #include +#include namespace NKikimr::NKqp { @@ -146,12 +148,14 @@ class TKqpProxyService : public 
TActorBootstrapped { struct TEvOnRequestTimeout: public TEventLocal { ui64 RequestId; TDuration Timeout; + TDuration InitialTimeout; NYql::NDqProto::StatusIds::StatusCode Status; int Round; TEvOnRequestTimeout(ui64 requestId, TDuration timeout, NYql::NDqProto::StatusIds::StatusCode status, int round) : RequestId(requestId) , Timeout(timeout) + , InitialTimeout(timeout) , Status(status) , Round(round) {} @@ -172,6 +176,15 @@ class TKqpProxyService : public TActorBootstrapped { }; }; + enum class EDelayedRequestType { + QueryRequest, + ScriptRequest, + ForgetScriptExecutionOperation, + GetScriptExecutionOperation, + ListScriptExecutionOperations, + CancelScriptExecutionOperation, + }; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_PROXY_ACTOR; @@ -232,11 +245,18 @@ class TKqpProxyService : public TActorBootstrapped { IEventHandle::FlagTrackDelivery); WhiteBoardService = NNodeWhiteboard::MakeNodeWhiteboardServiceId(SelfId().NodeId()); + ResourcePoolsCache.UpdateFeatureFlags(FeatureFlags, ActorContext()); if (auto& cfg = TableServiceConfig.GetSpillingServiceConfig().GetLocalFileConfig(); cfg.GetEnable()) { + TString spillingRoot = cfg.GetRoot(); + if (spillingRoot.empty()) { + spillingRoot = NYql::NDq::GetTmpSpillingRootForCurrentUser(); + MakeDirIfNotExist(spillingRoot); + } + SpillingService = TlsActivationContext->ExecutorThread.RegisterActor(NYql::NDq::CreateDqLocalFileSpillingService( NYql::NDq::TFileSpillingServiceConfig{ - .Root = cfg.GetRoot(), + .Root = spillingRoot, .MaxTotalSize = cfg.GetMaxTotalSize(), .MaxFileSize = cfg.GetMaxFileSize(), .MaxFilePartSize = cfg.GetMaxFilePartSize(), @@ -269,7 +289,11 @@ class TKqpProxyService : public TActorBootstrapped { MakeKqpCompileComputationPatternServiceID(SelfId().NodeId()), CompileComputationPatternService); } - KqpNodeService = TlsActivationContext->ExecutorThread.RegisterActor(CreateKqpNodeService(TableServiceConfig, Counters, 
AsyncIoFactory, FederatedQuerySetup)); + ResourceManager_ = GetKqpResourceManager(); + CaFactory_ = NComputeActor::MakeKqpCaFactory( + TableServiceConfig.GetResourceManager(), ResourceManager_, AsyncIoFactory, FederatedQuerySetup); + + KqpNodeService = TlsActivationContext->ExecutorThread.RegisterActor(CreateKqpNodeService(TableServiceConfig, ResourceManager_, CaFactory_, Counters, AsyncIoFactory, FederatedQuerySetup)); TlsActivationContext->ExecutorThread.ActorSystem->RegisterLocalService( MakeKqpNodeServiceID(SelfId().NodeId()), KqpNodeService); @@ -470,6 +494,9 @@ class TKqpProxyService : public TActorBootstrapped { Send(TActivationContext::InterconnectProxy(node), new TEvents::TEvUnsubscribe); }); + ResourcePoolsCache.UnsubscribeFromResourcePoolClassifiers(ActorContext()); + DatabasesCache.StopSubscriberActor(ActorContext()); + return TActor::PassAway(); } @@ -487,6 +514,7 @@ class TKqpProxyService : public TActorBootstrapped { UpdateYqlLogLevels(); FeatureFlags.Swap(event.MutableConfig()->MutableFeatureFlags()); + ResourcePoolsCache.UpdateFeatureFlags(FeatureFlags, ActorContext()); auto responseEv = MakeHolder(event); Send(ev->Sender, responseEv.Release(), IEventHandle::FlagTrackDelivery, ev->Cookie); @@ -617,6 +645,10 @@ class TKqpProxyService : public TActorBootstrapped { } void Handle(TEvKqp::TEvQueryRequest::TPtr& ev) { + if (!DatabasesCache.SetDatabaseIdOrDefer(ev, static_cast(EDelayedRequestType::QueryRequest), ActorContext())) { + return; + } + const TString& database = ev->Get()->GetDatabase(); const TString& traceId = ev->Get()->GetTraceId(); const auto queryType = ev->Get()->GetType(); @@ -687,11 +719,8 @@ class TKqpProxyService : public TActorBootstrapped { LocalSessions->AttachQueryText(sessionInfo, ev->Get()->GetQuery()); } - if (!FeatureFlags.GetEnableResourcePools()) { - ev->Get()->SetPoolId(""); - } else if (!ev->Get()->GetPoolId()) { - // TODO: do not use default pool if there is no limits - 
ev->Get()->SetPoolId(NResourcePool::DEFAULT_POOL_ID); + if (!TryFillPoolInfoFromCache(ev, requestId)) { + return; } TActorId targetId; @@ -720,7 +749,7 @@ class TKqpProxyService : public TActorBootstrapped { } void Handle(TEvKqp::TEvScriptRequest::TPtr& ev) { - if (CheckScriptExecutionsTablesReady(ev)) { + if (CheckScriptExecutionsTablesReady(ev, EDelayedRequestType::ScriptRequest)) { auto req = ev->Get()->Record.MutableRequest(); auto maxRunTime = GetQueryTimeout(req->GetType(), req->GetTimeoutMs(), TableServiceConfig, QueryServiceConfig); req->SetTimeoutMs(maxRunTime.MilliSeconds()); @@ -1270,9 +1299,9 @@ class TKqpProxyService : public TActorBootstrapped { const TKqpSessionInfo* info = LocalSessions->FindPtr(reqInfo->SessionId); if (msg->Round == 0 && info) { - TString message = TStringBuilder() - << "request's " << (msg->Status == NYql::NDqProto::StatusIds::TIMEOUT ? "timeout" : "cancelAfter") - << " exceeded"; + TString message = msg->Status == NYql::NDqProto::StatusIds::TIMEOUT + ? 
(TStringBuilder() << "Request timeout " << msg->Timeout.MilliSeconds() << "ms exceeded") + : (TStringBuilder() << "Request canceled after " << msg->Timeout.MilliSeconds() << "ms"); Send(info->WorkerId, new TEvKqp::TEvAbortExecution(msg->Status, message)); @@ -1284,7 +1313,7 @@ class TKqpProxyService : public TActorBootstrapped { } } else { TString message = TStringBuilder() - << "Query did not complete within specified timeout, session id " << reqInfo->SessionId; + << "Query did not complete within specified timeout " << msg->InitialTimeout.MilliSeconds() << "ms, session id " << reqInfo->SessionId; ReplyProcessError(NYql::NDq::DqStatusToYdbStatus(msg->Status), message, requestId); } } @@ -1344,6 +1373,10 @@ class TKqpProxyService : public TActorBootstrapped { hFunc(TEvInterconnect::TEvNodeDisconnected, Handle); hFunc(TEvKqp::TEvListSessionsRequest, Handle); hFunc(TEvKqp::TEvListProxyNodesRequest, Handle); + hFunc(NWorkload::TEvUpdatePoolInfo, Handle); + hFunc(TEvKqp::TEvUpdateDatabaseInfo, Handle); + hFunc(TEvKqp::TEvDelayedRequestError, Handle); + hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); default: Y_ABORT("TKqpProxyService: unexpected event type: %" PRIx32 " event: %s", ev->GetTypeRewrite(), ev->ToString().data()); @@ -1479,7 +1512,7 @@ class TKqpProxyService : public TActorBootstrapped { auto config = CreateConfig(KqpSettings, workerSettings); - IActor* sessionActor = CreateKqpSessionActor(SelfId(), sessionId, KqpSettings, workerSettings, + IActor* sessionActor = CreateKqpSessionActor(SelfId(), ResourceManager_, CaFactory_, sessionId, KqpSettings, workerSettings, FederatedQuerySetup, AsyncIoFactory, ModuleResolverState, Counters, QueryServiceConfig, KqpTempTablesAgentActor); auto workerId = TlsActivationContext->ExecutorThread.RegisterActor(sessionActor, TMailboxType::HTSwap, AppData()->UserPoolId); @@ -1566,6 +1599,46 @@ class TKqpProxyService : public TActorBootstrapped { } } + bool TryFillPoolInfoFromCache(TEvKqp::TEvQueryRequest::TPtr& 
ev, ui64 requestId) { + ResourcePoolsCache.UpdateFeatureFlags(FeatureFlags, ActorContext()); + + const auto& databaseId = ev->Get()->GetDatabaseId(); + if (!ResourcePoolsCache.ResourcePoolsEnabled(databaseId)) { + ev->Get()->SetPoolId(""); + return true; + } + + const auto& userToken = ev->Get()->GetUserToken(); + if (!ev->Get()->GetPoolId()) { + ev->Get()->SetPoolId(ResourcePoolsCache.GetPoolId(databaseId, userToken, ActorContext())); + } + + const auto& poolId = ev->Get()->GetPoolId(); + const auto& poolInfo = ResourcePoolsCache.GetPoolInfo(databaseId, poolId, ActorContext()); + if (!poolInfo) { + return true; + } + + const auto& securityObject = poolInfo->SecurityObject; + if (securityObject && userToken && !userToken->GetSerializedToken().empty()) { + if (!securityObject->CheckAccess(NACLib::EAccessRights::DescribeSchema, *userToken)) { + ReplyProcessError(Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Resource pool " << poolId << " not found or you don't have access permissions", requestId); + return false; + } + if (!securityObject->CheckAccess(NACLib::EAccessRights::SelectRow, *userToken)) { + ReplyProcessError(Ydb::StatusIds::UNAUTHORIZED, TStringBuilder() << "You don't have access permissions for resource pool " << poolId, requestId); + return false; + } + } + + const auto& poolConfig = poolInfo->Config; + if (!NWorkload::IsWorkloadServiceRequired(poolConfig)) { + ev->Get()->SetPoolConfig(poolConfig); + } + + return true; + } + void UpdateYqlLogLevels() { const auto& kqpYqlName = NKikimrServices::EServiceKikimr_Name(NKikimrServices::KQP_YQL); for (auto &entry : LogConfig.GetEntry()) { @@ -1585,12 +1658,53 @@ class TKqpProxyService : public TActorBootstrapped { NYql::NDq::SetYqlLogLevels(yqlPriority); } - template - bool CheckScriptExecutionsTablesReady(TEvent& ev) { + void HanleDelayedRequestError(EDelayedRequestType requestType, THolder requestEvent, Ydb::StatusIds::StatusCode status, NYql::TIssues issues) { + switch (requestType) { + case 
EDelayedRequestType::QueryRequest: { + auto response = std::make_unique(); + response->Record.GetRef().SetYdbStatus(status); + NYql::IssuesToMessage(issues, response->Record.GetRef().MutableResponse()->MutableQueryIssues()); + Send(requestEvent->Sender, std::move(response), 0, requestEvent->Cookie); + break; + } + + case EDelayedRequestType::ScriptRequest: + HanleDelayedScriptRequestError(std::move(requestEvent), status, std::move(issues)); + break; + + case EDelayedRequestType::ForgetScriptExecutionOperation: + HanleDelayedScriptRequestError(std::move(requestEvent), status, std::move(issues)); + break; + + case EDelayedRequestType::GetScriptExecutionOperation: + HanleDelayedScriptRequestError(std::move(requestEvent), status, std::move(issues)); + break; + + case EDelayedRequestType::ListScriptExecutionOperations: + HanleDelayedScriptRequestError(std::move(requestEvent), status, std::move(issues)); + break; + + case EDelayedRequestType::CancelScriptExecutionOperation: + HanleDelayedScriptRequestError(std::move(requestEvent), status, std::move(issues)); + break; + } + } + + template + void HanleDelayedScriptRequestError(THolder requestEvent, Ydb::StatusIds::StatusCode status, NYql::TIssues issues) const { + Send(requestEvent->Sender, new TResponse(status, std::move(issues)), 0, requestEvent->Cookie); + } + + template + bool CheckScriptExecutionsTablesReady(TEvent& ev, EDelayedRequestType requestType) { if (!AppData()->FeatureFlags.GetEnableScriptExecutionOperations()) { NYql::TIssues issues; issues.AddIssue("ExecuteScript feature is not enabled"); - Send(ev->Sender, new TResponse(Ydb::StatusIds::UNSUPPORTED, std::move(issues))); + HanleDelayedRequestError(requestType, std::move(ev), Ydb::StatusIds::UNSUPPORTED, std::move(issues)); + return false; + } + + if (!DatabasesCache.SetDatabaseIdOrDefer(ev, static_cast(requestType), ActorContext())) { return false; } @@ -1603,14 +1717,12 @@ class TKqpProxyService : public TActorBootstrapped { if (DelayedEventsQueue.size() < 
10000) { DelayedEventsQueue.push_back({ .Event = std::move(ev), - .ResponseBuilder = [](Ydb::StatusIds::StatusCode status, NYql::TIssues issues) { - return new TResponse(status, std::move(issues)); - } + .RequestType = static_cast(requestType) }); } else { NYql::TIssues issues; issues.AddIssue("Too many queued requests"); - Send(ev->Sender, new TResponse(Ydb::StatusIds::OVERLOADED, std::move(issues))); + HanleDelayedRequestError(requestType, std::move(ev), Ydb::StatusIds::OVERLOADED, std::move(issues)); } return false; case EScriptExecutionsCreationStatus::Finished: @@ -1635,32 +1747,32 @@ class TKqpProxyService : public TActorBootstrapped { if (ev->Get()->Success) { Send(std::move(delayedEvent.Event)); } else { - Send(delayedEvent.Event->Sender, delayedEvent.ResponseBuilder(Ydb::StatusIds::INTERNAL_ERROR, {rootIssue})); + HanleDelayedRequestError(static_cast(delayedEvent.RequestType), std::move(delayedEvent.Event), Ydb::StatusIds::INTERNAL_ERROR, {rootIssue}); } DelayedEventsQueue.pop_front(); } } void Handle(NKqp::TEvForgetScriptExecutionOperation::TPtr& ev) { - if (CheckScriptExecutionsTablesReady(ev)) { + if (CheckScriptExecutionsTablesReady(ev, EDelayedRequestType::ForgetScriptExecutionOperation)) { Register(CreateForgetScriptExecutionOperationActor(std::move(ev)), TMailboxType::HTSwap, AppData()->SystemPoolId); } } void Handle(NKqp::TEvGetScriptExecutionOperation::TPtr& ev) { - if (CheckScriptExecutionsTablesReady(ev)) { + if (CheckScriptExecutionsTablesReady(ev, EDelayedRequestType::GetScriptExecutionOperation)) { Register(CreateGetScriptExecutionOperationActor(std::move(ev)), TMailboxType::HTSwap, AppData()->SystemPoolId); } } void Handle(NKqp::TEvListScriptExecutionOperations::TPtr& ev) { - if (CheckScriptExecutionsTablesReady(ev)) { + if (CheckScriptExecutionsTablesReady(ev, EDelayedRequestType::ListScriptExecutionOperations)) { Register(CreateListScriptExecutionOperationsActor(std::move(ev)), TMailboxType::HTSwap, AppData()->SystemPoolId); } } void 
Handle(NKqp::TEvCancelScriptExecutionOperation::TPtr& ev) { - if (CheckScriptExecutionsTablesReady(ev)) { + if (CheckScriptExecutionsTablesReady(ev, EDelayedRequestType::CancelScriptExecutionOperation)) { Register(CreateCancelScriptExecutionOperationActor(std::move(ev)), TMailboxType::HTSwap, AppData()->SystemPoolId); } } @@ -1692,7 +1804,11 @@ class TKqpProxyService : public TActorBootstrapped { KQP_PROXY_LOG_D("incoming list sessions request " << ev->Get()->Record.ShortUtf8DebugString()); auto result = std::make_unique(); - auto startIt = LocalSessions->GetOrderedLowerBound(ev->Get()->Record.GetSessionIdStart()); + + const auto& tenant = ev->Get()->Record.GetTenantName(); + bool checkTenant = (AppData()->TenantName != tenant); + + auto startIt = LocalSessions->GetOrderedLowerBound(tenant, ev->Get()->Record.GetSessionIdStart()); auto endIt = LocalSessions->GetOrderedEnd(); i32 freeSpace = ev->Get()->Record.GetFreeSpace(); @@ -1703,6 +1819,10 @@ class TKqpProxyService : public TActorBootstrapped { while(startIt != endIt && freeSpace > 0) { auto* sessionInfo = startIt->second; + if (checkTenant && sessionInfo->Database != ev->Get()->Record.GetTenantName()) { + finished = true; + break; + } if (!until.empty()) { if (sessionInfo->SessionId > until) { @@ -1730,7 +1850,8 @@ class TKqpProxyService : public TActorBootstrapped { if (finished) { result->Record.SetFinished(true); } else { - result->Record.SetContinuationToken(startIt->first); + Y_ABORT_UNLESS(startIt != endIt); + result->Record.SetContinuationToken(startIt->first.second); result->Record.SetFinished(false); } @@ -1751,6 +1872,25 @@ class TKqpProxyService : public TActorBootstrapped { Send(ev->Sender, result.release(), 0, ev->Cookie); } + void Handle(NWorkload::TEvUpdatePoolInfo::TPtr& ev) { + ResourcePoolsCache.UpdatePoolInfo(ev->Get()->DatabaseId, ev->Get()->PoolId, ev->Get()->Config, ev->Get()->SecurityObject, ActorContext()); + } + + void Handle(TEvKqp::TEvUpdateDatabaseInfo::TPtr& ev) { + if 
(ev->Get()->Status == Ydb::StatusIds::SUCCESS) { + ResourcePoolsCache.UpdateDatabaseInfo(ev->Get()->DatabaseId, ev->Get()->Serverless); + } + DatabasesCache.UpdateDatabaseInfo(ev, ActorContext()); + } + + void Handle(TEvKqp::TEvDelayedRequestError::TPtr& ev) { + HanleDelayedRequestError(static_cast(ev->Cookie), std::move(ev->Get()->RequestEvent), ev->Get()->Status, std::move(ev->Get()->Issues)); + } + + void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev) { + ResourcePoolsCache.UpdateResourcePoolClassifiersInfo(ev->Get()->GetSnapshotAs(), ActorContext()); + } + private: NKikimrConfig::TLogConfig LogConfig; NKikimrConfig::TTableServiceConfig TableServiceConfig; @@ -1768,6 +1908,8 @@ class TKqpProxyService : public TActorBootstrapped { THashMap ConfigSubscriptions; THashMap TimeoutTimers; + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; TIntrusivePtr ShutdownState; TIntrusivePtr ModuleResolverState; @@ -1802,14 +1944,13 @@ class TKqpProxyService : public TActorBootstrapped { Pending, Finished, }; - struct TDelayedEvent { - THolder Event; - std::function ResponseBuilder; - }; EScriptExecutionsCreationStatus ScriptExecutionsCreationStatus = EScriptExecutionsCreationStatus::NotStarted; - std::deque DelayedEventsQueue; + std::deque DelayedEventsQueue; bool IsLookupByRmScheduled = false; TActorId KqpTempTablesAgentActor; + + TResourcePoolsCache ResourcePoolsCache; + TDatabasesCache DatabasesCache; }; } // namespace diff --git a/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h b/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h index 1ca66b5f019a..94f8f0526dff 100644 --- a/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h +++ b/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h @@ -1,8 +1,11 @@ #pragma once #include +#include #include +#include #include +#include #include #include @@ -147,7 +150,7 @@ struct TKqpSessionInfo { class TLocalSessionsRegistry { THashMap LocalSessions; - std::map OrderedSessions; + std::map, 
TKqpSessionInfo*> OrderedSessions; THashMap TargetIdIndex; THashSet ShutdownInFlightSessions; THashMap SessionsCountPerDatabase; @@ -205,7 +208,7 @@ class TLocalSessionsRegistry { auto result = LocalSessions.emplace(sessionId, TKqpSessionInfo(sessionId, workerId, database, dbCounters, std::move(pos), sessionStartedAt + idleDuration, IdleSessions.end(), pgWire, startedAt)); - OrderedSessions.emplace(sessionId, &result.first->second); + OrderedSessions.emplace(std::make_pair(database, sessionId), &result.first->second); SessionsCountPerDatabase[database]++; Y_ABORT_UNLESS(result.second, "Duplicate session id!"); TargetIdIndex.emplace(workerId, sessionId); @@ -299,11 +302,11 @@ class TLocalSessionsRegistry { return ShutdownInFlightSessions.size(); } - std::map::const_iterator GetOrderedLowerBound(const TString& continuation) const { - return OrderedSessions.lower_bound(continuation); + std::map, TKqpSessionInfo*>::const_iterator GetOrderedLowerBound(const TString& tenant, const TString& continuation) const { + return OrderedSessions.lower_bound(std::make_pair(tenant, continuation)); } - std::map::const_iterator GetOrderedEnd() const { + std::map, TKqpSessionInfo*>::const_iterator GetOrderedEnd() const { return OrderedSessions.end(); } @@ -336,7 +339,7 @@ class TLocalSessionsRegistry { } } - OrderedSessions.erase(sessionId); + OrderedSessions.erase(std::make_pair(it->second.Database, sessionId)); LocalSessions.erase(it); } @@ -415,4 +418,282 @@ class TLocalSessionsRegistry { } }; +class TResourcePoolsCache { + struct TClassifierInfo { + const TString MemberName; + const TString PoolId; + const i64 Rank; + + TClassifierInfo(const NResourcePool::TClassifierSettings& classifierSettings) + : MemberName(classifierSettings.MemberName) + , PoolId(classifierSettings.ResourcePool) + , Rank(classifierSettings.Rank) + {} + }; + + struct TDatabaseInfo { + std::unordered_map ResourcePoolsClassifiers = {}; // Classifier name to config + std::map RankToClassifierInfo = {}; // 
Classifier rank to config + std::unordered_map> UserToResourcePool = {}; // UserSID to (resource pool, classifier rank) + bool Serverless = false; + }; + + struct TPoolInfo { + NResourcePool::TPoolSettings Config; + std::optional SecurityObject; + bool Expired = false; + }; + +public: + bool ResourcePoolsEnabled(const TString& databaseId) const { + if (!EnableResourcePools) { + return false; + } + + if (EnableResourcePoolsOnServerless) { + return true; + } + + const auto databaseInfo = GetDatabaseInfo(databaseId); + return !databaseInfo || !databaseInfo->Serverless; + } + + TString GetPoolId(const TString& databaseId, const TIntrusiveConstPtr& userToken, TActorContext actorContext) { + if (!userToken || userToken->GetUserSID().empty()) { + return NResourcePool::DEFAULT_POOL_ID; + } + + TDatabaseInfo& databaseInfo = *GetOrCreateDatabaseInfo(databaseId); + auto [resultPoolId, resultRank] = GetPoolIdFromClassifiers(databaseId, userToken->GetUserSID(), databaseInfo, userToken, actorContext); + for (const auto& userSID : userToken->GetGroupSIDs()) { + const auto& [poolId, rank] = GetPoolIdFromClassifiers(databaseId, userSID, databaseInfo, userToken, actorContext); + if (poolId && (!resultPoolId || resultRank > rank)) { + resultPoolId = poolId; + resultRank = rank; + } + } + + return resultPoolId ? 
resultPoolId : NResourcePool::DEFAULT_POOL_ID; + } + + std::optional GetPoolInfo(const TString& databaseId, const TString& poolId, TActorContext actorContext) const { + auto it = PoolsCache.find(GetPoolKey(databaseId, poolId)); + if (it == PoolsCache.end()) { + actorContext.Send(MakeKqpWorkloadServiceId(actorContext.SelfID.NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(databaseId, poolId)); + return std::nullopt; + } + return it->second; + } + + void UpdateFeatureFlags(const NKikimrConfig::TFeatureFlags& featureFlags, TActorContext actorContext) { + EnableResourcePools = featureFlags.GetEnableResourcePools(); + EnableResourcePoolsOnServerless = featureFlags.GetEnableResourcePoolsOnServerless(); + UpdateResourcePoolClassifiersSubscription(actorContext); + } + + void UpdateDatabaseInfo(const TString& databaseId, bool serverless) { + GetOrCreateDatabaseInfo(databaseId)->Serverless = serverless; + } + + void UpdatePoolInfo(const TString& databaseId, const TString& poolId, const std::optional& config, const std::optional& securityObject, TActorContext actorContext) { + bool clearClassifierCache = false; + + const TString& poolKey = GetPoolKey(databaseId, poolId); + if (!config) { + auto it = PoolsCache.find(poolKey); + if (it == PoolsCache.end()) { + return; + } + if (it->second.Expired) { + // Pool was dropped + clearClassifierCache = true; + PoolsCache.erase(it); + } else { + // Refresh pool subscription + it->second.Expired = true; + actorContext.Send(MakeKqpWorkloadServiceId(actorContext.SelfID.NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(databaseId, poolId)); + } + } else { + auto& poolInfo = PoolsCache[poolKey]; + clearClassifierCache = poolInfo.SecurityObject != securityObject; + poolInfo.Config = *config; + poolInfo.SecurityObject = securityObject; + poolInfo.Expired = false; + } + + if (clearClassifierCache) { + GetOrCreateDatabaseInfo(databaseId)->UserToResourcePool.clear(); + } + } + + void UpdateResourcePoolClassifiersInfo(const 
TResourcePoolClassifierSnapshot* snapsot, TActorContext actorContext) { + auto resourcePoolClassifierConfigs = snapsot->GetResourcePoolClassifierConfigs(); + for (auto& [databaseId, databaseInfo] : DatabasesCache) { + auto it = resourcePoolClassifierConfigs.find(databaseId); + if (it != resourcePoolClassifierConfigs.end()) { + UpdateDatabaseResourcePoolClassifiers(databaseId, databaseInfo, std::move(it->second), actorContext); + resourcePoolClassifierConfigs.erase(it); + } else if (!databaseInfo.ResourcePoolsClassifiers.empty()) { + databaseInfo.ResourcePoolsClassifiers.clear(); + databaseInfo.RankToClassifierInfo.clear(); + databaseInfo.UserToResourcePool.clear(); + } + } + for (auto& [databaseId, configsMap] : resourcePoolClassifierConfigs) { + UpdateDatabaseResourcePoolClassifiers(databaseId, *GetOrCreateDatabaseInfo(databaseId), std::move(configsMap), actorContext); + } + } + + void UnsubscribeFromResourcePoolClassifiers(TActorContext actorContext) { + if (SubscribedOnResourcePoolClassifiers) { + SubscribedOnResourcePoolClassifiers = false; + actorContext.Send(NMetadata::NProvider::MakeServiceId(actorContext.SelfID.NodeId()), new NMetadata::NProvider::TEvUnsubscribeExternal(std::make_shared())); + } + } + +private: + void UpdateResourcePoolClassifiersSubscription(TActorContext actorContext) { + if (EnableResourcePools) { + SubscribeOnResourcePoolClassifiers(actorContext); + } else { + UnsubscribeFromResourcePoolClassifiers(actorContext); + } + } + + void SubscribeOnResourcePoolClassifiers(TActorContext actorContext) { + if (!SubscribedOnResourcePoolClassifiers && NMetadata::NProvider::TServiceOperator::IsEnabled()) { + SubscribedOnResourcePoolClassifiers = true; + actorContext.Send(NMetadata::NProvider::MakeServiceId(actorContext.SelfID.NodeId()), new NMetadata::NProvider::TEvSubscribeExternal(std::make_shared())); + } + } + + void UpdateDatabaseResourcePoolClassifiers(const TString& databaseId, TDatabaseInfo& databaseInfo, std::unordered_map&& configsMap, 
TActorContext actorContext) { + if (databaseInfo.ResourcePoolsClassifiers == configsMap) { + return; + } + + databaseInfo.ResourcePoolsClassifiers.swap(configsMap); + databaseInfo.UserToResourcePool.clear(); + databaseInfo.RankToClassifierInfo.clear(); + for (const auto& [_, classifier] : databaseInfo.ResourcePoolsClassifiers) { + const auto& classifierSettings = classifier.GetClassifierSettings(); + databaseInfo.RankToClassifierInfo.insert({classifier.GetRank(), TClassifierInfo(classifierSettings)}); + /* PoolsCache is keyed by GetPoolKey(databaseId, poolId), so probe with the composite key; the bare pool id never matches */ + if (!PoolsCache.contains(GetPoolKey(databaseId, classifierSettings.ResourcePool))) { + actorContext.Send(MakeKqpWorkloadServiceId(actorContext.SelfID.NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(databaseId, classifierSettings.ResourcePool)); + } + } + } + + std::pair GetPoolIdFromClassifiers(const TString& databaseId, const TString& userSID, TDatabaseInfo& databaseInfo, const TIntrusiveConstPtr& userToken, TActorContext actorContext) const { + auto& usersMap = databaseInfo.UserToResourcePool; + if (const auto it = usersMap.find(userSID); it != usersMap.end()) { + return it->second; + } + + TString poolId = ""; + i64 rank = -1; + for (const auto& [_, classifier] : databaseInfo.RankToClassifierInfo) { + if (classifier.MemberName != userSID) { + continue; + } + + auto it = PoolsCache.find(GetPoolKey(databaseId, classifier.PoolId)); + if (it == PoolsCache.end()) { + actorContext.Send(MakeKqpWorkloadServiceId(actorContext.SelfID.NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(databaseId, classifier.PoolId)); + continue; + } + + if (userToken && !userToken->GetSerializedToken().empty() && !it->second.SecurityObject->CheckAccess(NACLib::DescribeSchema | NACLib::SelectRow, *userToken)) { + continue; + } + + poolId = classifier.PoolId; + rank = classifier.Rank; + break; + } + + usersMap[userSID] = {poolId, rank}; + return {poolId, rank}; + } + + TDatabaseInfo* GetOrCreateDatabaseInfo(const TString& databaseId) { + if (const auto it = DatabasesCache.find(databaseId); it != 
DatabasesCache.end()) { + return &it->second; + } + return &DatabasesCache.insert({databaseId, TDatabaseInfo{}}).first->second; + } + + const TDatabaseInfo* GetDatabaseInfo(const TString& databaseId) const { + const auto it = DatabasesCache.find(databaseId); + return it != DatabasesCache.end() ? &it->second : nullptr; + } + + static TString GetPoolKey(const TString& databaseId, const TString& poolId) { + return TStringBuilder() << databaseId << "/" << poolId; + } + +private: + std::unordered_map PoolsCache; + std::unordered_map DatabasesCache; + + bool EnableResourcePools = false; + bool EnableResourcePoolsOnServerless = false; + bool SubscribedOnResourcePoolClassifiers = false; +}; + +class TDatabasesCache { +public: + struct TDelayedEvent { + THolder Event; + i32 RequestType; + }; + +private: + struct TDatabaseInfo { + TString DatabaseId; // string "::" + std::vector DelayedEvents; + }; + +public: + TDatabasesCache(TDuration idleTimeout = TDuration::Seconds(60)); + + template + bool SetDatabaseIdOrDefer(TEvent& event, i32 requestType, TActorContext actorContext) { + const auto& database = CanonizePath(event->Get()->GetDatabase()); + const auto& tenantName = CanonizePath(AppData()->TenantName); + if (database.empty() || database == tenantName) { + event->Get()->SetDatabaseId(tenantName); + return true; + } + + auto& databaseInfo = DatabasesCache[database]; + if (databaseInfo.DatabaseId) { + PingDatabaseSubscription(database, actorContext); + event->Get()->SetDatabaseId(databaseInfo.DatabaseId); + return true; + } + + SubscribeOnDatabase(database, actorContext); + databaseInfo.DelayedEvents.push_back(TDelayedEvent{ + .Event = std::move(event), + .RequestType = requestType + }); + + return false; + } + + void UpdateDatabaseInfo(TEvKqp::TEvUpdateDatabaseInfo::TPtr& event, TActorContext actorContext); + void StopSubscriberActor(TActorContext actorContext) const; + +private: + void SubscribeOnDatabase(const TString& database, TActorContext actorContext); + void 
PingDatabaseSubscription(const TString& database, TActorContext actorContext) const; + +private: + const TDuration IdleTimeout; + std::unordered_map DatabasesCache; + TActorId SubscriberActor; + TString TenantName; +}; + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/proxy_service/kqp_proxy_ut.cpp b/ydb/core/kqp/proxy_service/kqp_proxy_ut.cpp index 883ec7d9198e..1d008a72be80 100644 --- a/ydb/core/kqp/proxy_service/kqp_proxy_ut.cpp +++ b/ydb/core/kqp/proxy_service/kqp_proxy_ut.cpp @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -65,6 +67,89 @@ TString CreateSession(TTestActorRuntime* runtime, const TActorId& kqpProxy, cons return sessionId; } +class TDatabaseCacheTestActor : public TActorBootstrapped { +public: + TDatabaseCacheTestActor(const TString& database, const TString& expectedDatabaseId, TDuration idleTimeout, NThreading::TPromise promise) + : IdleTimeout(idleTimeout) + , Database(database) + , ExpectedDatabaseId(expectedDatabaseId) + , Cache(idleTimeout) + , Promise(promise) + {} + + void Bootstrap() { + Become(&TDatabaseCacheTestActor::StateFunc); + + auto event = MakeHolder(); + event->Record.MutableRequest()->SetDatabase(Database); + Send(SelfId(), event.Release()); + + Schedule(3 * IdleTimeout, new TEvents::TEvWakeup()); + } + + void Handle(TEvKqp::TEvUpdateDatabaseInfo::TPtr& ev) { + if (!CacheUpdated) { + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, Ydb::StatusIds::SUCCESS, TStringBuilder() << GetErrorString() << ev->Get()->Issues.ToString()); + Cache.UpdateDatabaseInfo(ev, ActorContext()); + CacheUpdated = true; + } else { + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, Ydb::StatusIds::ABORTED, TStringBuilder() << GetErrorString() << ev->Get()->Issues.ToString()); + UNIT_ASSERT_STRING_CONTAINS_C(ev->Get()->Issues.ToString(), "Database subscription was dropped by idle timeout", GetErrorString()); + Finish(); + } + } + + void Handle(TEvKqp::TEvDelayedRequestError::TPtr& ev) { + 
UNIT_ASSERT_C(false, TStringBuilder() << "Unexpected fail, status: " << ev->Get()->Status << ", " << GetErrorString() << ev->Get()->Issues.ToString()); + } + + void Handle(TEvKqp::TEvQueryRequest::TPtr& ev) { + auto success = Cache.SetDatabaseIdOrDefer(ev, 0, ActorContext()); + + bool dedicated = Database == ExpectedDatabaseId; + if (CacheUpdated || dedicated) { + UNIT_ASSERT_C(success, TStringBuilder() << "Expected database id from cache, " << GetErrorString()); + UNIT_ASSERT_STRING_CONTAINS_C(ev->Get()->GetDatabaseId(), ExpectedDatabaseId, GetErrorString()); + if (dedicated) { + Finish(); + } + } else { + UNIT_ASSERT_C(!success, TStringBuilder() << "Unexpected database id from cache, " << GetErrorString()); + } + } + + void HandleWakeup() { + UNIT_ASSERT_C(false, TStringBuilder() << "Test cache timeout, " << GetErrorString()); + Finish(); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvKqp::TEvUpdateDatabaseInfo, Handle); + hFunc(TEvKqp::TEvDelayedRequestError, Handle); + hFunc(TEvKqp::TEvQueryRequest, Handle); + sFunc(TEvents::TEvWakeup, HandleWakeup); + ) + +private: + TString GetErrorString() const { + return TStringBuilder() << "cache updated: " << CacheUpdated << ", database: " << Database << "\n"; + } + + void Finish() { + Promise.SetValue(); + PassAway(); + } + +private: + const TDuration IdleTimeout; + const TString Database; + const TString ExpectedDatabaseId; + TDatabasesCache Cache; + NThreading::TPromise Promise; + + bool CacheUpdated = false; +}; + } Y_UNIT_TEST_SUITE(KqpProxy) { @@ -542,5 +627,30 @@ Y_UNIT_TEST_SUITE(KqpProxy) { UNIT_ASSERT(allDoneOk); } + + Y_UNIT_TEST(DatabasesCacheForServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .Create(); + + auto& runtime = *ydb->GetRuntime(); + TDuration idleTimeout = TDuration::Seconds(5); + + auto checkCache = [&](const TString& database, const TString& expectedDatabaseId, ui32 nodeIndex) { + auto promise = NThreading::NewPromise(); + runtime.Register(new 
TDatabaseCacheTestActor(database, expectedDatabaseId, idleTimeout, promise), nodeIndex); + promise.GetFuture().GetValueSync(); + }; + + const auto& dedicatedTennant = ydb->GetSettings().GetDedicatedTenantName(); + checkCache(dedicatedTennant, dedicatedTennant, 2); + + const auto& sharedTennant = ydb->GetSettings().GetSharedTenantName(); + checkCache(sharedTennant, sharedTennant, 1); + + const auto& serverlessTennant = ydb->GetSettings().GetServerlessTenantName(); + checkCache(serverlessTennant, TStringBuilder() << ":4:" << serverlessTennant, 1); + } + } // namspace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/proxy_service/kqp_session_info.cpp b/ydb/core/kqp/proxy_service/kqp_session_info.cpp index 995e02676c54..383fd6690244 100644 --- a/ydb/core/kqp/proxy_service/kqp_session_info.cpp +++ b/ydb/core/kqp/proxy_service/kqp_session_info.cpp @@ -6,6 +6,8 @@ namespace NKikimr::NKqp { using VSessions = NKikimr::NSysView::Schema::QuerySessions; +constexpr size_t QUERY_TEXT_LIMIT = 10_KB; + void TKqpSessionInfo::SerializeTo(::NKikimrKqp::TSessionInfo* proto, const TFieldsMap& fieldsMap) const { if (fieldsMap.NeedField(VSessions::SessionId::ColumnId)) { // 1 proto->SetSessionId(SessionId); @@ -26,7 +28,12 @@ void TKqpSessionInfo::SerializeTo(::NKikimrKqp::TSessionInfo* proto, const TFiel // last executed query or currently running query. 
if (fieldsMap.NeedField(VSessions::Query::ColumnId)) { // 4 - proto->SetQuery(QueryText); + if (QueryText.size() > QUERY_TEXT_LIMIT) { + /* cap the reported query at QUERY_TEXT_LIMIT bytes: send the truncated copy, not the full text */ + TString truncatedText = QueryText.substr(0, QUERY_TEXT_LIMIT); + proto->SetQuery(truncatedText); + } else { + proto->SetQuery(QueryText); + } } if (fieldsMap.NeedField(VSessions::QueryCount::ColumnId)) { // 5 diff --git a/ydb/core/kqp/proxy_service/ut/ya.make b/ydb/core/kqp/proxy_service/ut/ya.make index 730f59a3fcae..fc6d9e7c89cb 100644 --- a/ydb/core/kqp/proxy_service/ut/ya.make +++ b/ydb/core/kqp/proxy_service/ut/ya.make @@ -13,6 +13,7 @@ PEERDIR( ydb/core/kqp/run_script_actor ydb/core/kqp/proxy_service ydb/core/kqp/ut/common + ydb/core/kqp/workload_service/ut/common ydb/library/yql/sql/pg_dummy ydb/public/sdk/cpp/client/ydb_query ydb/public/sdk/cpp/client/ydb_driver diff --git a/ydb/core/kqp/proxy_service/ya.make b/ydb/core/kqp/proxy_service/ya.make index a69e5c7ca7b2..8a143789e701 100644 --- a/ydb/core/kqp/proxy_service/ya.make +++ b/ydb/core/kqp/proxy_service/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( kqp_proxy_service.cpp + kqp_proxy_databases_cache.cpp kqp_proxy_peer_stats_calculator.cpp kqp_script_executions.cpp kqp_session_info.cpp @@ -17,6 +18,7 @@ PEERDIR( ydb/core/kqp/common ydb/core/kqp/common/events ydb/core/kqp/counters + ydb/core/kqp/gateway/behaviour/resource_pool_classifier ydb/core/kqp/proxy_service/proto ydb/core/kqp/run_script_actor ydb/core/kqp/workload_service diff --git a/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp index 5f6283824745..5b2cfc5bb7a2 100644 --- a/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -325,6 +325,8 @@ ui64 ConvertValueToColumn(const TCoDataCtor& value, TKqpOlapCompileContext& ctx) ssaValue->MutableConstant()->SetUint64(FromString(nodeValue)); } else if (value.Maybe()) { 
ssaValue->MutableConstant()->SetTimestamp(FromString(nodeValue)); + } else if (value.Maybe()) { + ssaValue->MutableConstant()->SetTimestamp(FromString(nodeValue)); } else { YQL_ENSURE(false, "Unsupported content: " << value.Ref().Content()); } @@ -856,6 +858,8 @@ TTypedColumn GetOrCreateColumnIdAndType(const TExprBase& node, TKqpOlapCompileCo return BuildLogicalNot(maybeNot.Cast().Value(), ctx); } else if (const auto& maybeJsonValue = node.Maybe()) { return ConvertJsonValueToColumn(maybeJsonValue.Cast(), ctx); + } else if (const auto& maybeJsonValue = node.Maybe()) { + return CompileJsonExists(maybeJsonValue.Cast(), ctx); } else if (const auto& maybeApply = node.Maybe()) { return CompileYqlKernelScalarApply(maybeApply.Cast(), ctx); } diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 78be12f810e6..be18fac6256e 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -21,8 +21,10 @@ #include #include #include +#include #include + namespace NKikimr { namespace NKqp { @@ -91,6 +93,8 @@ NKqpProto::EStreamLookupStrategy GetStreamLookupStrategy(const std::string_view lookupStrategy = NKqpProto::EStreamLookupStrategy::LOOKUP; } else if (strategy == "LookupJoinRows"sv) { lookupStrategy = NKqpProto::EStreamLookupStrategy::JOIN; + } else if (strategy == "LookupSemiJoinRows"sv) { + lookupStrategy = NKqpProto::EStreamLookupStrategy::SEMI_JOIN; } YQL_ENSURE(lookupStrategy != NKqpProto::EStreamLookupStrategy::UNSPECIFIED, @@ -513,6 +517,33 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { CompileTransaction(tx, *queryProto.AddTransactions(), ctx); } + auto overridePlanner = Config->OverridePlanner.Get(); + if (overridePlanner) { + NJson::TJsonReaderConfig jsonConfig; + NJson::TJsonValue jsonNode; + if (NJson::ReadJsonTree(*overridePlanner, &jsonConfig, &jsonNode)) { + for (auto& stageOverride : jsonNode.GetArray()) { + ui32 txId = 0; + 
if (auto* txNode = stageOverride.GetValueByPath("tx")) { + txId = txNode->GetIntegerSafe(); + } + if (txId < static_cast(queryProto.GetTransactions().size())) { + auto& tx = *queryProto.MutableTransactions(txId); + ui32 stageId = 0; + if (auto* stageNode = stageOverride.GetValueByPath("stage")) { + stageId = stageNode->GetIntegerSafe(); + } + if (stageId < static_cast(tx.GetStages().size())) { + auto& stage = *tx.MutableStages(stageId); + if (auto* tasksNode = stageOverride.GetValueByPath("tasks")) { + stage.SetTaskCount(tasksNode->GetIntegerSafe()); + } + } + } + } + } + } + for (ui32 i = 0; i < query.Results().Size(); ++i) { const auto& result = query.Results().Item(i); @@ -641,8 +672,11 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { } } + double stageCost = 0.0; VisitExpr(stage.Program().Ptr(), [&](const TExprNode::TPtr& exprNode) { + TExprBase node(exprNode); + if (auto maybeReadTable = node.Maybe()) { auto readTable = maybeReadTable.Cast(); auto tableMeta = TablesData->ExistingTable(Cluster, readTable.Table().Path()).Metadata; @@ -723,12 +757,15 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { FillOlapProgram(readTableRanges, miniKqlResultType, *tableMeta, *tableOp.MutableReadOlapRange(), ctx); FillResultType(miniKqlResultType, *tableOp.MutableReadOlapRange()); tableOp.MutableReadOlapRange()->SetReadType(NKqpProto::TKqpPhyOpReadOlapRanges::BLOCKS); + } else if (auto maybeDqSourceWrapBase = node.Maybe()) { + stageCost += GetDqSourceWrapBaseCost(maybeDqSourceWrapBase.Cast(), TypesCtx); } else { YQL_ENSURE(!node.Maybe()); } return true; }); + stageProto.SetStageCost(stageCost); const auto& secureParams = FindSecureParams(stage.Program().Ptr(), TypesCtx, SecretNames); stageProto.MutableSecureParams()->insert(secureParams.begin(), secureParams.end()); @@ -777,8 +814,9 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { stageProto.SetIsEffectsStage(hasEffects || hasTxTableSink); auto paramsType = CollectParameters(stage, ctx); + 
NDq::TSpillingSettings spillingSettings{Config->GetEnabledSpillingNodes()}; auto programBytecode = NDq::BuildProgram(stage.Program(), *paramsType, *KqlCompiler, TypeEnv, FuncRegistry, - ctx, {}); + ctx, {}, spillingSettings); auto& programProto = *stageProto.MutableProgram(); programProto.SetRuntimeVersion(NYql::NDqProto::ERuntimeVersion::RUNTIME_VERSION_YQL_1_0); @@ -1274,7 +1312,8 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { break; } - case NKqpProto::EStreamLookupStrategy::JOIN: { + case NKqpProto::EStreamLookupStrategy::JOIN: + case NKqpProto::EStreamLookupStrategy::SEMI_JOIN: { YQL_ENSURE(inputItemType->GetKind() == ETypeAnnotationKind::Tuple); const auto inputTupleType = inputItemType->Cast(); YQL_ENSURE(inputTupleType->GetSize() == 2); diff --git a/ydb/core/kqp/query_compiler/ya.make b/ydb/core/kqp/query_compiler/ya.make index c2ab1f96c875..f3d5233505a2 100644 --- a/ydb/core/kqp/query_compiler/ya.make +++ b/ydb/core/kqp/query_compiler/ya.make @@ -19,6 +19,7 @@ PEERDIR( ydb/library/yql/minikql ydb/library/yql/providers/common/mkql ydb/library/yql/providers/dq/common + ydb/library/yql/providers/s3/expr_nodes ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/query_data/kqp_predictor.cpp b/ydb/core/kqp/query_data/kqp_predictor.cpp index 9c0ce87d73e4..3cf0b3b97187 100644 --- a/ydb/core/kqp/query_data/kqp_predictor.cpp +++ b/ydb/core/kqp/query_data/kqp_predictor.cpp @@ -131,7 +131,7 @@ ui32 TStagePredictor::GetUsableThreads() { userPoolSize = TlsActivationContext->ActorSystem()->GetPoolThreadsCount(AppData()->UserPoolId); } if (!userPoolSize) { - ALS_ERROR(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; + ALS_INFO(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; userPoolSize = NSystemInfo::NumberOfCpus(); } return Max(1, *userPoolSize); diff --git a/ydb/core/kqp/query_data/kqp_prepared_query.cpp b/ydb/core/kqp/query_data/kqp_prepared_query.cpp index 
9817c4b3ce37..a46ec53d9068 100644 --- a/ydb/core/kqp/query_data/kqp_prepared_query.cpp +++ b/ydb/core/kqp/query_data/kqp_prepared_query.cpp @@ -154,7 +154,7 @@ const NKikimr::NKqp::TStagePredictor& TKqpPhyTxHolder::GetCalculationPredictor(c } TPreparedQueryHolder::TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, - const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry) + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, bool noFillTables) : Proto(proto) , Alloc(nullptr) , TableConstInfoById(MakeIntrusive()) @@ -164,6 +164,11 @@ TPreparedQueryHolder::TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, Alloc = std::make_shared(functionRegistry); } + // In case of some compilation failures filling tables may produce new problems which may replace original error messages. + if (noFillTables) { + return; + } + THashSet tablesSet; const auto& phyQuery = Proto->GetPhysicalQuery(); Transactions.reserve(phyQuery.TransactionsSize()); diff --git a/ydb/core/kqp/query_data/kqp_prepared_query.h b/ydb/core/kqp/query_data/kqp_prepared_query.h index a92a79063137..b7cd6ab1b167 100644 --- a/ydb/core/kqp/query_data/kqp_prepared_query.h +++ b/ydb/core/kqp/query_data/kqp_prepared_query.h @@ -143,7 +143,10 @@ class TPreparedQueryHolder { public: - TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry); + TPreparedQueryHolder( + NKikimrKqp::TPreparedQuery* proto, + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, + bool noFillTables = false); ~TPreparedQueryHolder(); using TConstPtr = std::shared_ptr; diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp b/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp index 5a228c190751..9c78862fcf5e 100644 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp +++ b/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp @@ -5,13 +5,7 @@ namespace NKikimr::NKqp { using namespace NYql::NDqProto; using namespace NKikimrConfig; 
-TTaskResourceEstimation EstimateTaskResources(const TDqTask& task, - const TTableServiceConfig::TResourceManager& config, const ui32 tasksCount) -{ - TTaskResourceEstimation ret = BuildInitialTaskResources(task); - EstimateTaskResources(config, ret, tasksCount); - return ret; -} + TTaskResourceEstimation BuildInitialTaskResources(const TDqTask& task) { TTaskResourceEstimation ret; @@ -23,24 +17,4 @@ TTaskResourceEstimation BuildInitialTaskResources(const TDqTask& task) { return ret; } -void EstimateTaskResources(const TTableServiceConfig::TResourceManager& config, - TTaskResourceEstimation& ret, const ui32 tasksCount) -{ - ui64 totalChannels = std::max(tasksCount, (ui32)1) * std::max(ret.ChannelBuffersCount, (ui32)1); - ui64 optimalChannelBufferSizeEstimation = totalChannels * config.GetChannelBufferSize(); - - optimalChannelBufferSizeEstimation = std::min(optimalChannelBufferSizeEstimation, config.GetMaxTotalChannelBuffersSize()); - - ret.ChannelBufferMemoryLimit = std::max(config.GetMinChannelBufferSize(), optimalChannelBufferSizeEstimation / totalChannels); - - if (ret.HeavyProgram) { - ret.MkqlProgramMemoryLimit = config.GetMkqlHeavyProgramMemoryLimit() / tasksCount; - } else { - ret.MkqlProgramMemoryLimit = config.GetMkqlLightProgramMemoryLimit() / tasksCount; - } - - ret.TotalMemoryLimit = ret.ChannelBuffersCount * ret.ChannelBufferMemoryLimit - + ret.MkqlProgramMemoryLimit; -} - } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation.h b/ydb/core/kqp/rm_service/kqp_resource_estimation.h index e88c7065c2ed..2a6af1a5339c 100644 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation.h +++ b/ydb/core/kqp/rm_service/kqp_resource_estimation.h @@ -30,9 +30,5 @@ struct TTaskResourceEstimation { TTaskResourceEstimation BuildInitialTaskResources(const NYql::NDqProto::TDqTask& task); -TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, - const NKikimrConfig::TTableServiceConfig::TResourceManager& config, 
const ui32 tasksCount); - -void EstimateTaskResources(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TTaskResourceEstimation& result, const ui32 tasksCount); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp b/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp deleted file mode 100644 index b741dc2220cf..000000000000 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include - -#include - -namespace NKikimr::NKqp { - -Y_UNIT_TEST_SUITE(KqpResourceEstimation) { - -Y_UNIT_TEST(TestChannelSize) { - NKikimrConfig::TTableServiceConfig::TResourceManager config; - config.SetChannelBufferSize(8_MB); - config.SetMinChannelBufferSize(2_MB); - config.SetMaxTotalChannelBuffersSize(2_GB); - config.SetMkqlLightProgramMemoryLimit(100); - - NYql::NDqProto::TDqTask task; - - // 100 input channels - auto* input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - // 100 input channels - input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - auto* output = task.MutableOutputs()->Add(); - output->MutableChannels()->Add(); - - auto est = EstimateTaskResources(task, config, 1); - UNIT_ASSERT_EQUAL(2, est.ChannelBuffersCount); - UNIT_ASSERT_EQUAL(est.ChannelBufferMemoryLimit, config.GetChannelBufferSize()); - - // add more channels, to be more then 256 - input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - est = EstimateTaskResources(task, config, 1); - UNIT_ASSERT_EQUAL(2, est.ChannelBuffersCount); - - UNIT_ASSERT(est.ChannelBufferMemoryLimit == config.GetChannelBufferSize()); - UNIT_ASSERT(est.ChannelBufferMemoryLimit >= config.GetMinChannelBufferSize()); -} - -} // suite KqpResourceEstimation - -} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_rm_service.cpp 
b/ydb/core/kqp/rm_service/kqp_rm_service.cpp index b02e74d3b24b..6632865dc0c5 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_service.cpp +++ b/ydb/core/kqp/rm_service/kqp_rm_service.cpp @@ -20,6 +20,8 @@ #include +#include + namespace NKikimr { namespace NKqp { namespace NRm { @@ -43,42 +45,83 @@ using namespace NResourceBroker; namespace { -template -class TLimitedResource { +static constexpr double MYEPS = 1e-9; + +ui64 OverPercentage(ui64 limit, double percent) { + return static_cast(limit) / 100 * (100 - percent) + MYEPS; +} + +ui64 Percentage(ui64 limit, double percent) { + return static_cast(limit) / 100 * percent + MYEPS; +} + +class TMemoryResource : public TAtomicRefCount { public: - explicit TLimitedResource(T limit) - : Limit(limit) - , Used(0) {} + explicit TMemoryResource(ui64 baseLimit, double memoryPoolPercent, double overPercent) + : BaseLimit(baseLimit) + , Used(0) + , MemoryPoolPercent(memoryPoolPercent) + , OverPercent(overPercent) + , SpillingCookie(MakeIntrusive()) + { + SetActualLimits(); + } - T Available() const { + ui64 Available() const { return Limit > Used ? 
Limit - Used : 0; } - bool Has(T amount) const { + bool Has(ui64 amount) const { return Available() >= amount; } - bool Acquire(T value) { + bool AcquireIfAvailable(ui64 value) { if (Available() >= value) { Used += value; + UpdateCookie(); return true; } return false; } - void Release(T value) { + TIntrusivePtr GetSpillingCookie() const { + return SpillingCookie; + } + + void UpdateCookie() { + SpillingCookie->SpillingPercentReached.store(Available() < OverLimit); + } + + ui64 GetUsed() const { + return Used; + } + + void Release(ui64 value) { if (Used > value) { Used -= value; } else { Used = 0; } + + UpdateCookie(); } - void SetNewLimit(T limit) { - Limit = limit; + void SetNewLimit(ui64 baseLimit, double memoryPoolPercent, double overPercent) { + if (abs(memoryPoolPercent - MemoryPoolPercent) < MYEPS && baseLimit == BaseLimit) + return; + + BaseLimit = baseLimit; + MemoryPoolPercent = memoryPoolPercent; + OverPercent = overPercent; + SetActualLimits(); } - T GetLimit() const { + void SetActualLimits() { + Limit = Percentage(BaseLimit, MemoryPoolPercent); + OverLimit = OverPercentage(Limit, OverPercent); + } + + ui64 GetLimit() const { return Limit; } @@ -87,64 +130,16 @@ class TLimitedResource { } private: - T Limit; - T Used; -}; - -struct TTaskState { - bool AllocatedExecutionUnit = false; - ui64 ScanQueryMemory = 0; - ui64 ExternalDataQueryMemory = 0; - ui32 ExecutionUnits = 0; - ui64 ResourceBrokerTaskId = 0; - TInstant CreatedAt; -}; - -struct TTxState { - std::unordered_map Tasks; - ui64 TxScanQueryMemory = 0; - ui64 TxExternalDataQueryMemory = 0; - ui32 TxExecutionUnits = 0; - TInstant CreatedAt; - - TTaskState& Allocated(ui64 taskId, TInstant now, const TKqpResourcesRequest& resources, bool memoryAsExternal = false) { - ui64 externalMemory = resources.ExternalMemory; - ui64 resourceBrokerMemory = 0; - if (memoryAsExternal) { - externalMemory += resources.Memory; - } else { - resourceBrokerMemory = resources.Memory; - } - - TxExternalDataQueryMemory += 
externalMemory; - TxScanQueryMemory += resourceBrokerMemory; - if (!CreatedAt) { - CreatedAt = now; - } - - if (resources.ExecutionUnits) { - Y_ABORT_UNLESS(!Tasks.contains(taskId)); - } - - auto& taskState = Tasks[taskId]; - taskState.ExecutionUnits += resources.ExecutionUnits; - taskState.ScanQueryMemory += resourceBrokerMemory; - taskState.ExternalDataQueryMemory += externalMemory; - if (!taskState.CreatedAt) { - taskState.CreatedAt = now; - } - - return taskState; - } -}; - -struct TTxStatesBucket { - std::unordered_map Txs; // TxId -> TxState - TMutex Lock; + ui64 BaseLimit; + ui64 OverLimit; + ui64 Limit; + ui64 Used; + double MemoryPoolPercent; + double OverPercent; + + TIntrusivePtr SpillingCookie; }; -constexpr ui64 BucketsCount = 64; - struct TEvPrivate { enum EEv { EvPublishResources = EventSpaceBegin(TEvents::ES_PRIVATE), @@ -167,31 +162,45 @@ class TKqpResourceManager : public IKqpResourceManager { public: TKqpResourceManager(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TIntrusivePtr counters) - : Config(config) - , Counters(counters) - , ExecutionUnitsResource(Config.GetComputeActorsCount()) - , ExecutionUnitsLimit(Config.GetComputeActorsCount()) - , ScanQueryMemoryResource(Config.GetQueryMemoryLimit()) - , PublishResourcesByExchanger(Config.GetEnablePublishResourcesByExchanger()) { - + : Counters(counters) + , ExecutionUnitsResource(config.GetComputeActorsCount()) + , ExecutionUnitsLimit(config.GetComputeActorsCount()) + , SpillingPercent(config.GetSpillingPercent()) + , TotalMemoryResource(MakeIntrusive(config.GetQueryMemoryLimit(), (double)100, config.GetSpillingPercent())) + , PublishResourcesByExchanger(config.GetEnablePublishResourcesByExchanger()) + { + SetConfigValues(config); } - void Bootstrap(TActorSystem* actorSystem, TActorId selfId) { + void Bootstrap(NKikimrConfig::TTableServiceConfig::TResourceManager& config, TActorSystem* actorSystem, TActorId selfId) { if (!Counters) { Counters = 
MakeIntrusive(AppData()->Counters); } ActorSystem = actorSystem; SelfId = selfId; - UpdatePatternCache(Config.GetKqpPatternCacheCapacityBytes(), - Config.GetKqpPatternCacheCompiledCapacityBytes(), - Config.GetKqpPatternCachePatternAccessTimesBeforeTryToCompile()); + UpdatePatternCache(config.GetKqpPatternCacheCapacityBytes(), + config.GetKqpPatternCacheCompiledCapacityBytes(), + config.GetKqpPatternCachePatternAccessTimesBeforeTryToCompile()); if (PublishResourcesByExchanger) { - CreateResourceInfoExchanger(Config.GetInfoExchangerSettings()); + CreateResourceInfoExchanger(config.GetInfoExchangerSettings()); return; } } + const TIntrusivePtr& GetCounters() const override { + return Counters; + } + + TPlannerPlacingOptions GetPlacingOptions() override { + return TPlannerPlacingOptions{ + .MaxNonParallelTasksExecutionLimit = MaxNonParallelTasksExecutionLimit.load(), + .MaxNonParallelDataQueryTasksLimit = MaxNonParallelDataQueryTasksLimit.load(), + .MaxNonParallelTopStageExecutionLimit = MaxNonParallelTopStageExecutionLimit.load(), + .PreferLocalDatacenterExecution = PreferLocalDatacenterExecution.load(), + }; + } + void CreateResourceInfoExchanger( const NKikimrConfig::TTableServiceConfig::TResourceManager::TInfoExchangerSettings& settings) { PublishResourcesByExchanger = true; @@ -210,7 +219,6 @@ class TKqpResourceManager : public IKqpResourceManager { ExecutionUnitsResource.fetch_add(cnt); return false; } else { - Counters->RmComputeActors->Add(cnt); return true; } } @@ -221,11 +229,13 @@ class TKqpResourceManager : public IKqpResourceManager { } ExecutionUnitsResource.fetch_add(cnt); - Counters->RmComputeActors->Sub(cnt); } - TKqpRMAllocateResult AllocateResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override + TKqpRMAllocateResult AllocateResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) override { + const ui64 txId = tx->TxId; + const ui64 taskId = task->TaskId; + TKqpRMAllocateResult result; if 
(resources.ExecutionUnits) { if (!AllocateExecutionUnits(resources.ExecutionUnits)) { @@ -248,22 +258,14 @@ class TKqpResourceManager : public IKqpResourceManager { return result; } - auto now = ActorSystem->Timestamp(); bool hasScanQueryMemory = true; - ui64 queryMemoryLimit = 0; - // NOTE(gvit): the first memory request from the data query pool always satisfied. - // all other requests are not guaranteed to be satisfied. - // In the nearest future we need to implement several layers of memory requests. + bool isFirstAllocationRequest = (resources.ExecutionUnits > 0 && resources.MemoryPool == EKqpMemoryPool::DataQuery); if (isFirstAllocationRequest) { - auto& txBucket = TxBucket(txId); - with_lock(txBucket.Lock) { - auto& tx = txBucket.Txs[txId]; - tx.Allocated(taskId, now, resources, /*memoryAsExternal=*/true); - ExternalDataQueryMemory.fetch_add(resources.Memory + resources.ExternalMemory); - Counters->RmExternalMemory->Add(resources.Memory + resources.ExternalMemory); - } - + TKqpResourcesRequest newRequest = resources; + newRequest.MoveToFreeTier(); + tx->Allocated(task, newRequest); + ExternalDataQueryMemory.fetch_add(newRequest.ExternalMemory); return result; } @@ -275,183 +277,134 @@ class TKqpResourceManager : public IKqpResourceManager { return result; } - hasScanQueryMemory = ScanQueryMemoryResource.Has(resources.Memory); - if (hasScanQueryMemory) { - ScanQueryMemoryResource.Acquire(resources.Memory); - queryMemoryLimit = Config.GetQueryMemoryLimit(); + hasScanQueryMemory = TotalMemoryResource->AcquireIfAvailable(resources.Memory); + task->TotalMemoryCookie = TotalMemoryResource->GetSpillingCookie(); + + if (hasScanQueryMemory && !tx->PoolId.empty() && tx->MemoryPoolPercent > 0) { + auto [it, success] = MemoryNamedPools.emplace(tx->MakePoolId(), nullptr); + + if (success) { + it->second = MakeIntrusive(TotalMemoryResource->GetLimit(), tx->MemoryPoolPercent, SpillingPercent.load()); + } else { + it->second->SetNewLimit(TotalMemoryResource->GetLimit(), 
tx->MemoryPoolPercent, SpillingPercent.load()); + } + + auto& poolMemory = it->second; + if (!poolMemory->AcquireIfAvailable(resources.Memory)) { + hasScanQueryMemory = false; + TotalMemoryResource->Release(resources.Memory); + } + + task->PoolMemoryCookie = poolMemory->GetSpillingCookie(); } - } // with_lock (Lock) + } if (!hasScanQueryMemory) { Counters->RmNotEnoughMemory->Inc(); TStringBuilder reason; - reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough memory for query, requested: " << resources.Memory; + reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough memory for query, requested: " << resources.Memory + << ". " << tx->ToString(); result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::NOT_ENOUGH_MEMORY, reason); return result; } ui64 rbTaskId = LastResourceBrokerTaskId.fetch_add(1) + 1; TString rbTaskName = TStringBuilder() << "kqp-" << txId << '-' << taskId << '-' << rbTaskId; - bool extraAlloc = false; - - auto& txBucket = TxBucket(txId); - with_lock (txBucket.Lock) { - Y_DEFER { - if (!result) { - auto unguard = ::Unguard(txBucket.Lock); - Counters->RmNotEnoughMemory->Inc(); - with_lock (Lock) { - ScanQueryMemoryResource.Release(resources.Memory); - } // with_lock (Lock) - } - }; - - auto& tx = txBucket.Txs[txId]; - ui64 txTotalRequestedMemory = tx.TxScanQueryMemory + resources.Memory; - result.TotalAllocatedQueryMemory = txTotalRequestedMemory; - if (txTotalRequestedMemory > queryMemoryLimit) { - TStringBuilder reason; - reason << "TxId: " << txId << ", taskId: " << taskId << ". 
Query memory limit exceeded: " - << "requested " << txTotalRequestedMemory; - result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::QUERY_MEMORY_LIMIT_EXCEEDED, reason); - return result; - } - bool allocated = ResourceBroker->SubmitTaskInstant( - TEvResourceBroker::TEvSubmitTask(rbTaskId, rbTaskName, {0, resources.Memory}, "kqp_query", 0, {}), - SelfId); + Y_DEFER { + if (!result) { + Counters->RmNotEnoughMemory->Inc(); + with_lock (Lock) { + TotalMemoryResource->Release(resources.Memory); + if (!tx->PoolId.empty()) { + auto it = MemoryNamedPools.find(tx->MakePoolId()); + if (it != MemoryNamedPools.end()) { + it->second->Release(resources.Memory); + } - if (!allocated) { - TStringBuilder reason; - reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough ScanQueryMemory: " - << "requested " << resources.Memory; - LOG_AS_N(reason); - result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::NOT_ENOUGH_MEMORY, reason); - return result; + if (it->second->GetUsed() == 0) { + MemoryNamedPools.erase(it); + } + } + } } + }; - auto& taskState = tx.Allocated(taskId, now, resources); - if (!taskState.ResourceBrokerTaskId) { - taskState.ResourceBrokerTaskId = rbTaskId; - } else { - extraAlloc = true; - bool merged = ResourceBroker->MergeTasksInstant(taskState.ResourceBrokerTaskId, rbTaskId, SelfId); - Y_ABORT_UNLESS(merged); - } - } // with_lock (txBucket.Lock) + bool allocated = ResourceBroker->SubmitTaskInstant( + TEvResourceBroker::TEvSubmitTask(rbTaskId, rbTaskName, {0, resources.Memory}, "kqp_query", 0, {}), + SelfId); - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". Allocated " << resources.ToString()); + if (!allocated) { + TStringBuilder reason; + reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough memory for query, requested: " << resources.Memory + << ". 
" << tx->ToString(); + LOG_AS_N(reason); + result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::NOT_ENOUGH_MEMORY, reason); + return result; + } - Counters->RmMemory->Add(resources.Memory); - if (extraAlloc) { - Counters->RmExtraMemAllocs->Inc(); + tx->Allocated(task, resources); + if (!task->ResourceBrokerTaskId) { + task->ResourceBrokerTaskId = rbTaskId; + } else { + bool merged = ResourceBroker->MergeTasksInstant(task->ResourceBrokerTaskId, rbTaskId, SelfId); + Y_ABORT_UNLESS(merged); } + LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". Allocated " << resources.ToString()); FireResourcesPublishing(); return result; } - void FreeResources(ui64 txId, ui64 taskId) override { - FreeResources(txId, taskId, TKqpResourcesRequest{.ReleaseAllResources=true}); + void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task) override { + FreeResources(tx, task, task->FreeResourcesRequest()); } - void FreeResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override { - ui64 releaseScanQueryMemory = 0; - ui64 releaseExternalDataQueryMemory = 0; - - auto& txBucket = TxBucket(txId); - - { - TMaybe> guard; - guard.ConstructInPlace(txBucket.Lock); - - auto txIt = txBucket.Txs.find(txId); - if (txIt == txBucket.Txs.end()) { - return; - } - - auto& tx = txIt->second; - auto taskIt = tx.Tasks.find(taskId); - if (taskIt == tx.Tasks.end()) { - return; - } - - auto& task = taskIt->second; - if (resources.ReleaseAllResources && task.ExecutionUnits) { - FreeExecutionUnits(task.ExecutionUnits); - } - - if (resources.ReleaseAllResources) { - releaseExternalDataQueryMemory = task.ExternalDataQueryMemory; - releaseScanQueryMemory = task.ScanQueryMemory; - } else { - releaseScanQueryMemory = std::min(task.ScanQueryMemory, resources.Memory); - ui64 leftToRelease = resources.Memory - releaseScanQueryMemory; - releaseExternalDataQueryMemory = std::min(task.ExternalDataQueryMemory, resources.ExternalMemory + leftToRelease); - } - - task.ScanQueryMemory -= 
releaseScanQueryMemory; - tx.TxScanQueryMemory -= releaseScanQueryMemory; - - task.ExternalDataQueryMemory -= releaseExternalDataQueryMemory; - tx.TxExternalDataQueryMemory -= releaseExternalDataQueryMemory; + void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) override { + if (resources.ExecutionUnits) { + FreeExecutionUnits(resources.ExecutionUnits); + } - if (task.ScanQueryMemory == 0) { - if (task.ResourceBrokerTaskId) { - bool finished = ResourceBroker->FinishTaskInstant( - TEvResourceBroker::TEvFinishTask(task.ResourceBrokerTaskId), SelfId); - Y_DEBUG_ABORT_UNLESS(finished); - task.ResourceBrokerTaskId = 0; - } + Y_ABORT_UNLESS(resources.Memory <= task->ScanQueryMemory); + if (resources.Memory > 0 && task->ResourceBrokerTaskId) { + if (resources.Memory == task->ScanQueryMemory) { + bool finished = ResourceBroker->FinishTaskInstant( + TEvResourceBroker::TEvFinishTask(task->ResourceBrokerTaskId), SelfId); + Y_DEBUG_ABORT_UNLESS(finished); + task->ResourceBrokerTaskId = 0; } else { bool reduced = ResourceBroker->ReduceTaskResourcesInstant( - taskIt->second.ResourceBrokerTaskId, {0, releaseScanQueryMemory}, SelfId); + task->ResourceBrokerTaskId, {0, resources.Memory}, SelfId); Y_DEBUG_ABORT_UNLESS(reduced); } + } - if (resources.ExecutionUnits) { - ui64 remainsTasks = tx.Tasks.size() - 1; - if (remainsTasks == 0) { - txBucket.Txs.erase(txIt); - } else { - tx.Tasks.erase(taskIt); - } - } - - i64 prev = ExternalDataQueryMemory.fetch_sub(releaseExternalDataQueryMemory); - Counters->RmExternalMemory->Sub(releaseExternalDataQueryMemory); - Y_DEBUG_ABORT_UNLESS(prev >= 0); - Counters->RmMemory->Sub(releaseScanQueryMemory); - Y_DEBUG_ABORT_UNLESS(Counters->RmMemory->Val() >= 0); - } // with_lock (txBucket.Lock) - - with_lock (Lock) { - ScanQueryMemoryResource.Release(releaseScanQueryMemory); - } // with_lock (Lock) - - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". 
Released resources, " - << "ScanQueryMemory: " << releaseScanQueryMemory << ", " - << "ExternalDataQueryMemory " << releaseExternalDataQueryMemory << ", " - << "ExecutionUnits " << resources.ExecutionUnits << "."); - - FireResourcesPublishing(); - } - - void NotifyExternalResourcesAllocated(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override { - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". External allocation: " << resources.ToString()); + tx->Released(task, resources); + i64 prev = ExternalDataQueryMemory.fetch_sub(resources.ExternalMemory); + Y_DEBUG_ABORT_UNLESS(prev >= 0); - // we don't register data execution units for now - //YQL_ENSURE(resources.ExecutionUnits == 0); - YQL_ENSURE(resources.MemoryPool == EKqpMemoryPool::DataQuery); + if (resources.Memory > 0) { + with_lock (Lock) { + TotalMemoryResource->Release(resources.Memory); + if (!tx->PoolId.empty()) { + auto it = MemoryNamedPools.find(tx->MakePoolId()); + if (it != MemoryNamedPools.end()) { + it->second->Release(resources.Memory); - auto& txBucket = TxBucket(txId); - with_lock (txBucket.Lock) { - txBucket.Txs[txId].Allocated(taskId, TInstant(), resources); - ExternalDataQueryMemory.fetch_add(resources.ExternalMemory); - Counters->RmExternalMemory->Add(resources.ExternalMemory); - } // with_lock (txBucket.Lock) + if (it->second->GetUsed() == 0) { + MemoryNamedPools.erase(it); + } + } + } + } + } + LOG_AS_D("TxId: " << tx->TxId << ", taskId: " << task->TaskId + << ". 
Released resources, " + << "Memory: " << resources.Memory << ", " + << "Free Tier: " << resources.ExternalMemory << ", " + << "ExecutionUnits: " << resources.ExecutionUnits << "."); FireResourcesPublishing(); } @@ -499,40 +452,67 @@ class TKqpResourceManager : public IKqpResourceManager { with_lock (Lock) { result.ExecutionUnits = ExecutionUnitsResource.load(); - result.Memory[EKqpMemoryPool::ScanQuery] = ScanQueryMemoryResource.Available(); + result.Memory[EKqpMemoryPool::ScanQuery] = TotalMemoryResource->Available(); } return result; } - NKikimrConfig::TTableServiceConfig::TResourceManager GetConfig() override { + std::shared_ptr GetPatternCache() override { with_lock (Lock) { - return Config; + return PatternCache; } } - std::shared_ptr GetPatternCache() override { - with_lock (Lock) { - return PatternCache; + TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, const ui32 tasksCount) override + { + TTaskResourceEstimation ret = BuildInitialTaskResources(task); + EstimateTaskResources(ret, tasksCount); + return ret; + } + + void EstimateTaskResources(TTaskResourceEstimation& ret, const ui32 tasksCount) override + { + ui64 totalChannels = std::max(tasksCount, (ui32)1) * std::max(ret.ChannelBuffersCount, (ui32)1); + ui64 optimalChannelBufferSizeEstimation = totalChannels * ChannelBufferSize.load(); + + optimalChannelBufferSizeEstimation = std::min(optimalChannelBufferSizeEstimation, MaxTotalChannelBuffersSize.load()); + + ret.ChannelBufferMemoryLimit = std::max(MinChannelBufferSize.load(), optimalChannelBufferSizeEstimation / totalChannels); + + if (ret.HeavyProgram) { + ret.MkqlProgramMemoryLimit = MkqlHeavyProgramMemoryLimit.load() / std::max(tasksCount, (ui32)1); + } else { + ret.MkqlProgramMemoryLimit = MkqlLightProgramMemoryLimit.load() / std::max(tasksCount, (ui32)1); } + + ret.TotalMemoryLimit = ret.ChannelBuffersCount * ret.ChannelBufferMemoryLimit + + ret.MkqlProgramMemoryLimit; } - ui32 GetNodeId() override { - return 
SelfId.NodeId(); + void SetConfigValues(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) { + MkqlHeavyProgramMemoryLimit.store(config.GetMkqlHeavyProgramMemoryLimit()); + MkqlLightProgramMemoryLimit.store(config.GetMkqlLightProgramMemoryLimit()); + ChannelBufferSize.store(config.GetChannelBufferSize()); + MinChannelBufferSize.store(config.GetMinChannelBufferSize()); + MaxTotalChannelBuffersSize.store(config.GetMaxTotalChannelBuffersSize()); + QueryMemoryLimit.store(config.GetQueryMemoryLimit()); + MaxNonParallelTopStageExecutionLimit.store(config.GetMaxNonParallelTopStageExecutionLimit()); + MaxNonParallelTasksExecutionLimit.store(config.GetMaxNonParallelTasksExecutionLimit()); + PreferLocalDatacenterExecution.store(config.GetPreferLocalDatacenterExecution()); + MaxNonParallelDataQueryTasksLimit.store(config.GetMaxNonParallelDataQueryTasksLimit()); + SpillingPercent.store(config.GetSpillingPercent()); } - TTxStatesBucket& TxBucket(ui64 txId) { - return Buckets[txId % Buckets.size()]; + ui32 GetNodeId() override { + return SelfId.NodeId(); } void FireResourcesPublishing() { - with_lock (Lock) { - if (PublishScheduledAt) { - return; - } + bool prev = PublishScheduled.test_and_set(); + if (!prev) { + ActorSystem->Send(SelfId, new TEvPrivate::TEvSchedulePublishResources); } - - ActorSystem->Send(SelfId, new TEvPrivate::TEvSchedulePublishResources); } void UpdatePatternCache(ui64 maxSizeBytes, ui64 maxCompiledSizeBytes, ui64 patternAccessTimesBeforeTryToCompile) { @@ -549,7 +529,13 @@ class TKqpResourceManager : public IKqpResourceManager { TActorId SelfId; - NKikimrConfig::TTableServiceConfig::TResourceManager Config; // guarded by Lock + std::atomic QueryMemoryLimit; + std::atomic MkqlHeavyProgramMemoryLimit; + std::atomic MkqlLightProgramMemoryLimit; + std::atomic ChannelBufferSize; + std::atomic MinChannelBufferSize; + std::atomic MaxTotalChannelBuffersSize; + TIntrusivePtr Counters; TIntrusivePtr ResourceBroker; TActorSystem* ActorSystem = 
nullptr; @@ -560,16 +546,18 @@ class TKqpResourceManager : public IKqpResourceManager { // limits (guarded by Lock) std::atomic ExecutionUnitsResource; std::atomic ExecutionUnitsLimit; - TLimitedResource ScanQueryMemoryResource; + std::atomic SpillingPercent; + TIntrusivePtr TotalMemoryResource; std::atomic ExternalDataQueryMemory = 0; + std::atomic MaxNonParallelTopStageExecutionLimit = 1; + std::atomic MaxNonParallelTasksExecutionLimit = 8; + std::atomic PreferLocalDatacenterExecution = true; + std::atomic MaxNonParallelDataQueryTasksLimit = 1000; // current state - std::array Buckets; std::atomic LastResourceBrokerTaskId = 0; - // schedule info (guarded by Lock) - std::optional PublishScheduledAt; - + std::atomic_flag PublishScheduled; // pattern cache for different actors std::shared_ptr PatternCache; @@ -577,6 +565,8 @@ class TKqpResourceManager : public IKqpResourceManager { std::shared_ptr ResourceSnapshotState; bool PublishResourcesByExchanger; TActorId ResourceInfoExchanger = TActorId(); + + absl::flat_hash_map, TIntrusivePtr, THash>> MemoryNamedPools; }; struct TResourceManagers { @@ -601,16 +591,21 @@ class TKqpResourceManagerActor : public TActorBootstrapped counters, const TActorId& resourceBrokerId, - std::shared_ptr&& kqpProxySharedResources) - : ResourceBrokerId(resourceBrokerId ? resourceBrokerId : MakeResourceBrokerID()) + std::shared_ptr&& kqpProxySharedResources, ui32 nodeId) + : Config(config) + , ResourceBrokerId(resourceBrokerId ? 
resourceBrokerId : MakeResourceBrokerID()) , KqpProxySharedResources(std::move(kqpProxySharedResources)) , PublishResourcesByExchanger(config.GetEnablePublishResourcesByExchanger()) { ResourceManager = std::make_shared(config, counters); + with_lock (ResourceManagers.Lock) { + ResourceManagers.ByNodeId[nodeId] = ResourceManager; + ResourceManagers.Default = ResourceManager; + } } void Bootstrap() { - ResourceManager->Bootstrap(TlsActivationContext->ActorSystem(), SelfId()); + ResourceManager->Bootstrap(Config, TlsActivationContext->ActorSystem(), SelfId()); LOG_D("Start KqpResourceManagerActor at " << SelfId() << " with ResourceBroker at " << ResourceBrokerId); @@ -639,11 +634,6 @@ class TKqpResourceManagerActor : public TActorBootstrappedLock) { - ResourceManager->PublishScheduledAt.reset(); - } + PublishResourcesScheduledAt.reset(); PublishResourceUsage("batching"); } @@ -741,7 +729,7 @@ class TKqpResourceManagerActor : public TActorBootstrapped 0) { with_lock (ResourceManager->Lock) { - ResourceManager->ScanQueryMemoryResource.SetNewLimit(queueConfig.GetLimit().GetMemory()); + ResourceManager->TotalMemoryResource->SetNewLimit(queueConfig.GetLimit().GetMemory(), (double)100, ResourceManager->SpillingPercent.load()); } LOG_I("Total node memory for scan queries: " << queueConfig.GetLimit().GetMemory() << " bytes"); } @@ -841,9 +829,9 @@ class TKqpResourceManagerActor : public TActorBootstrappedExecutionUnitsLimit.load(); ResourceManager->ExecutionUnitsLimit.store(config.GetComputeActorsCount()); ResourceManager->ExecutionUnitsResource.fetch_add((i32)config.GetComputeActorsCount() - prev); - ResourceManager->Config.Swap(&config); + ResourceManager->SetConfigValues(config); + Config.Swap(&config); } - } static void HandleWork(TEvents::TEvUndelivered::TPtr& ev) { @@ -881,14 +869,9 @@ class TKqpResourceManagerActor : public TActorBootstrappedLock) { - str << ResourceManager->Config.DebugString() << Endl; - } - str << "State storage key: " << WbState.Tenant << Endl; 
with_lock (ResourceManager->Lock) { - str << "ScanQuery memory resource: " << ResourceManager->ScanQueryMemoryResource.ToString() << Endl; + str << "ScanQuery memory resource: " << ResourceManager->TotalMemoryResource->ToString() << Endl; str << "External DataQuery memory: " << ResourceManager->ExternalDataQueryMemory.load() << Endl; str << "ExecutionUnits resource: " << ResourceManager->ExecutionUnitsResource.load() << Endl; } @@ -897,35 +880,8 @@ class TKqpResourceManagerActor : public TActorBootstrapped publishScheduledAt; - with_lock (ResourceManager->Lock) { - publishScheduledAt = ResourceManager->PublishScheduledAt; - } - - if (publishScheduledAt) { - str << "Next publish time: " << *publishScheduledAt << Endl; - } - - str << Endl << "Transactions:" << Endl; - for (auto& bucket : ResourceManager->Buckets) { - with_lock (bucket.Lock) { - for (auto& [txId, txState] : bucket.Txs) { - str << " TxId: " << txId << Endl; - str << " ScanQuery memory: " << txState.TxScanQueryMemory << Endl; - str << " External DataQuery memory: " << txState.TxExternalDataQueryMemory << Endl; - str << " Execution units: " << txState.TxExecutionUnits << Endl; - str << " Create at: " << txState.CreatedAt << Endl; - str << " Tasks:" << Endl; - for (auto& [taskId, taskState] : txState.Tasks) { - str << " TaskId: " << taskId << Endl; - str << " ScanQuery memory: " << taskState.ScanQueryMemory << Endl; - str << " External DataQuery memory: " << taskState.ExternalDataQueryMemory << Endl; - str << " Execution units: " << taskState.ExecutionUnits << Endl; - str << " ResourceBroker TaskId: " << taskState.ResourceBrokerTaskId << Endl; - str << " Created at: " << taskState.CreatedAt << Endl; - } - } - } // with_lock (bucket.Lock) + if (PublishResourcesScheduledAt) { + str << "Next publish time: " << *PublishResourcesScheduledAt << Endl; } if (snapshot.empty()) { @@ -940,13 +896,6 @@ class TKqpResourceManagerActor : public TActorBootstrapped publishScheduledAt; - - with_lock (ResourceManager->Lock) 
{ - publishInterval = TDuration::Seconds(ResourceManager->Config.GetPublishStatisticsIntervalSec()); - publishScheduledAt = ResourceManager->PublishScheduledAt; - } - - if (publishScheduledAt) { + const TDuration publishInterval = TDuration::Seconds(Config.GetPublishStatisticsIntervalSec()); + if (PublishResourcesScheduledAt) { return; } auto now = ResourceManager->ActorSystem->Timestamp(); if (publishInterval && WbState.LastPublishTime && now - *WbState.LastPublishTime < publishInterval) { - publishScheduledAt = *WbState.LastPublishTime + publishInterval; - - with_lock (ResourceManager->Lock) { - ResourceManager->PublishScheduledAt = publishScheduledAt; - } + PublishResourcesScheduledAt = *WbState.LastPublishTime + publishInterval; - Schedule(*publishScheduledAt - now, new TEvPrivate::TEvPublishResources); - LOG_D("Schedule publish at " << *publishScheduledAt << ", after " << (*publishScheduledAt - now)); + Schedule(*PublishResourcesScheduledAt - now, new TEvPrivate::TEvPublishResources); + LOG_D("Schedule publish at " << *PublishResourcesScheduledAt << ", after " << (*PublishResourcesScheduledAt - now)); return; } + // starting resources publishing. + // saying resource manager that we are ready for the next publishing. 
+ ResourceManager->PublishScheduled.clear(); + NKikimrKqp::TKqpNodeResources payload; payload.SetNodeId(SelfId().NodeId()); payload.SetTimestamp(now.Seconds()); @@ -1027,13 +969,13 @@ class TKqpResourceManagerActor : public TActorBootstrappedLock) { payload.SetAvailableComputeActors(ResourceManager->ExecutionUnitsResource.load()); // legacy - payload.SetTotalMemory(ResourceManager->ScanQueryMemoryResource.GetLimit()); // legacy - payload.SetUsedMemory(ResourceManager->ScanQueryMemoryResource.GetLimit() - ResourceManager->ScanQueryMemoryResource.Available()); // legacy + payload.SetTotalMemory(ResourceManager->TotalMemoryResource->GetLimit()); // legacy + payload.SetUsedMemory(ResourceManager->TotalMemoryResource->GetLimit() - ResourceManager->TotalMemoryResource->Available()); // legacy payload.SetExecutionUnits(ResourceManager->ExecutionUnitsResource.load()); auto* pool = payload.MutableMemory()->Add(); pool->SetPool(EKqpMemoryPool::ScanQuery); - pool->SetAvailable(ResourceManager->ScanQueryMemoryResource.Available()); + pool->SetAvailable(ResourceManager->TotalMemoryResource->Available()); } if (PublishResourcesByExchanger) { @@ -1072,6 +1014,8 @@ class TKqpResourceManagerActor : public TActorBootstrapped ResourceManager; + std::optional PublishResourcesScheduledAt; bool PublishResourcesByExchanger; std::optional SelfDataCenterId; }; @@ -1100,9 +1045,9 @@ class TKqpResourceManagerActor : public TActorBootstrapped counters, NActors::TActorId resourceBroker, - std::shared_ptr kqpProxySharedResources) + std::shared_ptr kqpProxySharedResources, ui32 nodeId) { - return new NRm::TKqpResourceManagerActor(config, counters, resourceBroker, std::move(kqpProxySharedResources)); + return new NRm::TKqpResourceManagerActor(config, counters, resourceBroker, std::move(kqpProxySharedResources), nodeId); } std::shared_ptr GetKqpResourceManager(TMaybe _nodeId) { @@ -1111,6 +1056,10 @@ std::shared_ptr GetKqpResourceManager(TMaybe _no } ui32 nodeId = _nodeId ? 
*_nodeId : TActivationContext::ActorSystem()->NodeId; + if (auto rm = TryGetKqpResourceManager(nodeId)) { + return rm; + } + Y_ABORT("KqpResourceManager not ready yet, node #%" PRIu32, nodeId); } diff --git a/ydb/core/kqp/rm_service/kqp_rm_service.h b/ydb/core/kqp/rm_service/kqp_rm_service.h index fe5f3e7986c5..23138cf394c4 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_service.h +++ b/ydb/core/kqp/rm_service/kqp_rm_service.h @@ -11,9 +11,12 @@ #include #include +#include "kqp_resource_estimation.h" + #include #include #include +#include namespace NKikimr { @@ -40,18 +43,173 @@ struct TKqpResourcesRequest { ui64 ExternalMemory = 0; bool ReleaseAllResources = false; + void MoveToFreeTier() { + ExternalMemory += Memory; + Memory = 0; + } + TString ToString() const { return TStringBuilder() << "TKqpResourcesRequest{ MemoryPool: " << (ui32) MemoryPool << ", Memory: " << Memory << "ExternalMemory: " << ExternalMemory << " }"; } }; +class TTxState; + +class TMemoryResourceCookie : public TAtomicRefCount { +public: + std::atomic SpillingPercentReached{false}; +}; + +class TTaskState : public TAtomicRefCount { + friend TTxState; + +public: + const ui64 TaskId = 0; + const TInstant CreatedAt; + ui64 ScanQueryMemory = 0; + ui64 ExternalDataQueryMemory = 0; + ui64 ResourceBrokerTaskId = 0; + ui32 ExecutionUnits = 0; + TIntrusivePtr TotalMemoryCookie; + TIntrusivePtr PoolMemoryCookie; + +public: + + // compute actor wants to release some memory. 
+ // we distribute that memory across granted resources + TKqpResourcesRequest FitRequest(TKqpResourcesRequest& resources) { + ui64 releaseScanQueryMemory = std::min(ScanQueryMemory, resources.Memory); + ui64 leftToRelease = resources.Memory - releaseScanQueryMemory; + ui64 releaseExternalDataQueryMemory = std::min(ExternalDataQueryMemory, resources.ExternalMemory + leftToRelease); + + resources.Memory = releaseScanQueryMemory; + resources.ExternalMemory = releaseExternalDataQueryMemory; + return resources; + } + + bool IsReasonableToStartSpilling() { + return (PoolMemoryCookie && PoolMemoryCookie->SpillingPercentReached.load()) + || (TotalMemoryCookie && TotalMemoryCookie->SpillingPercentReached.load()); + } + + TKqpResourcesRequest FreeResourcesRequest() const { + return TKqpResourcesRequest{ + .ExecutionUnits=ExecutionUnits, + .MemoryPool=EKqpMemoryPool::Unspecified, + .Memory=ScanQueryMemory, + .ExternalMemory=ExternalDataQueryMemory}; + } + + explicit TTaskState(ui64 taskId, TInstant createdAt) + : TaskId(taskId) + , CreatedAt(createdAt) + { + } +}; + +class TTxState : public TAtomicRefCount { + +public: + const ui64 TxId; + const TInstant CreatedAt; + TIntrusivePtr Counters; + const TString PoolId; + const double MemoryPoolPercent; + const TString Database; + +private: + std::atomic TxScanQueryMemory = 0; + std::atomic TxExternalDataQueryMemory = 0; + std::atomic TxExecutionUnits = 0; + +public: + explicit TTxState(ui64 txId, TInstant now, TIntrusivePtr counters, const TString& poolId, const double memoryPoolPercent, + const TString& database) + : TxId(txId) + , CreatedAt(now) + , Counters(std::move(counters)) + , PoolId(poolId) + , MemoryPoolPercent(memoryPoolPercent) + , Database(database) + {} + + std::pair MakePoolId() const { + return std::make_pair(Database, PoolId); + } + + TString ToString() const { + auto res = TStringBuilder() << "TxResourcesInfo{ " + << "TxId: " << TxId + << "Database: " << Database; + + if (!PoolId.empty()) { + res << ", PoolId: " 
<< PoolId + << ", MemoryPoolPercent: " << Sprintf("%.2f", MemoryPoolPercent); + } + + res << ", memory initially granted resources: " << TxExternalDataQueryMemory.load() + << ", extra allocations " << TxScanQueryMemory.load() + << ", execution units: " << TxExecutionUnits.load() + << ", started at: " << CreatedAt + << " }"; + + return res; + } + + ui64 GetExtraMemoryAllocatedSize() { + return TxScanQueryMemory.load(); + } + + void Released(TIntrusivePtr& taskState, const TKqpResourcesRequest& resources) { + if (resources.ExecutionUnits) { + Counters->RmOnCompleteFree->Inc(); + } else { + Counters->RmExtraMemFree->Inc(); + } + + Counters->RmExternalMemory->Sub(resources.ExternalMemory); + TxExternalDataQueryMemory.fetch_sub(resources.ExternalMemory); + taskState->ExternalDataQueryMemory -= resources.ExternalMemory; + + TxScanQueryMemory.fetch_sub(resources.Memory); + taskState->ScanQueryMemory -= resources.Memory; + Counters->RmMemory->Sub(resources.Memory); + + TxExecutionUnits.fetch_sub(resources.ExecutionUnits); + taskState->ExecutionUnits -= resources.ExecutionUnits; + Counters->RmComputeActors->Sub(resources.ExecutionUnits); + } + + void Allocated(TIntrusivePtr& taskState, const TKqpResourcesRequest& resources) { + if (resources.ExecutionUnits > 0) { + Counters->RmOnStartAllocs->Inc(); + } + + Counters->RmExternalMemory->Add(resources.ExternalMemory); + TxExternalDataQueryMemory.fetch_add(resources.ExternalMemory); + taskState->ExternalDataQueryMemory += resources.ExternalMemory; + + TxScanQueryMemory.fetch_add(resources.Memory); + taskState->ScanQueryMemory += resources.Memory; + Counters->RmMemory->Add(resources.Memory); + if (resources.Memory) { + Counters->RmExtraMemAllocs->Inc(); + } + + TxExecutionUnits.fetch_add(resources.ExecutionUnits); + taskState->ExecutionUnits += resources.ExecutionUnits; + Counters->RmComputeActors->Add(resources.ExecutionUnits); + } +}; + /// detailed information on allocation failure struct TKqpRMAllocateResult { bool Success = 
true; NKikimrKqp::TEvStartKqpTasksResponse::ENotStartedTaskReason Status = NKikimrKqp::TEvStartKqpTasksResponse::INTERNAL_ERROR; TString FailReason; - ui64 TotalAllocatedQueryMemory = 0; + TIntrusivePtr TaskInfo; + TIntrusivePtr TxInfo; NKikimrKqp::TEvStartKqpTasksResponse::ENotStartedTaskReason GetStatus() const { return Status; @@ -78,25 +236,32 @@ struct TKqpLocalNodeResources { std::array Memory; }; +struct TPlannerPlacingOptions { + ui64 MaxNonParallelTasksExecutionLimit = 8; + ui64 MaxNonParallelDataQueryTasksLimit = 1000; + ui64 MaxNonParallelTopStageExecutionLimit = 1; + bool PreferLocalDatacenterExecution = true; +}; + /// per node singleton with instant API class IKqpResourceManager : private TNonCopyable { public: virtual ~IKqpResourceManager() = default; - virtual TKqpRMAllocateResult AllocateResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; - - using TResourcesAllocatedCallback = std::function; + virtual const TIntrusivePtr& GetCounters() const = 0; - virtual void FreeResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; - virtual void FreeResources(ui64 txId, ui64 taskId) = 0; + virtual TKqpRMAllocateResult AllocateResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) = 0; - virtual void NotifyExternalResourcesAllocated(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; + virtual TPlannerPlacingOptions GetPlacingOptions() = 0; + virtual TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, const ui32 tasksCount) = 0; + virtual void EstimateTaskResources(TTaskResourceEstimation& result, const ui32 tasksCount) = 0; + virtual void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) = 0; + virtual void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task) = 0; virtual void RequestClusterResourcesInfo(TOnResourcesSnapshotCallback&& callback) = 0; virtual TVector GetClusterResources() 
const = 0; virtual TKqpLocalNodeResources GetLocalResources() const = 0; - virtual NKikimrConfig::TTableServiceConfig::TResourceManager GetConfig() = 0; virtual std::shared_ptr GetPatternCache() = 0; @@ -142,7 +307,8 @@ struct TKqpProxySharedResources { NActors::IActor* CreateKqpResourceManagerActor(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TIntrusivePtr counters, NActors::TActorId resourceBroker = {}, - std::shared_ptr kqpProxySharedResources = nullptr); + std::shared_ptr kqpProxySharedResources = nullptr, + ui32 nodeId = 0); std::shared_ptr GetKqpResourceManager(TMaybe nodeId = Nothing()); std::shared_ptr TryGetKqpResourceManager(TMaybe nodeId = Nothing()); diff --git a/ydb/core/kqp/rm_service/kqp_rm_ut.cpp b/ydb/core/kqp/rm_service/kqp_rm_ut.cpp index 764e9cf8b6ce..579e90bad452 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_ut.cpp +++ b/ydb/core/kqp/rm_service/kqp_rm_ut.cpp @@ -151,7 +151,7 @@ class KqpRm : public TTestBase { void CreateKqpResourceManager( const NKikimrConfig::TTableServiceConfig::TResourceManager& config, ui32 nodeInd = 0) { auto kqpCounters = MakeIntrusive(Counters); - auto resman = CreateKqpResourceManagerActor(config, kqpCounters, ResourceBrokers[nodeInd]); + auto resman = CreateKqpResourceManagerActor(config, kqpCounters, ResourceBrokers[nodeInd], nullptr, Runtime->GetNodeId(nodeInd)); ResourceManagers.push_back(Runtime->Register(resman, nodeInd)); Runtime->RegisterService(MakeKqpResourceManagerServiceID( Runtime->GetNodeId(nodeInd)), ResourceManagers.back(), nodeInd); @@ -185,6 +185,14 @@ class KqpRm : public TTestBase { UNIT_ASSERT_VALUES_EQUAL(t->GetCounter("InFlyTasks")->Val(), infly); } + TIntrusivePtr MakeTx(ui64 txId, std::shared_ptr rm) { + return MakeIntrusive(txId, TInstant::Now(), rm->GetCounters(), "", (double)100, ""); + } + + TIntrusivePtr MakeTask(ui64 taskId, TIntrusivePtr tx) { + return MakeIntrusive(taskId, tx->CreatedAt); + } + void AssertResourceManagerStats( std::shared_ptr rm, ui64 
scanQueryMemory, ui32 executionUnits) { Y_UNUSED(executionUnits); @@ -316,14 +324,16 @@ void KqpRm::SingleTask() { NRm::TKqpResourcesRequest request; request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx1 = MakeTx(1, rm); + auto task2 = MakeTask(2, tx1); - bool allocated = rm->AllocateResources(1, 2, request); + bool allocated = rm->AllocateResources(tx1, task2, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 900, 90); AssertResourceBrokerSensors(0, 100, 0, 0, 1); - rm->FreeResources(1, 2); + rm->FreeResources(tx1, task2); AssertResourceManagerStats(rm, 1000, 100); AssertResourceBrokerSensors(0, 0, 0, 1, 0); } @@ -338,14 +348,23 @@ void KqpRm::ManyTasks() { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx1 = MakeTx(1, rm); + TIntrusivePtr task1; + for (ui32 i = 1; i < 10; ++i) { - bool allocated = rm->AllocateResources(1, i, request); + auto task = MakeTask(i, tx1); + if (!task1) { + task1 = task; + } + + bool allocated = rm->AllocateResources(tx1, task, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 1000 - 100 * i, 100 - 10 * i); AssertResourceBrokerSensors(0, 100 * i, 0, 0, i); } +/* // invalid taskId rm->FreeResources(1, 0); AssertResourceManagerStats(rm, 100, 10); @@ -355,8 +374,9 @@ void KqpRm::ManyTasks() { rm->FreeResources(10, 1); AssertResourceManagerStats(rm, 100, 10); AssertResourceBrokerSensors(0, 900, 0, 0, 9); +*/ - rm->FreeResources(1, 1); + rm->FreeResources(tx1, task1); AssertResourceManagerStats(rm, 200, 20); AssertResourceBrokerSensors(0, 800, 0, 1, 8); } @@ -371,7 +391,10 @@ void KqpRm::NotEnoughMemory() { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 10'000; - bool allocated = rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, 1000, 100); @@ -389,8 +412,10 @@ 
void KqpRm::NotEnoughExecutionUnits() { request.Memory = 100; request.ExecutionUnits = 1000; - bool allocated = true; - allocated &= rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, 1000, 100); @@ -410,12 +435,15 @@ void KqpRm::ResourceBrokerNotEnoughResources() { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 1'000; - bool allocated = rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(allocated); request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100'000; - allocated = rm->AllocateResources(1, 2, request); + allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, config.GetQueryMemoryLimit() - 1000, 90); @@ -432,11 +460,16 @@ void KqpRm::Snapshot(bool byExchanger) { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; request.ExecutionUnits = 10; + auto tx1 = MakeTx(1, rm); + auto tx2 = MakeTx(2, rm); - bool allocated = rm->AllocateResources(1, 2, request); + auto task2 = MakeTask(2, tx1); + auto task1 = MakeTask(1, tx2); + + bool allocated = rm->AllocateResources(tx1, task2, request); UNIT_ASSERT(allocated); - allocated &= rm->AllocateResources(2, 1, request); + allocated &= rm->AllocateResources(tx2, task1, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 800, 80); @@ -446,8 +479,8 @@ void KqpRm::Snapshot(bool byExchanger) { CheckSnapshot(0, {{800, 80}, {1000, 100}}, rm); - rm->FreeResources(1, 2); - rm->FreeResources(2, 1); + rm->FreeResources(tx1, task2); + rm->FreeResources(tx2, task1); AssertResourceManagerStats(rm, 1000, 100); AssertResourceBrokerSensors(0, 0, 0, 2, 0); @@ -474,8 +507,10 @@ void KqpRm::Reduce() { NRm::TKqpResourcesRequest 
request; request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx = MakeTx(1, rm); + auto task = MakeTask(1, tx); - bool allocated = rm->AllocateResources(1, 1, request); + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); @@ -485,6 +520,7 @@ void KqpRm::Reduce() { reduceRequest.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; reduceRequest.Memory = 70; +/* // invalid taskId rm->FreeResources(1, 0); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); @@ -494,8 +530,9 @@ void KqpRm::Reduce() { rm->FreeResources(10, 1); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); AssertResourceBrokerSensors(0, 100, 0, 0, 1); +*/ - rm->FreeResources(1, 1, reduceRequest); + rm->FreeResources(tx, task, reduceRequest); AssertResourceManagerStats(rm, 1000 - 30, 100 - 7); AssertResourceBrokerSensors(0, 30, 0, 0, 1); } @@ -517,11 +554,21 @@ void KqpRm::SnapshotSharing(bool byExchanger) { request.Memory = 100; request.ExecutionUnits = 10; + auto tx1Rm1 = MakeTx(1, rm_first); + auto tx2Rm1 = MakeTx(2, rm_first); + auto task1Rm1 = MakeTask(1, tx1Rm1); + auto task2Rm1 = MakeTask(1, tx2Rm1); + + auto tx1Rm2 = MakeTx(1, rm_second); + auto tx2Rm2 = MakeTx(2, rm_second); + auto task1Rm2 = MakeTask(1, tx1Rm2); + auto task2Rm2 = MakeTask(2, tx2Rm2); + { - bool allocated = rm_first->AllocateResources(1, 2, request); + bool allocated = rm_first->AllocateResources(tx1Rm1, task1Rm1, request); UNIT_ASSERT(allocated); - allocated &= rm_first->AllocateResources(2, 1, request); + allocated &= rm_first->AllocateResources(tx2Rm1, task2Rm1, request); UNIT_ASSERT(allocated); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -530,10 +577,10 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - bool allocated = rm_second->AllocateResources(1, 2, request); + bool allocated = rm_second->AllocateResources(tx1Rm2, task1Rm2, request); UNIT_ASSERT(allocated); - allocated &= 
rm_second->AllocateResources(2, 1, request); + allocated &= rm_second->AllocateResources(tx2Rm2, task2Rm2, request); UNIT_ASSERT(allocated); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -542,8 +589,8 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - rm_first->FreeResources(1, 2); - rm_first->FreeResources(2, 1); + rm_first->FreeResources(tx1Rm1, task1Rm1); + rm_first->FreeResources(tx2Rm1, task2Rm1); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -551,8 +598,8 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - rm_second->FreeResources(1, 2); - rm_second->FreeResources(2, 1); + rm_second->FreeResources(tx1Rm2, task1Rm2); + rm_second->FreeResources(tx2Rm2, task2Rm2); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); diff --git a/ydb/core/kqp/rm_service/ut/ya.make b/ydb/core/kqp/rm_service/ut/ya.make index 9c259fa0c3d4..fb595ee233f6 100644 --- a/ydb/core/kqp/rm_service/ut/ya.make +++ b/ydb/core/kqp/rm_service/ut/ya.make @@ -7,7 +7,6 @@ IF (SANITIZER_TYPE OR WITH_VALGRIND) ENDIF() SRCS( - kqp_resource_estimation_ut.cpp kqp_rm_ut.cpp ) diff --git a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp index 04fd7ea59ecd..3f93bda5465e 100644 --- a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp +++ b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -216,6 +217,12 @@ class TRunScriptActor : public NActors::TActorBootstrapped { WaitFinalizationRequest = true; RunState = IsExecuting() ? 
ERunState::Finishing : RunState; + if (RunState == ERunState::Cancelling) { + NYql::TIssue cancelIssue("Request was canceled by user"); + cancelIssue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO); + Issues.AddIssue(std::move(cancelIssue)); + } + auto scriptFinalizeRequest = std::make_unique( GetFinalizationStatusFromRunState(), ExecutionId, Database, Status, GetExecStatusFromStatusCode(Status), Issues, std::move(QueryStats), std::move(QueryPlan), std::move(QueryAst), LeaseGeneration @@ -424,6 +431,13 @@ class TRunScriptActor : public NActors::TActorBootstrapped { const auto& issueMessage = record.GetResponse().GetQueryIssues(); NYql::IssuesFromMessage(issueMessage, Issues); + if (record.GetYdbStatus() == Ydb::StatusIds::TIMEOUT) { + const TDuration timeout = GetQueryTimeout(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_SCRIPT, Request.GetRequest().GetTimeoutMs(), {}, QueryServiceConfig); + NYql::TIssue timeoutIssue(TStringBuilder() << "Current request timeout is " << timeout.MilliSeconds() << "ms"); + timeoutIssue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO); + Issues.AddIssue(std::move(timeoutIssue)); + } + if (record.GetResponse().HasQueryPlan()) { QueryPlan = record.GetResponse().GetQueryPlan(); } diff --git a/ydb/core/kqp/runtime/kqp_compute_scheduler.cpp b/ydb/core/kqp/runtime/kqp_compute_scheduler.cpp new file mode 100644 index 000000000000..a29f837a7564 --- /dev/null +++ b/ydb/core/kqp/runtime/kqp_compute_scheduler.cpp @@ -0,0 +1,859 @@ +#include "kqp_compute_scheduler.h" + +#include + +#include +#include + +#include +#include + +namespace { + static constexpr ui64 FromDuration(TDuration d) { + return d.MicroSeconds(); + } + + static constexpr TDuration ToDuration(double t) { + return TDuration::MicroSeconds(t); + } + + static constexpr TDuration AvgBatch = TDuration::MicroSeconds(100); +} + +namespace NKikimr { +namespace NKqp { + +class IObservable : TNonCopyable { +public: + virtual bool Update() = 0; + + void AddDependency(IObservable* dep) { 
+ Depth = Max(Depth, dep->Depth + 1); + Dependencies.insert(dep); + dep->Dependents.insert(this); + } + + bool HasDependents() { + return !Dependents.empty(); + } + + virtual ~IObservable() { + for (auto& dep : Dependencies) { + dep->Dependents.erase(this); + } + for (auto& dep : Dependents) { + dep->Dependencies.erase(this); + } + } + + size_t GetDepth() { + return Depth; + } + + template + void ForAllDependents(T&& f) { + for (auto* dep : Dependents) { + f(dep); + } + } + +protected: + TSet CutAllDependents() { + TSet res; + Dependents.swap(res); + for (auto* dep : res) { + dep->Dependencies.erase(this); + } + return res; + } + +private: + size_t Depth = 0; + + TSet Dependencies; + TSet Dependents; +}; + +template +class IObservableValue : public IObservable { +protected: + virtual double DoUpdateValue() = 0; + +public: + bool Update() override { + if (auto val = DoUpdateValue()) { + Value = val; + return true; + } else { + return false; + } + } + + T GetValue() { + return Value; + } + +private: + T Value; +}; + +class TShare : public IObservableValue { +protected: + double DoUpdateValue() override { + return Base->GetValue() * Share->GetValue(); + } + +public: + TShare(IObservableValue* base, IObservableValue* share) + : Base(base) + , Share(share) + { + AddDependency(base); + AddDependency(share); + Update(); + } + +private: + IObservableValue* Base; + IObservableValue* Share; +}; + +template +class TParameter; + +class TObservableUpdater : IObservable { +private: + bool Update() override { + return false; + } + +public: + void UpdateAll() { + TVector> queue; + auto deps = CutAllDependents(); + for (auto* dep : deps) { + queue.resize(Max(queue.size(), dep->GetDepth() + 1)); + queue[dep->GetDepth()].insert(dep); + } + + for (size_t i = 0; i < queue.size(); ++i) { + TSet cur; + queue[i].swap(cur); + for (auto* node : cur) { + if (node->Update()) { + node->ForAllDependents([&](auto* dep){ + queue.resize(Max(queue.size(), dep->GetDepth() + 1)); + 
queue[dep->GetDepth()].insert(dep); + }); + } + } + } + + } + + void ToUpdate(IObservable* dep) { + dep->AddDependency(this); + } + + using TParameterKey = std::pair; + + template + T* FindValue(TParameterKey key) { + if (auto ptr = Params.FindPtr(key)) { + return ptr->Get(); + } + return nullptr; + } + + template + void AddValue(TParameterKey key, THolder value) { + Params.emplace(key, std::move(value)); + } + + template + TParameter* FindOrAddParameter(TParameterKey key, double def); + + ui64 ValuesCount() { + return Params.size(); + } + + void CollectValues() { + std::vector toerase; + for (auto& [k, v] : Params) { + if (!v.Holder->HasDependents()) { + toerase.push_back(k); + } + } + for (auto& key : toerase) { + Params.erase(key); + } + } + +private: + struct TValueContainer { + TParameter* AsDoubleParameter = nullptr; + TParameter* AsBoolParameter = nullptr; + + THolder Holder; + + TValueContainer() = default; + TValueContainer(TValueContainer&&) = default; + + TValueContainer(THolder value) { + Holder = std::move(value); + } + + TValueContainer(THolder> value); + TValueContainer(THolder> value); + + template + T* Get(); + }; + + THashMap Params; +}; + +template +class TParameter : public IObservableValue { +public: + TParameter(TObservableUpdater* engine, double initialValue) + : Value_(initialValue) + , Updater_(engine) + { + Updater_->ToUpdate(this); + } + + void SetValue(T val) { + Value_ = val; + Updater_->ToUpdate(this); + } + +protected: + double DoUpdateValue() override { + return Value_; + } + +private: + T Value_; + TObservableUpdater* Updater_; +}; + +template +TParameter* TObservableUpdater::FindOrAddParameter(TParameterKey key, double def) { + if (auto* ptr = FindValue>(key)) { + return ptr; + } + auto value = MakeHolder>(this, def); + auto* result = value.Get(); + AddValue>(key, std::move(value)); + return result; +} + +TObservableUpdater::TValueContainer::TValueContainer(THolder> value) { + AsDoubleParameter = value.Get(); + Holder = 
THolder(value.Release()); +} + +TObservableUpdater::TValueContainer::TValueContainer(THolder> value) { + AsBoolParameter = value.Get(); + Holder = THolder(value.Release()); +} + +template +T* TObservableUpdater::TValueContainer::Get() { + if constexpr (std::is_same_v>) { + return AsDoubleParameter; + } else if constexpr (std::is_same_v>) { + return AsBoolParameter; + } else { + return Holder.Get(); + } +} + +template +class TMultiThreadView { +public: + TMultiThreadView(std::atomic* usage, T* slot) + : Usage(usage) + , Slot(slot) + { + Usage->fetch_add(1); + } + const T* get() { + return Slot; + } + + ~TMultiThreadView() { + Usage->fetch_sub(1); + } + +private: + std::atomic* Usage; + T* Slot; +}; + +template +class TMultithreadPublisher { +public: + void Publish() { + auto oldVal = CurrentT.load(); + auto newVal = 1 - oldVal; + CurrentT.store(newVal); + while (true) { + if (Usage[oldVal].load() == 0) { + Slots[oldVal] = Slots[newVal]; + return; + } + } + } + + T* Next() { + return &Slots[1 - CurrentT.load()]; + } + + TMultiThreadView Current() { + while (true) { + auto val = CurrentT.load(); + TMultiThreadView view(&Usage[val], &Slots[val]); + if (CurrentT.load() == val) { + return view; + } + } + } + +private: + std::atomic CurrentT = 0; + std::atomic Usage[2] = {0, 0}; + T Slots[2]; +}; + +TSchedulerEntityHandle::TSchedulerEntityHandle(TSchedulerEntity* ptr) + : Ptr(ptr) +{ +} + +TSchedulerEntityHandle::TSchedulerEntityHandle(){} + +TSchedulerEntityHandle::TSchedulerEntityHandle(TSchedulerEntityHandle&& other) { + Ptr.swap(other.Ptr); +} + +TSchedulerEntityHandle& TSchedulerEntityHandle::operator = (TSchedulerEntityHandle&& other) { + Ptr.swap(other.Ptr); + return *this; +} + +TSchedulerEntityHandle::~TSchedulerEntityHandle() = default; + +class TSchedulerEntity { +public: + TSchedulerEntity() {} + ~TSchedulerEntity() {} + + struct TGroupMutableStats { + double Capacity = 0; + TMonotonic LastNowRecalc; + bool Disabled = false; + i64 EntitiesWeight = 0; + double 
MaxDeviation = 0; + double MaxLimitDeviation = 0; + + ssize_t TrackedBefore = 0; + + double Limit(TMonotonic now) const { + return FromDuration(now - LastNowRecalc) * Capacity + MaxLimitDeviation + TrackedBefore; + } + }; + + struct TGroupRecord { + std::atomic TrackedMicroSeconds = 0; + std::atomic DelayedSumBatches = 0; + std::atomic DelayedCount = 0; + + THolder> Share; + + ::NMonitoring::TDynamicCounters::TCounterPtr Vtime; + ::NMonitoring::TDynamicCounters::TCounterPtr EntitiesWeight; + ::NMonitoring::TDynamicCounters::TCounterPtr Limit; + ::NMonitoring::TDynamicCounters::TCounterPtr Weight; + + ::NMonitoring::TDynamicCounters::TCounterPtr SchedulerClock; + ::NMonitoring::TDynamicCounters::TCounterPtr SchedulerLimitUs; + ::NMonitoring::TDynamicCounters::TCounterPtr SchedulerTrackedUs; + + TString Name; + + void AssignWeight() { + MutableStats.Next()->Capacity = Share->GetValue(); + } + + void InitCounters(const TIntrusivePtr& counters) { + if (Vtime || !Name) { + return; + } + + auto group = counters->GetKqpCounters()->GetSubgroup("NodeScheduler/Group", Name); + Vtime = group->GetCounter("VTime", true); + EntitiesWeight = group->GetCounter("Entities", false); + Limit = group->GetCounter("Limit", true); + Weight = group->GetCounter("Weight", false); + SchedulerClock = group->GetCounter("Clock", false); + SchedulerTrackedUs = group->GetCounter("Tracked", true); + SchedulerLimitUs = group->GetCounter("AbsoluteLimit", true); + } + + TMultithreadPublisher MutableStats; + }; + + TStackVec> Groups; + i64 Weight; + double Vruntime = 0; + double Vstart; + + double Vcurrent; + + TDuration MaxDelay; + + static constexpr double WakeupDelay = 1.1; + static constexpr double BatchCalcDecay = 0; + TDuration BatchTime = AvgBatch; + + TDuration OverflowToleranceTimeout = TDuration::Seconds(1); + + static constexpr TDuration ActivationPenalty = TDuration::MicroSeconds(10); + + size_t Wakeups = 0; + bool isThrottled = false; + + void TrackTime(TDuration time, TMonotonic) { + for 
(auto group : Groups) { + //auto current = group->MutableStats.Current(); + group->TrackedMicroSeconds.fetch_add(time.MicroSeconds()); + } + } + + void UpdateBatchTime(TDuration time) { + Wakeups = 0; + auto newBatch = BatchTime * BatchCalcDecay + time * (1 - BatchCalcDecay); + if (isThrottled) { + MarkResumed(); + BatchTime = newBatch; + MarkThrottled(); + } else { + BatchTime = newBatch; + } + } + + TMaybe GroupDelay(TMonotonic now, TGroupRecord* group) { + auto current = group->MutableStats.Current(); + auto limit = current.get()->Limit(now); + auto tracked = group->TrackedMicroSeconds.load(); + //double Coeff = pow(WakeupDelay, Wakeups); + if (limit > tracked) { + return {}; + } else { + return Min(MaxDelay, ToDuration(/*Coeff * */(tracked - limit + + Max(0, group->DelayedSumBatches.load()) + BatchTime.MicroSeconds() + + ActivationPenalty.MicroSeconds() * (group->DelayedCount.load() + 1) + + current.get()->MaxLimitDeviation) / current.get()->Capacity)); + } + } + + TMaybe GroupDelay(TMonotonic now) { + TMaybe result; + for (auto group : Groups) { + auto groupResult = GroupDelay(now, group); + if (!result) { + result = groupResult; + } else if (groupResult && *result < *groupResult) { + result = groupResult; + } + } + return result; + } + + void MarkThrottled() { + isThrottled = true; + for (auto group : Groups) { + group->DelayedSumBatches.fetch_add(BatchTime.MicroSeconds()); + group->DelayedCount.fetch_add(1); + } + } + + void MarkResumed() { + isThrottled = false; + for (auto group : Groups) { + group->DelayedSumBatches.fetch_sub(BatchTime.MicroSeconds()); + group->DelayedCount.fetch_sub(1); + } + } +}; + +struct TComputeScheduler::TImpl { + THashMap GroupId; + std::vector> Records; + + TObservableUpdater WeightsUpdater; + TParameter SumCores{&WeightsUpdater, 1}; + + enum : ui32 { + Share = 1, + PerQueryShare = 2, + }; + + TIntrusivePtr Counters; + TDuration SmoothPeriod = TDuration::MilliSeconds(100); + TDuration ForgetInteval = TDuration::Seconds(2); + + 
TDuration MaxDelay = TDuration::Seconds(10); + + void CreateGroup(THolder> share, NMonotonic::TMonotonic now, std::optional groupName = std::nullopt) { + auto group = std::make_unique(); + group->Share = std::move(share); + if (groupName) { + group->Name = *groupName; + GroupId[*groupName] = Records.size(); + } + AdvanceTime(now, group.get()); + Records.push_back(std::move(group)); + } + + void CollectGroups() { + std::vector remap; + std::vector> records; + + for (size_t i = 0; i < Records.size(); ++i) { + auto record = Records[i]->MutableStats.Current(); + if (record.get()->EntitiesWeight > 0 || Records[i]->Share->HasDependents()) { + remap.push_back(records.size()); + records.emplace_back(Records[i].release()); + } else { + // to delete + remap.push_back(-1); + } + } + + Records.swap(records); + + { + std::vector toerase; + for (auto& [k, v] : GroupId) { + if (remap[v] >= 0) { + v = remap[v]; + } else { + toerase.push_back(k); + } + } + for (auto& k: toerase) { + GroupId.erase(k); + } + } + + WeightsUpdater.CollectValues(); + } + + void AdvanceTime(TMonotonic now, TSchedulerEntity::TGroupRecord* record); +}; + +TComputeScheduler::TComputeScheduler() { + Impl = std::make_unique(); +} + +TComputeScheduler::~TComputeScheduler() = default; + +void TComputeScheduler::AddToGroup(TMonotonic now, ui64 id, TSchedulerEntityHandle& handle) { + auto group = Impl->Records[id].get(); + (*handle).Groups.push_back(group); + group->MutableStats.Next()->EntitiesWeight += (*handle).Weight; + Impl->AdvanceTime(now, group); +} + +TSchedulerEntityHandle TComputeScheduler::Enroll(TString groupName, i64 weight, TMonotonic now) { + Y_ENSURE(Impl->GroupId.contains(groupName), "unknown scheduler group"); + auto id = Impl->GroupId.at(groupName); + + TSchedulerEntityHandle result{new TSchedulerEntity()}; + (*result).Weight = weight; + (*result).MaxDelay = Impl->MaxDelay; + + AddToGroup(now, id, result); + return result; +} + +void TComputeScheduler::TImpl::AdvanceTime(TMonotonic now, 
TSchedulerEntity::TGroupRecord* record) { + if (Counters) { + record->InitCounters(Counters); + } + record->MutableStats.Next()->Capacity = record->Share->GetValue(); + auto& v = record->MutableStats; + { + auto group = v.Current(); + if (group.get()->LastNowRecalc > now) { + return; + } + double delta = 0; + + auto tracked = record->TrackedMicroSeconds.load(); + v.Next()->MaxLimitDeviation = SmoothPeriod.MicroSeconds() * v.Next()->Capacity; + v.Next()->LastNowRecalc = now; + v.Next()->TrackedBefore = + Max( + tracked - FromDuration(ForgetInteval) * group.get()->Capacity, + Min(group.get()->Limit(now) - group.get()->MaxLimitDeviation, tracked)); + + v.Next()->MaxDeviation = (FromDuration(SmoothPeriod) * v.Next()->Capacity) / v.Next()->Capacity; + + //if (group.get()->EntitiesWeight > 0) { + // delta = FromDuration(now - group.get()->LastNowRecalc) * group.get()->Capacity / group.get()->EntitiesWeight; + //} + + if (record->Vtime) { + record->SchedulerLimitUs->Set(group.get()->Limit(now)); + record->SchedulerTrackedUs->Set(record->TrackedMicroSeconds.load()); + record->SchedulerClock->Add(now.MicroSeconds() - group.get()->LastNowRecalc.MicroSeconds()); + record->Vtime->Add(delta); + record->EntitiesWeight->Set(v.Next()->EntitiesWeight); + record->Limit->Add(FromDuration(now - group.get()->LastNowRecalc) * group.get()->Capacity); + record->Weight->Set(group.get()->Capacity); + } + } + v.Publish(); +} + +void TComputeScheduler::AdvanceTime(TMonotonic now) { + Impl->WeightsUpdater.UpdateAll(); + for (size_t i = 0; i < Impl->Records.size(); ++i) { + Impl->AdvanceTime(now, Impl->Records[i].get()); + } + Impl->CollectGroups(); + if (Impl->Counters) { + Impl->Counters->SchedulerGroupsCount->Set(Impl->Records.size()); + Impl->Counters->SchedulerValuesCount->Set(Impl->WeightsUpdater.ValuesCount()); + } +} + +void TComputeScheduler::Deregister(TSchedulerEntityHandle& self, TMonotonic now) { + for (auto group : (*self).Groups) { + auto* next = group->MutableStats.Next(); + 
next->EntitiesWeight -= (*self).Weight; + Impl->AdvanceTime(now, group); + } +} + +ui64 TComputeScheduler::MakePerQueryGroup(TMonotonic now, double share, TString baseGroup) { + auto baseId = Impl->GroupId.at(baseGroup); + auto perQueryShare = Impl->WeightsUpdater.FindOrAddParameter({baseGroup, TImpl::PerQueryShare}, share); + + Impl->CreateGroup(MakeHolder(Impl->Records[baseId]->Share.Get(), perQueryShare), now); + ui64 res = Impl->Records.size() - 1; + Impl->AdvanceTime(now, Impl->Records[res].get()); + return res; +} + +void TSchedulerEntityHandle::TrackTime(TDuration time, TMonotonic now) { + Ptr->TrackTime(time, now); +} + +void TSchedulerEntityHandle::ReportBatchTime(TDuration time) { + Ptr->UpdateBatchTime(time); +} + +TMaybe TSchedulerEntityHandle::Delay(TMonotonic now) { + return Ptr->GroupDelay(now); +} + +void TSchedulerEntityHandle::MarkResumed() { + Ptr->MarkResumed(); +} + +void TSchedulerEntityHandle::MarkThrottled() { + Ptr->MarkThrottled(); +} + +void TSchedulerEntityHandle::Clear() { + Ptr.reset(); +} + +void TComputeScheduler::ReportCounters(TIntrusivePtr counters) { + Impl->Counters = counters; +} + +void TComputeScheduler::SetMaxDeviation(TDuration period) { + Impl->SmoothPeriod = period; +} + +void TComputeScheduler::SetForgetInterval(TDuration period) { + Impl->ForgetInteval = period; +} + +bool TComputeScheduler::Disabled(TString group) { + auto ptr = Impl->GroupId.FindPtr(group); + return !ptr || Impl->Records[*ptr]->MutableStats.Current().get()->Disabled; +} + + +void TComputeScheduler::Disable(TString group, TMonotonic now) { + auto ptr = Impl->GroupId.FindPtr(group); + // if ptr == 0 it's already disabled + if (ptr) { + Impl->Records[*ptr]->MutableStats.Next()->Disabled = true; + Impl->AdvanceTime(now, Impl->Records[*ptr].get()); + } +} + +void TComputeScheduler::UpdateGroupShare(TString group, double share, TMonotonic now) { + auto ptr = Impl->GroupId.FindPtr(group); + + auto* shareValue = Impl->WeightsUpdater.FindOrAddParameter({group, 
TImpl::Share}, share); + shareValue->SetValue(share); + if (!ptr) { + auto cap = MakeHolder(&Impl->SumCores, shareValue); + Impl->CreateGroup(std::move(cap), now, group); + } else { + auto& record = Impl->Records[*ptr]; + record->MutableStats.Next()->Disabled = false; + Impl->AdvanceTime(now, record.get()); + } +} + +void TComputeScheduler::UpdatePerQueryShare(TString group, double share, TMonotonic) { + auto ptr = Impl->WeightsUpdater.FindOrAddParameter({group, TImpl::PerQueryShare}, share); + ptr->SetValue(share); +} + +void TComputeScheduler::SetCapacity(ui64 cores) { + Impl->SumCores.SetValue(cores); +} + +::NMonitoring::TDynamicCounters::TCounterPtr TComputeScheduler::GetGroupUsageCounter(TString group) const { + return Impl->Counters + ->GetKqpCounters() + ->GetSubgroup("NodeScheduler/Group", group) + ->GetCounter("Usage", true); +} + + +struct TEvPingPool : public TEventLocal { + TString DatabaseId; + TString Pool; + + TEvPingPool(TString databaseId, TString pool) + : DatabaseId(databaseId) + , Pool(pool) + { + } +}; + +class TSchedulerActor : public TActorBootstrapped { +public: + TSchedulerActor(TSchedulerActorOptions options) + : Opts(options) + { + if (!Opts.Scheduler) { + Opts.Scheduler = std::make_shared(); + } + Opts.Scheduler->SetForgetInterval(Opts.ForgetOverflowTimeout); + Opts.Scheduler->ReportCounters(Opts.Counters); + } + + void Bootstrap() { + Schedule(Opts.AdvanceTimeInterval, new TEvents::TEvWakeup()); + + ui32 tableServiceConfigKind = (ui32) NKikimrConsole::TConfigItem::TableServiceConfigItem; + Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), + new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({tableServiceConfigKind}), + IEventHandle::FlagTrackDelivery); + + Become(&TSchedulerActor::State); + SetCapacity(SelfId().PoolID()); + } + + void SetCapacity(ui32 pool) { + NActors::TExecutorPoolStats poolStats; + TVector threadsStats; + TlsActivationContext->ActorSystem()->GetPoolStats(pool, poolStats, threadsStats); + 
ui64 threads = Max(poolStats.MaxThreadCount, 1); + Opts.Counters->SchedulerCapacity->Set(threads); + Opts.Scheduler->SetCapacity(threads); + } + + STATEFN(State) { + switch (ev->GetTypeRewrite()) { + hFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, Handle); + hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + + hFunc(NWorkload::TEvUpdatePoolInfo, Handle); + + hFunc(TEvSchedulerDeregister, Handle); + hFunc(TEvSchedulerNewPool, Handle); + hFunc(TEvPingPool, Handle); + hFunc(TEvents::TEvWakeup, Handle); + default: { + Y_ABORT("Unexpected event 0x%x for TKqpSchedulerService", ev->GetTypeRewrite()); + } + } + } + + void Handle(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse::TPtr&) { + LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::KQP_NODE, "Subscribed for config changes"); + } + + void Handle(TEvSchedulerDeregister::TPtr& ev) { + if (ev->Get()->SchedulerEntity) { + Opts.Scheduler->Deregister(ev->Get()->SchedulerEntity, TlsActivationContext->Monotonic()); + } + } + + void Handle(TEvSchedulerNewPool::TPtr& ev) { + Send(MakeKqpWorkloadServiceId(SelfId().NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(ev->Get()->DatabaseId, ev->Get()->Pool)); + } + + void Handle(TEvPingPool::TPtr& ev) { + Send(MakeKqpWorkloadServiceId(SelfId().NodeId()), new NWorkload::TEvSubscribeOnPoolChanges(ev->Get()->DatabaseId, ev->Get()->Pool)); + } + + void Handle(NWorkload::TEvUpdatePoolInfo::TPtr& ev) { + if (ev->Get()->Config.has_value()) { + auto totalShare = ev->Get()->Config->TotalCpuLimitPercentPerNode / 100.0; + auto queryShare = ev->Get()->Config->QueryCpuLimitPercentPerNode / 100.0; + + if (totalShare <= 0 && queryShare > 0) { + totalShare = 1; + } + + if (queryShare <= 0) { + queryShare = 1; + } + + Opts.Scheduler->UpdateGroupShare(ev->Get()->PoolId, totalShare, TlsActivationContext->Monotonic()); + Opts.Scheduler->UpdatePerQueryShare(ev->Get()->PoolId, queryShare, TlsActivationContext->Monotonic()); + } else { + 
Opts.Scheduler->Disable(ev->Get()->PoolId, TlsActivationContext->Monotonic()); + } + } + + void Handle(TEvents::TEvWakeup::TPtr&) { + SetCapacity(SelfId().PoolID()); + Opts.Scheduler->AdvanceTime(TlsActivationContext->Monotonic()); + Schedule(Opts.AdvanceTimeInterval, new TEvents::TEvWakeup()); + } + + void Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { + auto &event = ev->Get()->Record; + auto& config = event.GetConfig().GetTableServiceConfig().GetComputeSchedulerSettings(); + + Opts.AdvanceTimeInterval = TDuration::MicroSeconds(config.GetAdvanceTimeIntervalUsec()); + Opts.ActivePoolPollingTimeout = TDuration::Seconds(config.GetActivePoolPollingSec()); + Opts.Scheduler->SetForgetInterval(TDuration::MicroSeconds(config.GetForgetOverflowTimeoutUsec())); + } + +private: + TSchedulerActorOptions Opts; +}; + +IActor* CreateSchedulerActor(TSchedulerActorOptions opts) { + return new TSchedulerActor(opts); +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/runtime/kqp_compute_scheduler.h b/ydb/core/kqp/runtime/kqp_compute_scheduler.h new file mode 100644 index 000000000000..4c1f5f8c972f --- /dev/null +++ b/ydb/core/kqp/runtime/kqp_compute_scheduler.h @@ -0,0 +1,339 @@ +#pragma once + +#include +#include + +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NKqp { + +class TSchedulerEntity; +class TSchedulerEntityHandle { +private: + std::unique_ptr Ptr; + +public: + TSchedulerEntityHandle(TSchedulerEntity*); + + TSchedulerEntityHandle(); + TSchedulerEntityHandle(TSchedulerEntityHandle&&); + + TSchedulerEntityHandle& operator = (TSchedulerEntityHandle&&); + + bool Defined() const { + return Ptr.get() != nullptr; + } + + operator bool () const { + return Defined(); + } + + TSchedulerEntity& operator*() { + return *Ptr; + } + + void TrackTime(TDuration time, TMonotonic now); + void ReportBatchTime(TDuration time); + + TMaybe Delay(TMonotonic now); + + void MarkThrottled(); + void 
MarkResumed(); + + double EstimateWeight(TMonotonic now, TDuration minTime); + + void Clear(); + + ~TSchedulerEntityHandle(); +}; + +class TComputeScheduler { +public: + TComputeScheduler(); + ~TComputeScheduler(); + + void ReportCounters(TIntrusivePtr); + + + void SetCapacity(ui64 cores); + + void UpdateGroupShare(TString name, double share, TMonotonic now); + void UpdatePerQueryShare(TString name, double share, TMonotonic now); + + ui64 MakePerQueryGroup(TMonotonic now, double share, TString baseGroup); + void AddToGroup(TMonotonic now, ui64, TSchedulerEntityHandle&); + + void SetMaxDeviation(TDuration); + void SetForgetInterval(TDuration); + ::NMonitoring::TDynamicCounters::TCounterPtr GetGroupUsageCounter(TString group) const; + + TSchedulerEntityHandle Enroll(TString group, i64 weight, TMonotonic now); + + void AdvanceTime(TMonotonic now); + + void Deregister(TSchedulerEntityHandle& self, TMonotonic now); + + bool Disabled(TString group); + void Disable(TString group, TMonotonic now); + +private: + struct TImpl; + std::unique_ptr Impl; +}; + +struct TComputeActorSchedulingOptions { + TMonotonic Now; + NActors::TActorId SchedulerActorId; + TSchedulerEntityHandle Handle; + TComputeScheduler* Scheduler; + TString Group = ""; + double Weight = 1; + bool NoThrottle = true; + TIntrusivePtr Counters = nullptr; +}; + +struct TKqpComputeSchedulerEvents { + enum EKqpComputeSchedulerEvents { + EvDeregister = EventSpaceBegin(TKikimrEvents::ES_KQP) + 400, + EvNewPool, + EvPingPool, + }; +}; + +struct TEvSchedulerDeregister : public TEventLocal { + TSchedulerEntityHandle SchedulerEntity; + + TEvSchedulerDeregister(TSchedulerEntityHandle entity) + : SchedulerEntity(std::move(entity)) + { + } +}; + +struct TEvSchedulerNewPool : public TEventLocal { + TString DatabaseId; + TString Pool; + + TEvSchedulerNewPool(TString databaseId, TString pool) + : DatabaseId(databaseId) + , Pool(pool) + { + } +}; + + +template +class TSchedulableComputeActorBase : public 
NYql::NDq::TDqSyncComputeActorBase { +private: + using TBase = NYql::NDq::TDqSyncComputeActorBase; + + static constexpr double SecToUsec = 1e6; + +public: + template + TSchedulableComputeActorBase(TComputeActorSchedulingOptions options, TArgs&&... args) + : TBase(std::forward(args)...) + , SelfHandle(std::move(options.Handle)) + , SchedulerActorId(options.SchedulerActorId) + , NoThrottle(options.NoThrottle) + , Counters(options.Counters) + , Group(options.Group) + , Weight(options.Weight) + { + if (!NoThrottle) { + Y_ABORT_UNLESS(Counters); + Y_ABORT_UNLESS(SelfHandle); + GroupUsage = options.Scheduler->GetGroupUsageCounter(options.Group); + } else { + Y_ABORT_UNLESS(!SelfHandle); + } + } + + static constexpr ui64 ResumeWakeupTag = 201; + + TMonotonic Now() { + return TMonotonic::Now(); + //return TlsActivationContext->Monotonic(); + } + + void HandleWakeup(NActors::TEvents::TEvWakeup::TPtr& ev) { + auto tag = ev->Get()->Tag; + CA_LOG_D("wakeup with tag " << tag); + if (tag == ResumeWakeupTag) { + TBase::DoExecute(); + } else { + TBase::HandleExecuteBase(ev); + } + } + + STFUNC(BaseStateFuncBody) { + AccountActorSystemStats(TlsActivationContext->Monotonic()); + // we assume that exception handling is done in parents/descendents + switch (ev->GetTypeRewrite()) { + hFunc(NActors::TEvents::TEvWakeup, TSchedulableComputeActorBase::HandleWakeup); + default: + TBase::BaseStateFuncBody(ev); + } + } + + void DoBoostrap() { + if (!SelfHandle.Defined()) { + return; + } + + OldActivationStats = TlsActivationContext->AsActorContext().Mailbox.GetElapsedCycles(); + if (!OldActivationStats.has_value()) { + TlsActivationContext->AsActorContext().Mailbox.EnableStats(); + OldActivationStats = TlsActivationContext->AsActorContext().Mailbox.GetElapsedCycles(); + } + + Y_ABORT_UNLESS(OldActivationStats.has_value()); + } + +private: + void ReportThrottledTime(TMonotonic now) { + if (Counters && Throttled) { + Counters->SchedulerThrottled->Add((now - *Throttled).MicroSeconds()); + } + if 
(Throttled) { + SelfHandle.MarkResumed(); + Throttled.Clear(); + } + } + +protected: + void DoExecuteImpl() override { + if (!SelfHandle.Defined()) { + if (NoThrottle) { + return TBase::DoExecuteImpl(); + } else { + return; + } + } + + TMonotonic now = Now(); + AccountActorSystemStats(now); + TMaybe delay = CalcDelay(now); + bool executed = false; + if (NoThrottle || !delay) { + ReportThrottledTime(now); + executed = true; + + ExecutionTimer.ConstructInPlace(); + TBase::DoExecuteImpl(); + + TDuration passed = TDuration::MicroSeconds(ExecutionTimer->Passed() * SecToUsec); + + if (Finished) { + return; + } + TrackedWork += passed; + SelfHandle.ReportBatchTime(passed); + SelfHandle.TrackTime(passed, now); + GroupUsage->Add(passed.MicroSeconds()); + Counters->ComputeActorExecutions->Collect(passed.MicroSeconds()); + } + if (delay) { + Counters->SchedulerDelays->Collect(delay->MicroSeconds()); + CA_LOG_D("schedule wakeup after " << delay->MicroSeconds() << " msec "); + this->Schedule(*delay, new NActors::TEvents::TEvWakeup(ResumeWakeupTag)); + } + + if (!executed) { + if (!Throttled) { + SelfHandle.MarkThrottled(); + Throttled = now; + } else { + Counters->ThrottledActorsSpuriousActivations->Inc(); + } + } + ExecutionTimer.Clear(); + } + + void AccountActorSystemStats(NMonotonic::TMonotonic now) { + if (!SelfHandle.Defined()) { + return; + } + + auto newStats = TlsActivationContext->AsActorContext().Mailbox.GetElapsedCycles(); + Y_ABORT_UNLESS(OldActivationStats.has_value()); + Y_ABORT_UNLESS(newStats.has_value()); + Y_ABORT_UNLESS(*newStats >= *OldActivationStats); + auto toAccount = TDuration::MicroSeconds(NHPTimer::GetSeconds(*newStats - *OldActivationStats) * 1e6); + { + auto minTime = Min(toAccount, TrackedWork); + TrackedWork -= minTime; + toAccount -= minTime; + } + + GroupUsage->Add(toAccount.MicroSeconds()); + SelfHandle.TrackTime(toAccount, now); + OldActivationStats = newStats; + } + + TMaybe CalcDelay(NMonotonic::TMonotonic now) { + auto result = 
SelfHandle.Delay(now); + Counters->ComputeActorDelays->Collect(result.GetOrElse(TDuration::Zero()).MicroSeconds()); + if (NoThrottle || !result.Defined()) { + return {}; + } else { + return result; + } + } + + void PassAway() override { + Finished = true; + if (SelfHandle) { + auto now = Now(); + if (Throttled) { + SelfHandle.MarkResumed(); + } + if (ExecutionTimer) { + TDuration passed = TDuration::MicroSeconds(ExecutionTimer->Passed() * SecToUsec); + SelfHandle.TrackTime(passed, now); + GroupUsage->Add(passed.MicroSeconds()); + } + } + if (SelfHandle) { + auto finishEv = MakeHolder(std::move(SelfHandle)); + this->Send(SchedulerActorId, finishEv.Release()); + } + TBase::PassAway(); + } + +private: + TMaybe ExecutionTimer; + TDuration TrackedWork = TDuration::Zero(); + TMaybe Throttled; + TSchedulerEntityHandle SelfHandle; + NActors::TActorId SchedulerActorId; + bool NoThrottle; + bool Finished = false; + + std::optional OldActivationStats; + + TIntrusivePtr Counters; + ::NMonitoring::TDynamicCounters::TCounterPtr GroupUsage; + + TString Group; + double Weight; +}; + +struct TSchedulerActorOptions { + std::shared_ptr Scheduler; + TDuration AdvanceTimeInterval; + TDuration ForgetOverflowTimeout; + TDuration ActivePoolPollingTimeout; + TIntrusivePtr Counters; +}; + +IActor* CreateSchedulerActor(TSchedulerActorOptions); + +} // namespace NKqp +} // namespace NKikimR diff --git a/ydb/core/kqp/runtime/kqp_read_actor.cpp b/ydb/core/kqp/runtime/kqp_read_actor.cpp index 7e02b7e58b76..aa17440a948d 100644 --- a/ydb/core/kqp/runtime/kqp_read_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_read_actor.cpp @@ -1,4 +1,5 @@ #include "kqp_read_actor.h" +#include "kqp_compute_scheduler.h" #include #include @@ -837,8 +838,9 @@ class TKqpReadActor : public TActorBootstrapped, public NYql::NDq << ", limit: " << limit << ", readId = " << id << ", reverse = " << record.GetReverse() - << " snapshot = (txid=" << Settings->GetSnapshot().GetTxId() << ",step=" << Settings->GetSnapshot().GetStep() << 
")" - << " lockTxId = " << Settings->GetLockTxId()); + << ", snapshot = (txid=" << Settings->GetSnapshot().GetTxId() << ",step=" << Settings->GetSnapshot().GetStep() << ")" + << ", lockTxId = " << Settings->GetLockTxId() + << ", lockNodeId = " << Settings->GetLockNodeId()); Counters->CreatedIterators->Inc(); ReadIdByTabletId[state->TabletId].push_back(id); @@ -894,6 +896,26 @@ class TKqpReadActor : public TActorBootstrapped, public NYql::NDq return; } + CA_LOG_D("Recv TEvReadResult from ShardID=" << Reads[id].Shard->TabletId + << ", ReadId=" << id + << ", Status=" << Ydb::StatusIds::StatusCode_Name(record.GetStatus().GetCode()) + << ", Finished=" << record.GetFinished() + << ", RowCount=" << record.GetRowCount() + << ", TxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", BrokenTxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetBrokenTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + if (!record.HasNodeId()) { Counters->ReadActorAbsentNodeId->Inc(); } else if (record.GetNodeId() != SelfId().NodeId()) { diff --git a/ydb/core/kqp/runtime/kqp_scan_data.cpp b/ydb/core/kqp/runtime/kqp_scan_data.cpp index 2e8b430681e9..96c8f6dbbca3 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.cpp +++ b/ydb/core/kqp/runtime/kqp_scan_data.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -296,26 +296,27 @@ TBytesStatistics WriteColumnValuesFromArrowSpecImpl(TAccessor editAccessor, auto trivialChunkedArray = std::make_shared(chunkedArrayExt); NArrow::NAccessor::IChunkedArray::TReader reader(trivialChunkedArray); - std::optional chunkIdx; std::optional currentIdxFrom; std::optional address; const typename TElementAccessor::TArrayType* currentArray = nullptr; const auto applyToIndex = [&](const ui32 rowIndexFrom, const ui32 rowIndexTo) { + bool changed = false; if 
(!currentIdxFrom) { address = reader.GetReadChunk(rowIndexFrom); AFL_ENSURE(rowIndexFrom == 0)("real", rowIndexFrom); + changed = true; } else { AFL_ENSURE(rowIndexFrom == *currentIdxFrom + 1)("next", rowIndexFrom)("current", *currentIdxFrom); if (!address->NextPosition()) { address = reader.GetReadChunk(rowIndexFrom); + changed = true; } } currentIdxFrom = rowIndexFrom; - if (!chunkIdx || *chunkIdx != address->GetChunkIdx()) { + if (changed) { currentArray = static_cast(address->GetArray().get()); TElementAccessor::Validate(*currentArray); - chunkIdx = address->GetChunkIdx(); } auto& rowItem = editAccessor(rowIndexTo, columnIndex); diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp b/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp index e986a9cd3e96..7f94a524d3cc 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp @@ -38,6 +38,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped()) + , NodeLockId(settings.HasLockNodeId() ? 
settings.GetLockNodeId() : TMaybe()) , SchemeCacheRequestTimeout(SCHEME_CACHE_REQUEST_TIMEOUT) , StreamLookupWorker(CreateStreamLookupWorker(std::move(settings), args.TypeEnv, args.HolderFactory, args.InputDesc)) , Counters(counters) @@ -277,8 +278,6 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedGet()->Record; - CA_LOG_D("TEvReadResult was received for table: " << StreamLookupWorker->GetTablePath() << - ", readId: " << record.GetReadId() << ", finished: " << record.GetFinished()); auto readIt = Reads.find(record.GetReadId()); if (readIt == Reads.end() || readIt->second.State != EReadState::Running) { @@ -288,6 +287,27 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedsecond; + CA_LOG_D("Recv TEvReadResult (stream lookup) from ShardID=" << read.ShardId + << ", Table = " << StreamLookupWorker->GetTablePath() + << ", ReadId=" << record.GetReadId() + << ", Status=" << Ydb::StatusIds::StatusCode_Name(record.GetStatus().GetCode()) + << ", Finished=" << record.GetFinished() + << ", RowCount=" << record.GetRowCount() + << ", TxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", BrokenTxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetBrokenTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + for (auto& lock : record.GetBrokenTxLocks()) { BrokenLocks.push_back(lock); } @@ -456,11 +476,22 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedRecord; record.SetMaxRows(defaultSettings.GetMaxRows()); record.SetMaxBytes(defaultSettings.GetMaxBytes()); record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + CA_LOG_D(TStringBuilder() << "Send EvRead (stream lookup) to shardId=" << shardId + << ", readId = " << record.GetReadId() + << ", tablePath: " << StreamLookupWorker->GetTablePath() + << ", snapshot=(txid=" << record.GetSnapshot().GetTxId() << 
", step=" << record.GetSnapshot().GetStep() << ")" + << ", lockTxId=" << record.GetLockTxId() + << ", lockNodeId=" << record.GetLockNodeId()); + Send(MainPipeCacheId, new TEvPipeCache::TEvForward(request.Release(), shardId, true), IEventHandle::FlagTrackDelivery, 0, LookupActorSpan.GetTraceId()); @@ -586,6 +617,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped LockTxId; + const TMaybe NodeLockId; std::unordered_map Reads; std::unordered_map ReadsPerShard; std::shared_ptr> Partitioning; diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp index 4cfd3ea28c24..af995daf05bd 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp @@ -142,7 +142,8 @@ TKqpStreamLookupWorker::TKqpStreamLookupWorker(NKikimrKqp::TKqpStreamLookupSetti , HolderFactory(holderFactory) , InputDesc(inputDesc) , TablePath(settings.GetTable().GetPath()) - , TableId(MakeTableId(settings.GetTable())) { + , TableId(MakeTableId(settings.GetTable())) + , Strategy(settings.GetLookupStrategy()) { KeyColumns.reserve(settings.GetKeyColumns().size()); i32 keyOrder = 0; @@ -748,6 +749,11 @@ class TKqpJoinRows : public TKqpStreamLookupWorker { auto leftRowIt = PendingLeftRowsByKey.find(joinKeyCells); YQL_ENSURE(leftRowIt != PendingLeftRowsByKey.end()); + if (Strategy == NKqpProto::EStreamLookupStrategy::SEMI_JOIN && leftRowIt->second.RightRowExist) { + // Semi join should return one result row per key + continue; + } + TReadResultStats rowStats; i64 availableSpace = freeSpace - (i64)resultStats.ResultBytesCount; auto resultRow = TryBuildResultRow(leftRowIt->second, row, rowStats, availableSpace, result.ShardId); @@ -962,6 +968,7 @@ std::unique_ptr CreateStreamLookupWorker(NKikimrKqp::TKq case NKqpProto::EStreamLookupStrategy::LOOKUP: return std::make_unique(std::move(settings), typeEnv, holderFactory, inputDesc); case NKqpProto::EStreamLookupStrategy::JOIN: + case 
NKqpProto::EStreamLookupStrategy::SEMI_JOIN: return std::make_unique(std::move(settings), typeEnv, holderFactory, inputDesc); default: return {}; diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h index 46b15745b3fc..6b9e35a1074f 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h @@ -71,6 +71,7 @@ class TKqpStreamLookupWorker { std::unordered_map KeyColumns; std::vector LookupKeyColumns; std::vector Columns; + const NKqpProto::EStreamLookupStrategy Strategy; }; std::unique_ptr CreateStreamLookupWorker(NKikimrKqp::TKqpStreamLookupSettings&& settings, diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 8f63dcf2c030..085b02a1f68f 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -24,8 +24,8 @@ namespace { constexpr i64 kInFlightMemoryLimitPerActor = 64_MB; - constexpr i64 kMemoryLimitPerMessage = 48_MB; - constexpr i64 kMaxBatchesPerMessage = 1; + constexpr i64 kMemoryLimitPerMessage = 64_MB; + constexpr i64 kMaxBatchesPerMessage = 8; struct TWriteActorBackoffSettings { TDuration StartRetryDelay = TDuration::MilliSeconds(250); @@ -81,12 +81,12 @@ namespace { namespace NKikimr { namespace NKqp { -class TKqpWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput { - using TBase = TActorBootstrapped; +class TKqpDirectWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput { + using TBase = TActorBootstrapped; class TResumeNotificationManager { public: - TResumeNotificationManager(TKqpWriteActor& writer) + TResumeNotificationManager(TKqpDirectWriteActor& writer) : Writer(writer) { CheckMemory(); } @@ -102,7 +102,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } private: - TKqpWriteActor& Writer; + TKqpDirectWriteActor& Writer; i64 LastFreeMemory = 
std::numeric_limits::max(); }; @@ -127,7 +127,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N }; public: - TKqpWriteActor( + TKqpDirectWriteActor( NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args, TIntrusivePtr counters) @@ -137,6 +137,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N , Callbacks(args.Callback) , Counters(counters) , TypeEnv(args.TypeEnv) + , Alloc(args.Alloc) , TxId(args.TxId) , TableId( Settings.GetTable().GetOwnerId(), @@ -157,13 +158,13 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; ResolveTable(); - Become(&TKqpWriteActor::StateFunc); + Become(&TKqpDirectWriteActor::StateFunc); } static constexpr char ActorName[] = "KQP_WRITE_ACTOR"; private: - virtual ~TKqpWriteActor() { + virtual ~TKqpDirectWriteActor() { } void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; @@ -227,6 +228,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); IgnoreFunc(TEvTxUserProxy::TEvAllocateTxIdResult); hFunc(TEvPrivate::TEvShardRequestTimeout, Handle); + hFunc(TEvPrivate::TEvResolveRequestPlanned, Handle); IgnoreFunc(TEvInterconnect::TEvNodeConnected); IgnoreFunc(TEvTxProxySchemeCache::TEvInvalidateTableResult); } @@ -246,21 +248,26 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void PlanResolveTable() { + CA_LOG_D("Plan resolve with delay " << CalculateNextAttemptDelay(ResolveAttempts)); TlsActivationContext->Schedule( CalculateNextAttemptDelay(ResolveAttempts), new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvResolveRequestPlanned{}, 0, 0)); } + void Handle(TEvPrivate::TEvResolveRequestPlanned::TPtr&) { + ResolveTable(); + } + void ResolveTable() { SchemeEntry.reset(); SchemeRequest.reset(); if (ResolveAttempts++ >= 
BackoffSettings()->MaxResolveAttempts) { - const auto error = TStringBuilder() - << "Too many table resolve attempts for Sink=" << this->SelfId() << "."; - CA_LOG_E(error); + CA_LOG_E(TStringBuilder() + << "Too many table resolve attempts for table " << TableId << "."); RuntimeError( - error, + TStringBuilder() + << "Too many table resolve attempts for table `" << Settings.GetTable().GetPath() << "`.", NYql::NDqProto::StatusIds::SCHEME_ERROR); return; } @@ -272,6 +279,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTable; entry.SyncVersion = false; + entry.ShowPrivatePath = true; request->ResultSet.emplace_back(entry); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); @@ -279,14 +287,16 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + auto& resultSet = ev->Get()->Request->ResultSet; + YQL_ENSURE(resultSet.size() == 1); + if (ev->Get()->Request->ErrorCount > 0) { CA_LOG_E(TStringBuilder() << "Failed to get table: " - << TableId << "'"); + << TableId << "'. 
Entry: " << resultSet[0].ToString()); PlanResolveTable(); return; } - auto& resultSet = ev->Get()->Request->ResultSet; - YQL_ENSURE(resultSet.size() == 1); + SchemeEntry = resultSet[0]; CA_LOG_D("Resolved TableId=" << TableId << " (" @@ -361,6 +371,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N return issues; }; + CA_LOG_D("Recv EvWriteResult from ShardID=" << ev->Get()->Record.GetOrigin() + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie); + switch (ev->Get()->GetStatus()) { case NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED: { CA_LOG_E("Got UNSPECIFIED for table `" @@ -423,6 +445,13 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << " Ignored this error." 
<< getIssues().ToOneLineString()); // TODO: support waiting + if (!InconsistentTx) { + RuntimeError( + TStringBuilder() << "Got OVERLOADED for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::OVERLOADED, + getIssues()); + } return; } case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: { @@ -489,18 +518,30 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie - << ", LocksCount=" << ev->Get()->Record.GetTxLocks().size()); + << ", Locks=" << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); - PopShardBatch(ev->Get()->Record.GetOrigin(), ev->Cookie); + OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), ev->Cookie); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock); + if (!LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock)) { + RuntimeError( + TStringBuilder() << "Got LOCKS BROKEN for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::ABORTED, + NYql::TIssues{}); + } } ProcessBatches(); } - void PopShardBatch(ui64 shardId, ui64 cookie) { + void OnMessageAcknowledged(ui64 shardId, ui64 cookie) { TResumeNotificationManager resumeNotificator(*this); const auto removedDataSize = ShardedWriteController->OnMessageAcknowledged(shardId, cookie); if (removedDataSize) { @@ -579,10 +620,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N ShardedWriteController->GetDataFormat()); } - CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << std::get(TxId) + CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << evWrite->Record.GetTxId() + << ", TxMode=" << evWrite->Record.GetTxMode() << ", LockTxId=" << 
evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : evWrite->Record.GetLocks().GetLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() << ", Size=" << serializationResult.TotalDataSize << ", Cookie=" << metadata->Cookie - << ", Operations=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal + << ", OperationsCount=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal << ", Attempts=" << metadata->SendAttempts); Send( PipeCacheId, @@ -669,7 +718,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void PassAway() override { Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); - TActorBootstrapped::PassAway(); + TActorBootstrapped::PassAway(); } void Prepare() { @@ -693,7 +742,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N : kMaxBatchesPerMessage), }, std::move(columnsMetadata), - TypeEnv); + TypeEnv, + Alloc); } catch (...) 
{ RuntimeError( CurrentExceptionMessage(), @@ -721,7 +771,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Callbacks->ResumeExecution(); } - NActors::TActorId TxProxyId = MakeTxProxyID(); NActors::TActorId PipeCacheId = NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; @@ -731,6 +780,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * Callbacks = nullptr; TIntrusivePtr Counters; const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; const NYql::NDq::TTxId TxId; const TTableId TableId; @@ -754,7 +804,7 @@ void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr< factory.RegisterSink( TString(NYql::KqpTableSinkName), [counters] (NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args) { - auto* actor = new TKqpWriteActor(std::move(settings), std::move(args), counters); + auto* actor = new TKqpDirectWriteActor(std::move(settings), std::move(args), counters); return std::make_pair(actor, actor); }); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 0fc9add54970..21dc4b1f2734 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1,7 +1,5 @@ #include "kqp_write_table.h" -#include -#include #include #include #include @@ -11,6 +9,7 @@ #include #include #include +#include #include namespace NKikimr { @@ -18,8 +17,44 @@ namespace NKqp { namespace { -constexpr ui64 MaxBatchBytes = 8_MB; -constexpr ui64 MaxUnshardedBatchBytes = 4_MB; +constexpr ui64 DataShardMaxOperationBytes = 8_MB; +constexpr ui64 ColumnShardMaxOperationBytes = 8_MB; +constexpr ui64 MaxUnshardedBatchBytes = 0_MB; + +class IPayloadSerializer : public TThrRefBase { +public: + class IBatch : public TThrRefBase { + public: + virtual TString SerializeToString() const = 0; + virtual i64 GetMemory() const = 0; + bool IsEmpty() const; + 
}; + + using IBatchPtr = TIntrusivePtr; + + virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + virtual void AddBatch(const IBatchPtr& batch) = 0; + + virtual void Close() = 0; + + virtual bool IsClosed() = 0; + virtual bool IsEmpty() = 0; + virtual bool IsFinished() = 0; + + virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; + virtual std::vector GetWriteColumnIds() = 0; + + using TBatches = THashMap>; + + virtual TBatches FlushBatchesForce() = 0; + + virtual IBatchPtr FlushBatch(ui64 shardId) = 0; + virtual const THashSet& GetShardIds() const = 0; + + virtual i64 GetMemory() = 0; +}; + +using IPayloadSerializerPtr = TIntrusivePtr; TVector BuildColumns(const TConstArrayRef inputColumns) { TVector result; @@ -169,6 +204,136 @@ TVector BuildKeyColumnTypes( return keyColumnTypes; } +struct TRowWithData { + TVector Cells; + NUdf::TStringValue Data; +}; + +class TRowBuilder { +private: + struct TCellInfo { + NScheme::TTypeInfo Type; + NUdf::TUnboxedValuePod Value; + TString PgBinaryValue; + }; + +public: + explicit TRowBuilder(size_t size) + : CellsInfo(size) { + } + + TRowBuilder& AddCell( + const size_t index, + const NScheme::TTypeInfo type, + const NUdf::TUnboxedValuePod& value, + const i32 typmod = -1) { + CellsInfo[index].Type = type; + CellsInfo[index].Value = value; + + if (type.GetTypeId() == NScheme::NTypeIds::Pg) { + const auto typeDesc = type.GetTypeDesc(); + if (typmod != -1 && NPg::TypeDescNeedsCoercion(typeDesc)) { + TMaybe err; + CellsInfo[index].PgBinaryValue = NYql::NCommon::PgValueCoerce(value, NPg::PgTypeIdFromTypeDesc(typeDesc), typmod, &err); + if (err) { + ythrow yexception() << "PgValueCoerce error: " << *err; + } + } else { + CellsInfo[index].PgBinaryValue = NYql::NCommon::PgValueToNativeBinary(value, NPg::PgTypeIdFromTypeDesc(typeDesc)); + } + } else { + CellsInfo[index].PgBinaryValue.clear(); + } + return *this; + } + + size_t DataSize() const { + size_t result = 0; + for (const auto& cellInfo : CellsInfo) { + result += 
GetCellSize(cellInfo); + } + return result; + } + + TRowWithData Build() { + TVector cells; + cells.reserve(CellsInfo.size()); + const auto size = DataSize(); + auto data = Allocate(size); + char* ptr = data.Data(); + + for (const auto& cellInfo : CellsInfo) { + cells.push_back(BuildCell(cellInfo, ptr)); + } + + AFL_ENSURE(ptr == data.Data() + size); + + return TRowWithData { + .Cells = std::move(cells), + .Data = std::move(data), + }; + } + +private: + TCell BuildCell(const TCellInfo& cellInfo, char*& dataPtr) { + if (!cellInfo.Value) { + return TCell(); + } + + switch(cellInfo.Type.GetTypeId()) { + #define MAKE_PRIMITIVE_TYPE_CELL_CASE(type, layout) \ + case NUdf::TDataType::Id: return NMiniKQL::MakeCell(cellInfo.Value); + KNOWN_FIXED_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_CELL_CASE) + case NUdf::TDataType::Id: + { + auto intValue = cellInfo.Value.GetInt128(); + constexpr auto valueSize = sizeof(intValue); + + char* initialPtr = dataPtr; + std::memcpy(initialPtr, reinterpret_cast(&intValue), valueSize); + dataPtr += valueSize; + return TCell(initialPtr, valueSize); + } + } + + const auto ref = cellInfo.Type.GetTypeId() == NScheme::NTypeIds::Pg + ? 
NYql::NUdf::TStringRef(cellInfo.PgBinaryValue) + : cellInfo.Value.AsStringRef(); + + char* initialPtr = dataPtr; + std::memcpy(initialPtr, ref.Data(), ref.Size()); + dataPtr += ref.Size(); + return TCell(initialPtr, ref.Size()); + } + + size_t GetCellSize(const TCellInfo& cellInfo) const { + if (!cellInfo.Value) { + return 0; + } + + switch(cellInfo.Type.GetTypeId()) { + #define MAKE_PRIMITIVE_TYPE_CELL_CASE_SIZE(type, layout) \ + case NUdf::TDataType::Id: + KNOWN_FIXED_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_CELL_CASE_SIZE) + return 0; + case NUdf::TDataType::Id: + return sizeof(cellInfo.Value.GetInt128()); + } + + if (cellInfo.Type.GetTypeId() == NScheme::NTypeIds::Pg) { + return cellInfo.PgBinaryValue.size(); + } + return cellInfo.Value.AsStringRef().Size(); + } + + NUdf::TStringValue Allocate(size_t size) { + Y_DEBUG_ABORT_UNLESS(NMiniKQL::TlsAllocState); + return NUdf::TStringValue(size); + } + + TVector CellsInfo; +}; + class TColumnShardPayloadSerializer : public IPayloadSerializer { using TRecordBatchPtr = std::shared_ptr; @@ -206,10 +371,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { public: TColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, // key columns then value columns - const NMiniKQL::TTypeEnvironment& typeEnv) - : TypeEnv(typeEnv) - , Columns(BuildColumns(inputColumns)) + const TConstArrayRef inputColumns) // key columns then value columns + : Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndex(schemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) , BatchBuilder(arrow::Compression::UNCOMPRESSED, BuildNotNullColumns(inputColumns)) { @@ -241,16 +404,13 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { return; } - TVector cells(Columns.size()); + TRowBuilder rowBuilder(Columns.size()); data.ForEachRow([&](const auto& row) { for (size_t index = 0; index < Columns.size(); ++index) { - 
cells[WriteIndex[index]] = MakeCell( - Columns[index].PType, - row.GetElement(index), - TypeEnv, - /* copy */ false); + rowBuilder.AddCell(WriteIndex[index], Columns[index].PType, row.GetElement(index)); } - BatchBuilder.AddRow(TConstArrayRef{cells.begin(), cells.end()}); + auto rowWithData = rowBuilder.Build(); + BatchBuilder.AddRow(TConstArrayRef{rowWithData.Cells.begin(), rowWithData.Cells.end()}); }); FlushUnsharded(false); @@ -265,7 +425,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void FlushUnsharded(bool force) { - if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxUnshardedBatchBytes) { + if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() > MaxUnshardedBatchBytes) { const auto unshardedBatch = BatchBuilder.FlushBatch(true); YQL_ENSURE(unshardedBatch); ShardAndFlushBatch(unshardedBatch, force); @@ -289,7 +449,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force) { - while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { + while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= ColumnShardMaxOperationBytes || force)) { std::vector toPrepare; i64 toPrepareSize = 0; while (!unpreparedBatch.Batches.empty()) { @@ -309,7 +469,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { for (i64 index = 0; index < batch->num_rows(); ++index) { i64 nextRowSize = rowCalculator.GetRowBytesSize(index); - if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + if (toPrepareSize + nextRowSize >= (i64)ColumnShardMaxOperationBytes) { YQL_ENSURE(index > 0); toPrepare.push_back(batch->Slice(0, index)); @@ -413,7 +573,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } private: - const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Sharding; const TVector Columns; @@ -434,7 +593,7 @@ class 
TDataShardPayloadSerializer : public IPayloadSerializer { class TBatch : public IPayloadSerializer::IBatch { public: TString SerializeToString() const override { - return TSerializedCellMatrix::Serialize(Data, Rows, Columns); + return TSerializedCellMatrix::Serialize(Cells, Rows, Columns); } i64 GetMemory() const override { @@ -442,37 +601,94 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { } bool IsEmpty() const { - return Data.empty(); + return Cells.empty(); } - std::vector Extract() { + std::pair, std::vector> Extract() { Size = 0; Rows = 0; - return std::move(Data); + return {std::move(Cells), std::move(Data)}; } - TBatch(std::vector&& data, i64 size, ui32 rows, ui16 columns) - : Data(std::move(data)) + TBatch(std::vector&& cells, std::vector&& data, i64 size, ui32 rows, ui16 columns) + : Cells(std::move(cells)) + , Data(std::move(data)) , Size(size) , Rows(rows) , Columns(columns) { } private: - std::vector Data; + std::vector Cells; + std::vector Data; ui64 Size = 0; ui32 Rows = 0; ui16 Columns = 0; }; + class TRowsBatcher { + public: + explicit TRowsBatcher(ui16 columnCount, ui64 maxBytesPerBatch) + : ColumnCount(columnCount) + , MaxBytesPerBatch(maxBytesPerBatch) { + } + + bool IsEmpty() const { + return Batches.empty(); + } + + struct TBatch { + ui64 Memory = 0; + ui64 MemorySerialized = 0; + TVector Cells; + TVector Data; + }; + + TBatch Flush(bool force) { + TBatch res; + if ((!Batches.empty() && force) || Batches.size() > 1) { + res = std::move(Batches.front()); + Batches.pop_front(); + } + return res; + } + + ui64 AddRow(TRowWithData&& rowWithData) { + Y_ABORT_UNLESS(rowWithData.Cells.size() == ColumnCount); + ui64 newMemory = 0; + for (const auto& cell : rowWithData.Cells) { + newMemory += cell.Size(); + } + if (Batches.empty() || newMemory + GetCellHeaderSize() * ColumnCount + Batches.back().MemorySerialized > MaxBytesPerBatch) { + Batches.emplace_back(); + Batches.back().Memory = 0; + Batches.back().MemorySerialized = 
GetCellMatrixHeaderSize(); + } + + for (auto& cell : rowWithData.Cells) { + Batches.back().Cells.emplace_back(std::move(cell)); + } + Batches.back().Data.emplace_back(std::move(rowWithData.Data)); + + Batches.back().Memory += newMemory; + Batches.back().MemorySerialized += newMemory + GetCellHeaderSize() * ColumnCount; + + return newMemory; + } + + private: + std::deque Batches; + + ui16 ColumnCount; + ui64 MaxBytesPerBatch; + }; + public: TDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) - : TypeEnv(typeEnv) - , SchemeEntry(schemeEntry) + const TConstArrayRef inputColumns) + : SchemeEntry(schemeEntry) , KeyDescription(std::move(partitionsEntry.KeyDescription)) , Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndexKeyFirst(SchemeEntry, inputColumns)) @@ -480,11 +696,11 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { , KeyColumnTypes(BuildKeyColumnTypes(SchemeEntry)) { } - void AddRow(TArrayRef row, const TKeyDesc& keyRange) { + void AddRow(TRowWithData&& row, const TKeyDesc& keyRange) { auto shardIter = std::lower_bound( std::begin(keyRange.GetPartitions()), std::end(keyRange.GetPartitions()), - TArrayRef(row.data(), KeyColumnTypes.size()), + TArrayRef(row.Cells.data(), KeyColumnTypes.size()), [this](const auto &partition, const auto& key) { const auto& range = *partition.Range; return 0 > CompareBorders(range.EndKeyPrefix.GetCells(), key, @@ -497,42 +713,39 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { if (batcherIter == std::end(Batchers)) { Batchers.emplace( shardIter->ShardId, - TCellsBatcher(Columns.size(), MaxBatchBytes)); + TRowsBatcher(Columns.size(), DataShardMaxOperationBytes)); } - Memory += Batchers.at(shardIter->ShardId).AddRow(row); + Memory += Batchers.at(shardIter->ShardId).AddRow(std::move(row)); 
ShardIds.insert(shardIter->ShardId); } void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); - TVector cells(Columns.size()); + TRowBuilder rowBuilder(Columns.size()); data.ForEachRow([&](const auto& row) { for (size_t index = 0; index < Columns.size(); ++index) { - // TODO: move to SerializedVector - cells[WriteIndex[index]] = MakeCell( - Columns[index].PType, - row.GetElement(index), - TypeEnv, - /* copy */ true); + rowBuilder.AddCell(WriteIndex[index], Columns[index].PType, row.GetElement(index)); } - AddRow(cells, GetKeyRange()); - - cells.resize(Columns.size()); + auto rowWithData = rowBuilder.Build(); + AddRow(std::move(rowWithData), GetKeyRange()); }); } void AddBatch(const IPayloadSerializer::IBatchPtr& batch) override { auto datashardBatch = dynamic_cast(batch.Get()); YQL_ENSURE(datashardBatch); - auto data = datashardBatch->Extract(); - const auto rows = data.size() / Columns.size(); - YQL_ENSURE(data.size() == rows * Columns.size()); + auto [cells, data] = datashardBatch->Extract(); + const auto rows = cells.size() / Columns.size(); + YQL_ENSURE(cells.size() == rows * Columns.size()); for (size_t rowIndex = 0; rowIndex < rows; ++rowIndex) { AddRow( - TArrayRef{&data[rowIndex * Columns.size()], Columns.size()}, + TRowWithData{ + TVector(cells.begin() + (rowIndex * Columns.size()), cells.begin() + (rowIndex * Columns.size()) + Columns.size()), + data[rowIndex], + }, GetKeyRange()); } } @@ -566,12 +779,13 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return IsClosed() && IsEmpty(); } - IBatchPtr ExtractNextBatch(TCellsBatcher& batcher, bool force) { + IBatchPtr ExtractNextBatch(TRowsBatcher& batcher, bool force) { auto batchResult = batcher.Flush(force); Memory -= batchResult.Memory; - const ui32 rows = batchResult.Data.size() / Columns.size(); + const ui32 rows = batchResult.Cells.size() / Columns.size(); YQL_ENSURE(Columns.size() <= std::numeric_limits::max()); return MakeIntrusive( + 
std::move(batchResult.Cells), std::move(batchResult.Data), static_cast(batchResult.MemorySerialized), rows, @@ -610,7 +824,6 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return *KeyDescription; } - const NMiniKQL::TTypeEnvironment& TypeEnv; const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; THolder KeyDescription; @@ -619,7 +832,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { const std::vector WriteColumnIds; const TVector KeyColumnTypes; - THashMap Batchers; + THashMap Batchers; THashSet ShardIds; i64 Memory = 0; @@ -635,19 +848,17 @@ bool IPayloadSerializer::IBatch::IsEmpty() const { IPayloadSerializerPtr CreateColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { + const TConstArrayRef inputColumns) { return MakeIntrusive( - schemeEntry, inputColumns, typeEnv); + schemeEntry, inputColumns); } IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { + const TConstArrayRef inputColumns) { return MakeIntrusive( - schemeEntry, std::move(partitionsEntry), inputColumns, typeEnv); + schemeEntry, std::move(partitionsEntry), inputColumns); } namespace { @@ -826,8 +1037,7 @@ class TShardedWriteController : public IShardedWriteController { BeforePartitioningChanged(); Serializer = CreateColumnShardPayloadSerializer( schemeEntry, - InputColumnsMetadata, - TypeEnv); + InputColumnsMetadata); AfterPartitioningChanged(); } @@ -838,8 +1048,7 @@ class TShardedWriteController : public IShardedWriteController { Serializer = CreateDataShardPayloadSerializer( schemeEntry, std::move(partitionsEntry), - InputColumnsMetadata, - TypeEnv); + InputColumnsMetadata); AfterPartitioningChanged(); } @@ -867,6 +1076,7 
@@ class TShardedWriteController : public IShardedWriteController { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); YQL_ENSURE(!Closed); + auto allocGuard = TypeEnv.BindAllocator(); YQL_ENSURE(Serializer); Serializer->AddData(std::move(data)); @@ -874,6 +1084,7 @@ class TShardedWriteController : public IShardedWriteController { } void Close() override { + auto allocGuard = TypeEnv.BindAllocator(); YQL_ENSURE(Serializer); Closed = true; Serializer->Close(); @@ -931,6 +1142,7 @@ class TShardedWriteController : public IShardedWriteController { } std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + auto allocGuard = TypeEnv.BindAllocator(); auto& shardInfo = ShardsInfo.GetShard(shardId); const auto removedDataSize = shardInfo.PopBatches(cookie); return removedDataSize; @@ -972,10 +1184,19 @@ class TShardedWriteController : public IShardedWriteController { TShardedWriteController( const TShardedWriteControllerSettings settings, TVector&& inputColumnsMetadata, - const NMiniKQL::TTypeEnvironment& typeEnv) + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc) : Settings(settings) , InputColumnsMetadata(std::move(inputColumnsMetadata)) - , TypeEnv(typeEnv) { + , TypeEnv(typeEnv) + , Alloc(alloc) { + } + + ~TShardedWriteController() { + Y_ABORT_UNLESS(Alloc); + TGuard allocGuard(*Alloc); + ShardsInfo.Clear(); + Serializer = nullptr; } private: @@ -1019,6 +1240,7 @@ class TShardedWriteController : public IShardedWriteController { TShardedWriteControllerSettings Settings; TVector InputColumnsMetadata; const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; TShardsInfo ShardsInfo; bool Closed = false; @@ -1032,8 +1254,10 @@ class TShardedWriteController : public IShardedWriteController { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, TVector&& inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { - return MakeIntrusive(settings, 
std::move(inputColumns), typeEnv); + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc) { + return MakeIntrusive( + settings, std::move(inputColumns), typeEnv, alloc); } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 7846cb954cc6..46e5ac4f7308 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -10,54 +10,6 @@ namespace NKikimr { namespace NKqp { -class IPayloadSerializer : public TThrRefBase { -public: - class IBatch : public TThrRefBase { - public: - virtual TString SerializeToString() const = 0; - virtual i64 GetMemory() const = 0; - bool IsEmpty() const; - }; - - using IBatchPtr = TIntrusivePtr; - - virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; - virtual void AddBatch(const IBatchPtr& batch) = 0; - - virtual void Close() = 0; - - virtual bool IsClosed() = 0; - virtual bool IsEmpty() = 0; - virtual bool IsFinished() = 0; - - virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; - virtual std::vector GetWriteColumnIds() = 0; - - using TBatches = THashMap>; - - virtual TBatches FlushBatchesForce() = 0; - - virtual IBatchPtr FlushBatch(ui64 shardId) = 0; - virtual const THashSet& GetShardIds() const = 0; - - virtual i64 GetMemory() = 0; -}; - -using IPayloadSerializerPtr = TIntrusivePtr; - - -IPayloadSerializerPtr CreateColumnShardPayloadSerializer( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); - -IPayloadSerializerPtr CreateDataShardPayloadSerializer( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); - - class IShardedWriteController : public TThrRefBase { public: virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; 
@@ -112,7 +64,8 @@ struct TShardedWriteControllerSettings { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, TVector&& inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc); } } diff --git a/ydb/core/kqp/runtime/ya.make b/ydb/core/kqp/runtime/ya.make index 3801d1df0ea0..615ccbc9cbf4 100644 --- a/ydb/core/kqp/runtime/ya.make +++ b/ydb/core/kqp/runtime/ya.make @@ -5,6 +5,7 @@ SRCS( kqp_effects.cpp kqp_output_stream.cpp kqp_program_builder.cpp + kqp_compute_scheduler.cpp kqp_read_actor.cpp kqp_read_iterator_common.cpp kqp_read_table.cpp diff --git a/ydb/core/kqp/session_actor/kqp_query_state.cpp b/ydb/core/kqp/session_actor/kqp_query_state.cpp index e35e25292f00..984957857bd3 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.cpp +++ b/ydb/core/kqp/session_actor/kqp_query_state.cpp @@ -117,7 +117,7 @@ std::unique_ptr TKqpQueryState::BuildN auto navigate = MakeHolder(); navigate->DatabaseName = Database; - if (UserToken && !UserToken->GetSerializedToken().empty()) { + if (HasUserToken()) { navigate->UserToken = UserToken; } @@ -138,6 +138,7 @@ std::unique_ptr TKqpQueryState::BuildN bool TKqpQueryState::SaveAndCheckCompileResult(TEvKqp::TEvCompileResponse* ev) { + CompilationRunning = false; CompileResult = ev->CompileResult; YQL_ENSURE(CompileResult); MaxReadType = CompileResult->MaxReadType; @@ -160,7 +161,7 @@ bool TKqpQueryState::SaveAndCheckCompileResult(TEvKqp::TEvCompileResponse* ev) { CommandTagName = CompileResult->CommandTagName; } for (const auto& param : PreparedQuery->GetParameters()) { - const auto& ast = CompileResult->Ast; + const auto& ast = CompileResult->GetAst(); if (!ast || !ast->PgAutoParamValues || !ast->PgAutoParamValues->contains(param.GetName())) { ResultParams.push_back(param); } @@ -198,12 +199,12 @@ std::unique_ptr TKqpQueryState::BuildCompileRequest(s TGUCSettings gUCSettings = gUCSettingsPtr ? 
*gUCSettingsPtr : TGUCSettings(); switch (GetAction()) { case NKikimrKqp::QUERY_ACTION_EXECUTE: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); keepInCache = GetQueryKeepInCache() && query->IsSql(); break; case NKikimrKqp::QUERY_ACTION_PREPARE: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); keepInCache = query->IsSql(); break; @@ -213,7 +214,7 @@ std::unique_ptr TKqpQueryState::BuildCompileRequest(s break; case NKikimrKqp::QUERY_ACTION_EXPLAIN: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); keepInCache = false; break; @@ -254,11 +255,11 @@ std::unique_ptr TKqpQueryState::BuildReCompileReque switch (GetAction()) { case NKikimrKqp::QUERY_ACTION_EXPLAIN: case NKikimrKqp::QUERY_ACTION_EXECUTE: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); break; case NKikimrKqp::QUERY_ACTION_PREPARE: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); break; case NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED: @@ -275,15 +276,9 @@ std::unique_ptr TKqpQueryState::BuildReCompileReque compileDeadline = Min(compileDeadline, QueryDeadlines.CancelAt); } - TMaybe 
statementAst; - if (!Statements.empty()) { - YQL_ENSURE(CurrentStatementId < Statements.size()); - statementAst = Statements[CurrentStatementId]; - } - return std::make_unique(UserToken, CompileResult->Uid, query, isQueryActionPrepare, compileDeadline, DbCounters, gUCSettingsPtr, ApplicationName, std::move(cookie), UserRequestContext, std::move(Orbit), TempTablesState, - statementAst); + CompileResult->QueryAst); } std::unique_ptr TKqpQueryState::BuildSplitRequest(std::shared_ptr> cookie, const TGUCSettings::TPtr& gUCSettingsPtr) { @@ -304,7 +299,7 @@ std::unique_ptr TKqpQueryState::BuildCompileSplittedR switch (GetAction()) { case NKikimrKqp::QUERY_ACTION_EXECUTE: - query = TKqpQueryId(Cluster, Database, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); + query = TKqpQueryId(Cluster, Database, UserRequestContext->DatabaseId, GetQuery(), settings, GetQueryParameterTypes(), gUCSettings); break; default: YQL_ENSURE(false); @@ -412,12 +407,23 @@ std::unique_ptr TKqpQueryState::BuildSchemeC consumer = operations.GetConsumer(); TopicOperations.FillSchemeCacheNavigate(*navigate, std::move(consumer)); - navigate->UserToken = UserToken; + if (HasUserToken()) { + navigate->UserToken = UserToken; + } navigate->Cookie = QueryId; return navigate; } +bool TKqpQueryState::HasUserToken() const +{ + return UserToken && !UserToken->GetSerializedToken().empty(); +} + bool TKqpQueryState::IsAccessDenied(const NSchemeCache::TSchemeCacheNavigate& response, TString& message) { + if (!HasUserToken()) { + return false; + } + auto checkAccessDenied = [&] (const NSchemeCache::TSchemeCacheNavigate::TEntry& result) { static const auto selectRowRights = NACLib::EAccessRights::SelectRow; static const auto accessAttributesRights = NACLib::EAccessRights::ReadAttributes | NACLib::EAccessRights::WriteAttributes; diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 38447c484c8c..94ec2e2b8aa0 100644 --- 
a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -67,8 +67,9 @@ class TKqpQueryState : public TNonCopyable { , StartedAt(startedAt) { RequestEv.reset(ev->Release().Release()); - - if (AppData()->FeatureFlags.GetEnableImplicitQueryParameterTypes() && !RequestEv->GetYdbParameters().empty()) { + bool enableImplicitQueryParameterTypes = tableServiceConfig.GetEnableImplicitQueryParameterTypes() || + AppData()->FeatureFlags.GetEnableImplicitQueryParameterTypes(); + if (enableImplicitQueryParameterTypes && !RequestEv->GetYdbParameters().empty()) { QueryParameterTypes = std::make_shared>(); for (const auto& [name, typedValue] : RequestEv->GetYdbParameters()) { QueryParameterTypes->insert({name, typedValue.Gettype()}); @@ -86,6 +87,8 @@ class TKqpQueryState : public TNonCopyable { UserRequestContext = MakeIntrusive(RequestEv->GetTraceId(), Database, sessionId); } UserRequestContext->PoolId = RequestEv->GetPoolId(); + UserRequestContext->PoolConfig = RequestEv->GetPoolConfig(); + UserRequestContext->DatabaseId = RequestEv->GetDatabaseId(); } // the monotonously growing counter, the ordinal number of the query, @@ -115,11 +118,13 @@ class TKqpQueryState : public TNonCopyable { bool IsDocumentApiRestricted_ = false; TInstant StartTime; + TInstant ContinueTime; NYql::TKikimrQueryDeadlines QueryDeadlines; TKqpQueryStats QueryStats; bool KeepSession = false; TIntrusiveConstPtr UserToken; NActors::TMonotonic StartedAt; + bool CompilationRunning = false; THashMap TableVersions; @@ -313,10 +318,6 @@ class TKqpQueryState : public TNonCopyable { bool NeedPersistentSnapshot() const { auto type = GetType(); - if (type == NKikimrKqp::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY || - type == NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY) { - return ::NKikimr::NKqp::HasOlapTableReadInTx(PreparedQuery->GetPhysicalQuery()); - } return ( type == NKikimrKqp::QUERY_TYPE_SQL_SCAN || type == NKikimrKqp::QUERY_TYPE_AST_SCAN @@ -494,18 +495,6 @@ class 
TKqpQueryState : public TNonCopyable { PrepareCurrentStatement(); } - void PrepareStatementTransaction(NKqpProto::TKqpPhyTx_EType txType) { - if (!HasTxControl()) { - switch (txType) { - case NKqpProto::TKqpPhyTx::TYPE_SCHEME: - TxCtx->EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_UNDEFINED; - break; - default: - TxCtx->EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE; - } - } - } - // validate the compiled query response and ensure that all table versions are not // changed since the last compilation. bool EnsureTableVersions(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& response); @@ -603,6 +592,8 @@ class TKqpQueryState : public TNonCopyable { std::unique_ptr BuildSchemeCacheNavigate(); bool IsAccessDenied(const NSchemeCache::TSchemeCacheNavigate& response, TString& message); bool HasErrors(const NSchemeCache::TSchemeCacheNavigate& response, TString& message); + + bool HasUserToken() const; }; diff --git a/ydb/core/kqp/session_actor/kqp_query_stats.cpp b/ydb/core/kqp/session_actor/kqp_query_stats.cpp index e26d6b5e7b8f..922b788419ea 100644 --- a/ydb/core/kqp/session_actor/kqp_query_stats.cpp +++ b/ydb/core/kqp/session_actor/kqp_query_stats.cpp @@ -210,6 +210,7 @@ ui64 CalcRequestUnit(const TKqpQueryStats& stats) { NKqpProto::TKqpStatsQuery TKqpQueryStats::ToProto() const { NKqpProto::TKqpStatsQuery result; result.SetDurationUs(DurationUs); + result.SetQueuedTimeUs(QueuedTimeUs); if (Compilation) { result.MutableCompilation()->SetFromCache(Compilation->FromCache); diff --git a/ydb/core/kqp/session_actor/kqp_query_stats.h b/ydb/core/kqp/session_actor/kqp_query_stats.h index f73ce6316f07..9cda3417beb9 100644 --- a/ydb/core/kqp/session_actor/kqp_query_stats.h +++ b/ydb/core/kqp/session_actor/kqp_query_stats.h @@ -8,6 +8,7 @@ namespace NKikimr::NKqp { struct TKqpQueryStats { ui64 DurationUs = 0; + ui64 QueuedTimeUs = 0; std::optional Compilation; ui64 WorkerCpuTimeUs = 0; diff --git a/ydb/core/kqp/session_actor/kqp_response.cpp 
b/ydb/core/kqp/session_actor/kqp_response.cpp index e1864d104c19..cc90a3031bf2 100644 --- a/ydb/core/kqp/session_actor/kqp_response.cpp +++ b/ydb/core/kqp/session_actor/kqp_response.cpp @@ -53,36 +53,6 @@ bool HasSchemeOrFatalIssues(const TIssue& issue) { } // namespace -void ConvertKqpQueryResultToDbResult(const NKikimrMiniKQL::TResult& from, Ydb::ResultSet* to) { - const auto& type = from.GetType(); - TStackVec columnTypes; - Y_ENSURE(type.GetKind() == NKikimrMiniKQL::ETypeKind::Struct); - for (const auto& member : type.GetStruct().GetMember()) { - if (member.GetType().GetKind() == NKikimrMiniKQL::ETypeKind::List) { - for (const auto& column : member.GetType().GetList().GetItem().GetStruct().GetMember()) { - auto columnMeta = to->add_columns(); - columnMeta->set_name(column.GetName()); - columnTypes.push_back(column.GetType()); - ConvertMiniKQLTypeToYdbType(column.GetType(), *columnMeta->mutable_type()); - } - } - } - for (const auto& responseStruct : from.GetValue().GetStruct()) { - for (const auto& row : responseStruct.GetList()) { - auto newRow = to->add_rows(); - ui32 columnCount = static_cast(row.StructSize()); - Y_ENSURE(columnCount == columnTypes.size()); - for (ui32 i = 0; i < columnCount; i++) { - const auto& column = row.GetStruct(i); - ConvertMiniKQLValueToYdbValue(columnTypes[i], column, *newRow->add_items()); - } - } - if (responseStruct.Getvalue_valueCase() == NKikimrMiniKQL::TValue::kBool) { - to->set_truncated(responseStruct.GetBool()); - } - } -} - TMaybe GetYdbStatus(const TIssue& issue) { if (issue.GetSeverity() == TSeverityIds::S_FATAL) { return Ydb::StatusIds::INTERNAL_ERROR; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index b66c28e20be1..9ea406597e75 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -159,7 +159,10 @@ class TKqpSessionActor : public TActorBootstrapped { return 
NKikimrServices::TActivity::KQP_SESSION_ACTOR; } - TKqpSessionActor(const TActorId& owner, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, + TKqpSessionActor(const TActorId& owner, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, + const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, @@ -168,6 +171,8 @@ class TKqpSessionActor : public TActorBootstrapped { const TActorId& kqpTempTablesAgentActor) : Owner(owner) , SessionId(sessionId) + , ResourceManager_(std::move(resourceManager)) + , CaFactory_(std::move(caFactory)) , Counters(counters) , Settings(workerSettings) , AsyncIoFactory(std::move(asyncIoFactory)) @@ -237,13 +242,20 @@ class TKqpSessionActor : public TActorBootstrapped { } void PassRequestToResourcePool() { + if (QueryState->UserRequestContext->PoolConfig) { + LOG_D("request placed into pool from cache: " << QueryState->UserRequestContext->PoolId); + CompileQuery(); + return; + } + Send(MakeKqpWorkloadServiceId(SelfId().NodeId()), new NWorkload::TEvPlaceRequestIntoPool( - QueryState->Database, + QueryState->UserRequestContext->DatabaseId, SessionId, QueryState->UserRequestContext->PoolId, QueryState->UserToken ), IEventHandle::FlagTrackDelivery); + QueryState->PoolHandlerActor = MakeKqpWorkloadServiceId(SelfId().NodeId()); Become(&TKqpSessionActor::ExecuteState); } @@ -341,8 +353,9 @@ class TKqpSessionActor : public TActorBootstrapped { auto abortEv = TEvKqp::TEvAbortExecution::Aborted("Client lost"); // any status code can be here Send(ExecuterId, abortEv.Release()); + } else { + Cleanup(); } - Cleanup(); } void Handle(TEvKqp::TEvQueryRequest::TPtr& ev) { @@ -402,6 +415,7 @@ class TKqpSessionActor : public TActorBootstrapped { << " text: " << QueryState->GetQuery() << " rpcActor: " << QueryState->RequestActorId << " database: " << QueryState->GetDatabase() + << " databaseId: 
" << QueryState->UserRequestContext->DatabaseId << " pool id: " << QueryState->UserRequestContext->PoolId ); @@ -470,6 +484,7 @@ class TKqpSessionActor : public TActorBootstrapped { void Handle(NWorkload::TEvContinueRequest::TPtr& ev) { YQL_ENSURE(QueryState); + QueryState->ContinueTime = TInstant::Now(); if (ev->Get()->Status == Ydb::StatusIds::UNSUPPORTED) { LOG_T("Failed to place request in resource pool, feature flag is disabled"); @@ -568,10 +583,18 @@ class TKqpSessionActor : public TActorBootstrapped { LWTRACK(KqpSessionQueryCompiled, QueryState->Orbit, TStringBuilder() << QueryState->CompileResult->Status); if (QueryState->CompileResult->NeedToSplit) { - YQL_ENSURE(!QueryState->HasTxControl() && QueryState->GetAction() == NKikimrKqp::QUERY_ACTION_EXECUTE); - auto ev = QueryState->BuildSplitRequest(CompilationCookie, GUCSettings); - Send(MakeKqpCompileServiceID(SelfId().NodeId()), ev.release(), 0, QueryState->QueryId, - QueryState->KqpSessionSpan.GetTraceId()); + if (!QueryState->HasTxControl()) { + YQL_ENSURE(QueryState->GetAction() == NKikimrKqp::QUERY_ACTION_EXECUTE); + auto ev = QueryState->BuildSplitRequest(CompilationCookie, GUCSettings); + Send(MakeKqpCompileServiceID(SelfId().NodeId()), ev.release(), 0, QueryState->QueryId, + QueryState->KqpSessionSpan.GetTraceId()); + } else { + NYql::TIssues issues; + ReplyQueryError( + ::Ydb::StatusIds::StatusCode::StatusIds_StatusCode_BAD_REQUEST, + "CTAS statement can be executed only in NoTx mode.", + MessageFromIssues(issues)); + } } else { ReplyQueryCompileError(); } @@ -832,9 +855,10 @@ class TKqpSessionActor : public TActorBootstrapped { const NKqpProto::TKqpPhyQuery& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); HasOlapTable |= ::NKikimr::NKqp::HasOlapTableReadInTx(phyQuery) || ::NKikimr::NKqp::HasOlapTableWriteInTx(phyQuery); HasOltpTable |= ::NKikimr::NKqp::HasOltpTableReadInTx(phyQuery) || ::NKikimr::NKqp::HasOltpTableWriteInTx(phyQuery); - if (HasOlapTable && HasOltpTable) { + HasTableWrite 
|= ::NKikimr::NKqp::HasOlapTableWriteInTx(phyQuery) || ::NKikimr::NKqp::HasOltpTableWriteInTx(phyQuery); + if (HasOlapTable && HasOltpTable && HasTableWrite) { ReplyQueryError(Ydb::StatusIds::PRECONDITION_FAILED, - "Transactions between column and row tables are disabled at current time."); + "Write transactions between column and row tables are disabled at current time."); return false; } QueryState->TxCtx->SetTempTables(QueryState->TempTablesState); @@ -878,8 +902,8 @@ class TKqpSessionActor : public TActorBootstrapped { try { const auto& parameters = QueryState->GetYdbParameters(); QueryState->QueryData->ParseParameters(parameters); - if (QueryState->CompileResult && QueryState->CompileResult->Ast && QueryState->CompileResult->Ast->PgAutoParamValues) { - for(const auto& [name, param] : *QueryState->CompileResult->Ast->PgAutoParamValues) { + if (QueryState->CompileResult && QueryState->CompileResult->GetAst() && QueryState->CompileResult->GetAst()->PgAutoParamValues) { + for(const auto& [name, param] : *QueryState->CompileResult->GetAst()->PgAutoParamValues) { if (!parameters.contains(name)) { QueryState->QueryData->AddTypedValueParam(name, param); } @@ -1091,11 +1115,10 @@ class TKqpSessionActor : public TActorBootstrapped { bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit) { if (tx) { - QueryState->PrepareStatementTransaction(tx->GetType()); switch (tx->GetType()) { case NKqpProto::TKqpPhyTx::TYPE_SCHEME: YQL_ENSURE(tx->StagesSize() == 0); - if (QueryState->HasTxControl() && QueryState->TxCtx->EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_UNDEFINED) { + if (QueryState->HasTxControl() && !QueryState->HasImplicitTx() && QueryState->TxCtx->EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_UNDEFINED) { ReplyQueryError(Ydb::StatusIds::PRECONDITION_FAILED, "Scheme operations cannot be executed inside transaction"); return true; @@ -1264,11 +1287,16 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerRequestDataSizeLimit 
= RequestControls.PerRequestDataSizeLimit; request.MaxShardCount = RequestControls.MaxShardCount; request.TraceId = QueryState ? QueryState->KqpSessionSpan.GetTraceId() : NWilson::TTraceId(); + request.CaFactory_ = CaFactory_; + request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); const bool useEvWrite = ((HasOlapTable && Settings.TableService.GetEnableOlapSink()) || (!HasOlapTable && Settings.TableService.GetEnableOltpSink())) - && (request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_QUERY - || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY); + && (request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_UNDEFINED + || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_QUERY + || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY + || (!HasOlapTable && request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_DML) + || (!HasOlapTable && request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_PREPARED_DML)); auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? 
QueryState->UserToken : TIntrusiveConstPtr(), RequestCounters, Settings.TableService.GetAggregationConfig(), Settings.TableService.GetExecuterRetriesConfig(), @@ -1344,7 +1372,7 @@ class TKqpSessionActor : public TActorBootstrapped { executionStats.Swap(&stats); stats = QueryState->QueryStats.ToProto(); stats.MutableExecutions()->MergeFrom(executionStats.GetExecutions()); - ev->Get()->Record.SetQueryPlan(SerializeAnalyzePlan(stats)); + ev->Get()->Record.SetQueryPlan(SerializeAnalyzePlan(stats, QueryState->UserRequestContext->PoolId)); } } @@ -1412,6 +1440,12 @@ class TKqpSessionActor : public TActorBootstrapped { ExecuterId = TActorId{}; + auto& executerResults = *response->MutableResult(); + if (executerResults.HasStats()) { + QueryState->QueryStats.Executions.emplace_back(); + QueryState->QueryStats.Executions.back().Swap(executerResults.MutableStats()); + } + if (response->GetStatus() != Ydb::StatusIds::SUCCESS) { const auto executionType = ev->ExecutionType; @@ -1424,6 +1458,12 @@ class TKqpSessionActor : public TActorBootstrapped { // Invalidate query cache on scheme/internal errors switch (status) { + case Ydb::StatusIds::ABORTED: { + if (ev->BrokenLockPathId) { + issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx, *ev->BrokenLockPathId)); + } + break; + } case Ydb::StatusIds::SCHEME_ERROR: case Ydb::StatusIds::INTERNAL_ERROR: InvalidateQuery(); @@ -1467,16 +1507,10 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->TxCtx->Locks.LockHandle = std::move(ev->LockHandle); } - auto& executerResults = *response->MutableResult(); if (!MergeLocksWithTxResult(executerResults)) { return; } - if (executerResults.HasStats()) { - QueryState->QueryStats.Executions.emplace_back(); - QueryState->QueryStats.Executions.back().Swap(executerResults.MutableStats()); - } - if (!response->GetIssues().empty()){ NYql::IssuesFromMessage(response->GetIssues(), QueryState->Issues); } @@ -1545,6 +1579,9 @@ class TKqpSessionActor : public TActorBootstrapped { 
stats->DurationUs = ((TInstant::Now() - QueryState->StartTime).MicroSeconds()); stats->WorkerCpuTimeUs = (QueryState->GetCpuTime().MicroSeconds()); + if (const auto continueTime = QueryState->ContinueTime) { + stats->QueuedTimeUs = (continueTime - QueryState->StartTime).MicroSeconds(); + } if (QueryState->CompileResult) { stats->Compilation.emplace(); stats->Compilation->FromCache = (QueryState->CompileStats.FromCache); @@ -1567,6 +1604,10 @@ class TKqpSessionActor : public TActorBootstrapped { void FillStats(NKikimrKqp::TEvQueryResponse* record) { YQL_ENSURE(QueryState); + // workaround to ensure that request was not transfered to worker. + if (WorkerId || !QueryState->RequestEv) { + return; + } FillSystemViewQueryStats(record); @@ -1583,8 +1624,13 @@ class TKqpSessionActor : public TActorBootstrapped { if (QueryState->ReportStats()) { auto stats = QueryState->QueryStats.ToProto(); if (QueryState->GetStatsMode() >= Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL) { - response->SetQueryPlan(SerializeAnalyzePlan(stats)); - response->SetQueryAst(QueryState->CompileResult->PreparedQuery->GetPhysicalQuery().GetQueryAst()); + response->SetQueryPlan(SerializeAnalyzePlan(stats, QueryState->UserRequestContext->PoolId)); + if (QueryState->CompileResult) { + auto preparedQuery = QueryState->CompileResult->PreparedQuery; + if (preparedQuery) { + response->SetQueryAst(preparedQuery->GetPhysicalQuery().GetQueryAst()); + } + } } response->MutableQueryStats()->Swap(&stats); } @@ -1737,7 +1783,6 @@ class TKqpSessionActor : public TActorBootstrapped { // Result for scan query is sent directly to target actor. 
Y_ABORT_UNLESS(response->GetArena()); if (QueryState->PreparedQuery) { - bool useYdbResponseFormat = QueryState->GetUsePublicResponseDataFormat(); auto& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); size_t trailingResultsCount = 0; for (size_t i = 0; i < phyQuery.ResultBindingsSize(); ++i) { @@ -1754,28 +1799,12 @@ class TKqpSessionActor : public TActorBootstrapped { continue; } - if (useYdbResponseFormat) { - TMaybe effectiveRowsLimit = FillSettings.RowsLimitPerWrite; - if (QueryState->PreparedQuery->GetResults(i).GetRowsLimit()) { - effectiveRowsLimit = QueryState->PreparedQuery->GetResults(i).GetRowsLimit(); - } - auto* ydbResult = QueryState->QueryData->GetYdbTxResult(phyQuery.GetResultBindings(i), response->GetArena(), effectiveRowsLimit); - response->AddYdbResults()->Swap(ydbResult); - } else { - auto* protoRes = QueryState->QueryData->GetMkqlTxResult(phyQuery.GetResultBindings(i), response->GetArena()); - std::optional fillSettings; - if (QueryState->PreparedQuery->ResultsSize()) { - YQL_ENSURE(phyQuery.ResultBindingsSize() == QueryState->PreparedQuery->ResultsSize(), "" - << phyQuery.ResultBindingsSize() << " != " << QueryState->PreparedQuery->ResultsSize()); - const auto& result = QueryState->PreparedQuery->GetResults(i); - if (result.GetRowsLimit()) { - fillSettings = FillSettings; - fillSettings->RowsLimitPerWrite = result.GetRowsLimit(); - } - } - auto* finalResult = KikimrResultToProto(*protoRes, {}, fillSettings.value_or(FillSettings), response->GetArena()); - response->AddResults()->Swap(finalResult); + TMaybe effectiveRowsLimit = FillSettings.RowsLimitPerWrite; + if (QueryState->PreparedQuery->GetResults(i).GetRowsLimit()) { + effectiveRowsLimit = QueryState->PreparedQuery->GetResults(i).GetRowsLimit(); } + auto* ydbResult = QueryState->QueryData->GetYdbTxResult(phyQuery.GetResultBindings(i), response->GetArena(), effectiveRowsLimit); + response->AddYdbResults()->Swap(ydbResult); } } @@ -2069,8 +2098,15 @@ class TKqpSessionActor : public 
TActorBootstrapped { } CleanupCtx->Final = isFinal; CleanupCtx->IsWaitingForWorkloadServiceCleanup = true; + + const auto& stats = QueryState->QueryStats; + auto event = std::make_unique( + QueryState->UserRequestContext->DatabaseId, SessionId, QueryState->UserRequestContext->PoolId, + TDuration::MicroSeconds(stats.DurationUs), TDuration::MicroSeconds(stats.WorkerCpuTimeUs) + ); + auto forwardId = MakeKqpWorkloadServiceId(SelfId().NodeId()); - Send(new IEventHandle(*QueryState->PoolHandlerActor, SelfId(), new NWorkload::TEvCleanupRequest(QueryState->Database, SessionId, QueryState->UserRequestContext->PoolId), IEventHandle::FlagForwardOnNondelivery, 0, &forwardId)); + Send(new IEventHandle(*QueryState->PoolHandlerActor, SelfId(), event.release(), IEventHandle::FlagForwardOnNondelivery, 0, &forwardId)); QueryState->PoolHandlerActor = Nothing(); } @@ -2192,13 +2228,10 @@ class TKqpSessionActor : public TActorBootstrapped { Y_ENSURE(QueryState); if (QueryState->CompileResult) { AddQueryIssues(*response, QueryState->CompileResult->Issues); - - auto preparedQuery = QueryState->CompileResult->PreparedQuery; - if (preparedQuery && QueryState->ReportStats() && QueryState->GetStatsMode() >= Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL) { - response->SetQueryAst(preparedQuery->GetPhysicalQuery().GetQueryAst()); - } } + FillStats(&QueryResponse->Record.GetRef()); + if (issues) { for (auto& i : *issues) { response->AddQueryIssues()->Swap(&i); @@ -2356,6 +2389,7 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleNoop); hFunc(TEvents::TEvUndelivered, HandleNoop); hFunc(TEvTxUserProxy::TEvAllocateTxIdResult, HandleNoop); + hFunc(TEvKqpExecuter::TEvStreamData, HandleNoop); hFunc(NWorkload::TEvContinueRequest, HandleNoop); // always come from WorkerActor @@ -2492,6 +2526,8 @@ class TKqpSessionActor : public TActorBootstrapped { TActorId Owner; TString SessionId; + std::shared_ptr ResourceManager_; + 
std::shared_ptr CaFactory_; // cached lookups to issue counters THashMap CachedIssueCounters; TInstant CreationTime; @@ -2525,13 +2561,15 @@ class TKqpSessionActor : public TActorBootstrapped { bool HasOlapTable = false; bool HasOltpTable = false; + bool HasTableWrite = false; TGUCSettings::TPtr GUCSettings; }; } // namespace -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, +IActor* CreateKqpSessionActor(const TActorId& owner, std::shared_ptr resourceManager, + std::shared_ptr caFactory, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, @@ -2539,7 +2577,7 @@ IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TActorId& kqpTempTablesAgentActor) { - return new TKqpSessionActor(owner, sessionId, kqpSettings, workerSettings, federatedQuerySetup, + return new TKqpSessionActor(owner, std::move(resourceManager), std::move(caFactory), sessionId, kqpSettings, workerSettings, federatedQuerySetup, std::move(asyncIoFactory), std::move(moduleResolverState), counters, queryServiceConfig, kqpTempTablesAgentActor); } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.h b/ydb/core/kqp/session_actor/kqp_session_actor.h index bbcaa76dab6f..f26fff2b00ca 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.h +++ b/ydb/core/kqp/session_actor/kqp_session_actor.h @@ -11,6 +11,14 @@ #include #include +namespace NKikimr::NKqp::NComputeActor { + struct IKqpNodeComputeActorFactory; +} + +namespace NKikimr::NKqp::NRm { + class IKqpResourceManager; +} + namespace NKikimr::NKqp { struct TKqpWorkerSettings { @@ -48,7 +56,10 @@ struct TKqpWorkerSettings { } }; -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, +IActor* CreateKqpSessionActor(const TActorId& owner, + std::shared_ptr 
resourceManager_, + std::shared_ptr caFactory_, + const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, diff --git a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp index 870ffa599ff7..a18f69b7f347 100644 --- a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp @@ -182,13 +182,13 @@ class TKqpWorkerActor : public TActorBootstrapped { std::shared_ptr loader = std::make_shared( Settings.Cluster, TlsActivationContext->ActorSystem(), Config, false, nullptr); - Gateway = CreateKikimrIcGateway(Settings.Cluster, QueryState->RequestEv->GetType(), Settings.Database, std::move(loader), + Gateway = CreateKikimrIcGateway(Settings.Cluster, QueryState->RequestEv->GetType(), Settings.Database, QueryState->RequestEv->GetDatabaseId(), std::move(loader), ctx.ExecutorThread.ActorSystem, ctx.SelfID.NodeId(), RequestCounters, QueryServiceConfig); Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, Settings.Cluster, Settings.Database, Config, ModuleResolverState->ModuleResolver, FederatedQuerySetup, - QueryState->RequestEv->GetUserToken(), GUCSettings, Settings.ApplicationName, AppData(ctx)->FunctionRegistry, !Settings.LongSession, false); + QueryState->RequestEv->GetUserToken(), GUCSettings, QueryServiceConfig, Settings.ApplicationName, AppData(ctx)->FunctionRegistry, !Settings.LongSession, false, nullptr, nullptr, nullptr); auto& queryRequest = QueryState->RequestEv; QueryState->ProxyRequestId = proxyRequestId; @@ -960,7 +960,7 @@ class TKqpWorkerActor : public TActorBootstrapped { // If we have result it must be allocated on protobuf arena Y_ASSERT(result->GetArena()); Y_ASSERT(resp->GetArena() == result->GetArena()); - resp->AddResults()->Swap(result); + resp->AddYdbResults()->Swap(result); } } else { auto resp 
= ev.MutableResponse(); diff --git a/ydb/core/kqp/session_actor/kqp_worker_common.cpp b/ydb/core/kqp/session_actor/kqp_worker_common.cpp index 63db80b02d88..e7cf7113b049 100644 --- a/ydb/core/kqp/session_actor/kqp_worker_common.cpp +++ b/ydb/core/kqp/session_actor/kqp_worker_common.cpp @@ -119,7 +119,7 @@ void SlowLogQuery(const TActorContext &ctx, const TKikimrConfiguration* config, << 'b'; ui64 resultsSize = 0; - for (auto& result : record->GetResponse().GetResults()) { + for (auto& result : record->GetResponse().GetYdbResults()) { resultsSize += result.ByteSize(); } diff --git a/ydb/core/kqp/topics/kqp_topics.cpp b/ydb/core/kqp/topics/kqp_topics.cpp index bb190899dfad..6b90767ff7b7 100644 --- a/ydb/core/kqp/topics/kqp_topics.cpp +++ b/ydb/core/kqp/topics/kqp_topics.cpp @@ -105,15 +105,15 @@ void TTopicPartitionOperations::AddOperation(const TString& topic, ui32 partitio HasWriteOperations_ = true; } -void TTopicPartitionOperations::BuildTopicTxs(THashMap &txs) +void TTopicPartitionOperations::BuildTopicTxs(TTopicOperationTransactions& txs) { Y_ABORT_UNLESS(TabletId_.Defined()); Y_ABORT_UNLESS(Partition_.Defined()); - auto& tx = txs[*TabletId_]; + auto& t = txs[*TabletId_]; for (auto& [consumer, operations] : Operations_) { - NKikimrPQ::TPartitionOperation* o = tx.MutableOperations()->Add(); + NKikimrPQ::TPartitionOperation* o = t.tx.MutableOperations()->Add(); o->SetPartitionId(*Partition_); auto [begin, end] = operations.GetRange(); o->SetBegin(begin); @@ -123,12 +123,13 @@ void TTopicPartitionOperations::BuildTopicTxs(THashMapAdd(); + NKikimrPQ::TPartitionOperation* o = t.tx.MutableOperations()->Add(); o->SetPartitionId(*Partition_); o->SetPath(*Topic_); if (SupportivePartition_.Defined()) { o->SetSupportivePartition(*SupportivePartition_); } + t.hasWrite = true; } } @@ -355,7 +356,7 @@ bool TTopicOperations::ProcessSchemeCacheNavigate(const NSchemeCache::TSchemeCac return true; } -void TTopicOperations::BuildTopicTxs(THashMap &txs) +void 
TTopicOperations::BuildTopicTxs(TTopicOperationTransactions& txs) { for (auto& [_, operations] : Operations_) { operations.BuildTopicTxs(txs); diff --git a/ydb/core/kqp/topics/kqp_topics.h b/ydb/core/kqp/topics/kqp_topics.h index e0e425c3c1f7..f4ff0bb180e2 100644 --- a/ydb/core/kqp/topics/kqp_topics.h +++ b/ydb/core/kqp/topics/kqp_topics.h @@ -42,6 +42,13 @@ class TConsumerOperations { TDisjointIntervalTree Offsets_; }; +struct TTopicOperationTransaction { + NKikimrPQ::TDataTransaction tx; + bool hasWrite = false; +}; + +using TTopicOperationTransactions = THashMap; + class TTopicPartitionOperations { public: bool IsValid() const; @@ -52,7 +59,7 @@ class TTopicPartitionOperations { void AddOperation(const TString& topic, ui32 partition, TMaybe supportivePartition); - void BuildTopicTxs(THashMap &txs); + void BuildTopicTxs(TTopicOperationTransactions &txs); void Merge(const TTopicPartitionOperations& rhs); @@ -109,7 +116,7 @@ class TTopicOperations { Ydb::StatusIds_StatusCode& status, TString& message); - void BuildTopicTxs(THashMap &txs); + void BuildTopicTxs(TTopicOperationTransactions &txs); void Merge(const TTopicOperations& rhs); diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index 90e3992b7176..6318a8f1e7ea 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -22,7 +22,7 @@ namespace NKqp { } SecretableSecretKey: { Value: { - Data: "secretSecretKey" + Data: "fakeSecret" } } } @@ -31,32 +31,37 @@ namespace NKqp { using namespace NYdb; - TTestHelper::TTestHelper(const TKikimrSettings& settings) - : Kikimr(settings) - , TableClient(Kikimr.GetTableClient()) - , Session(TableClient.CreateSession().GetValueSync().GetSession()) - {} + TTestHelper::TTestHelper(const TKikimrSettings& settings) { + TKikimrSettings kikimrSettings(settings); + if (!kikimrSettings.FeatureFlags.HasEnableTieringInColumnShard()) { + kikimrSettings.SetEnableTieringInColumnShard(true); + } + + Kikimr = 
std::make_unique(kikimrSettings); + TableClient = std::make_unique(Kikimr->GetTableClient()); + Session = std::make_unique(TableClient->CreateSession().GetValueSync().GetSession()); + } NKikimr::NKqp::TKikimrRunner& TTestHelper::GetKikimr() { - return Kikimr; + return *Kikimr; } TTestActorRuntime& TTestHelper::GetRuntime() { - return *Kikimr.GetTestServer().GetRuntime(); + return *Kikimr->GetTestServer().GetRuntime(); } NYdb::NTable::TSession& TTestHelper::GetSession() { - return Session; + return *Session; } void TTestHelper::CreateTable(const TColumnTableBase& table, const EStatus expectedStatus) { std::cerr << (table.BuildQuery()) << std::endl; - auto result = Session.ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), expectedStatus, result.GetIssues().ToString()); } void TTestHelper::CreateTier(const TString& tierName) { - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -70,43 +75,43 @@ namespace NKqp { } ] })"; - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); return ruleName; } 
void TTestHelper::SetTiering(const TString& tableName, const TString& ruleName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` SET (TIERING = '" << ruleName << "')"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::ResetTiering(const TString& tableName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` RESET (TIERING)"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::DropTable(const TString& tableName) { - auto result = Session.DropTable(tableName).GetValueSync(); + auto result = GetSession().DropTable(tableName).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::BulkUpsert(const TColumnTable& table, TTestHelper::TUpdatesBuilder& updates, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); auto batch = updates.BuildArrow(); helper.SendDataViaActorSystem(table.GetName(), batch, opStatus); } void TTestHelper::BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); helper.SendDataViaActorSystem(table.GetName(), batch, opStatus); } void TTestHelper::ReadData(const TString& query, const TString& expected, const 
EStatus opStatus /*= EStatus::SUCCESS*/) { - auto it = TableClient.StreamExecuteScanQuery(query).GetValueSync(); + auto it = TableClient->StreamExecuteScanQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); // Means stream successfully get TString result = StreamResultToYson(it, false, opStatus); if (opStatus == EStatus::SUCCESS) { @@ -115,21 +120,29 @@ namespace NKqp { } void TTestHelper::RebootTablets(const TString& tableName) { - auto runtime = Kikimr.GetTestServer().GetRuntime(); + auto runtime = GetKikimr().GetTestServer().GetRuntime(); TActorId sender = runtime->AllocateEdgeActor(); TVector shards; { - auto describeResult = DescribeTable(&Kikimr.GetTestServer(), sender, tableName); + auto describeResult = DescribeTable(&GetKikimr().GetTestServer(), sender, tableName); for (auto shard : describeResult.GetPathDescription().GetColumnTableDescription().GetSharding().GetColumnShards()) { shards.push_back(shard); } } for (auto shard : shards) { - Kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + GetKikimr().GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( new TEvents::TEvPoisonPill(), shard, false)); } } + void TTestHelper::WaitTabletDeletionInHive(ui64 tabletId, TDuration duration) { + auto deadline = TInstant::Now() + duration; + while (GetKikimr().GetTestClient().TabletExistsInHive(&GetRuntime(), tabletId) && TInstant::Now() <= deadline) { + Cerr << "WaitTabletDeletionInHive: wait until " << tabletId << " is deleted" << Endl; + Sleep(TDuration::Seconds(1)); + } + } + TString TTestHelper::TColumnSchema::BuildQuery() const { TStringBuilder str; str << Name << ' '; @@ -233,6 +246,8 @@ namespace NKqp { return arrow::field(name, arrow::int64(), nullable); case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); + case 
NScheme::NTypeIds::Decimal: + return arrow::field(name, arrow::decimal(22, 9)); case NScheme::NTypeIds::Pg: switch (NPg::PgTypeIdFromTypeDesc(typeDesc)) { case INT2OID: diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index a938e91b4d4a..d1be363fd5ef 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -2,15 +2,14 @@ #include "kqp_ut_common.h" #include +#include +#include +#include #include #include #include #include -#include -#include -#include - #include namespace NKikimr { @@ -63,9 +62,9 @@ namespace NKqp { }; private: - TKikimrRunner Kikimr; - NYdb::NTable::TTableClient TableClient; - NYdb::NTable::TSession Session; + std::unique_ptr Kikimr; + std::unique_ptr TableClient; + std::unique_ptr Session; public: TTestHelper(const TKikimrSettings& settings); @@ -82,6 +81,7 @@ namespace NKqp { void BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus = Ydb::StatusIds::SUCCESS); void ReadData(const TString& query, const TString& expected, const NYdb::EStatus opStatus = NYdb::EStatus::SUCCESS); void RebootTablets(const TString& tableName); + void WaitTabletDeletionInHive(ui64 tabletId, TDuration duration); }; } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index d99f2d4fc082..84fbe3585652 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -129,9 +129,10 @@ TKikimrRunner::TKikimrRunner(const TKikimrSettings& settings) { ServerSettings->SetFrFactory(&UdfFrFactory); ServerSettings->SetEnableNotNullColumns(true); ServerSettings->SetEnableMoveIndex(true); - ServerSettings->SetEnableUniqConstraint(true); ServerSettings->SetUseRealThreads(settings.UseRealThreads); ServerSettings->SetEnableTablePgTypes(true); + ServerSettings->SetEnablePgSyntax(true); + ServerSettings->SetEnableOlapCompression(true); ServerSettings->S3ActorsFactory = 
settings.S3ActorsFactory; if (settings.Storage) { @@ -1402,7 +1403,7 @@ bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference){ } /* Temporary solution to canonize tests */ -NJson::TJsonValue CanonizeJoinOrderImpl(const NJson::TJsonValue& opt) { +NJson::TJsonValue GetDetailedJoinOrderImpl(const NJson::TJsonValue& opt) { NJson::TJsonValue res; auto op = opt.GetMapSafe().at("Operators").GetArraySafe()[0]; @@ -1416,18 +1417,41 @@ NJson::TJsonValue CanonizeJoinOrderImpl(const NJson::TJsonValue& opt) { auto subplans = opt.GetMapSafe().at("Plans").GetArraySafe(); for (size_t i = 0; i< subplans.size(); ++i) { - res["args"].AppendValue(CanonizeJoinOrderImpl(subplans[i])); + res["args"].AppendValue(GetDetailedJoinOrderImpl(subplans[i])); } return res; } -/* Temporary solution to canonize tests */ -NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan) { +NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan) { + NJson::TJsonValue optRoot; + NJson::ReadJsonTree(deserializedPlan, &optRoot, true); + optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan")); + return GetDetailedJoinOrderImpl(SimplifyPlan(optRoot)); +} + +NJson::TJsonValue GetJoinOrderImpl(const NJson::TJsonValue& opt) { + if (!opt.GetMapSafe().contains("Plans")) { + auto op = opt.GetMapSafe().at("Operators").GetArraySafe()[0]; + return op.GetMapSafe().at("Table").GetStringSafe(); + } + + NJson::TJsonValue res; + + auto subplans = opt.GetMapSafe().at("Plans").GetArraySafe(); + for (size_t i = 0; i < subplans.size(); ++i) { + res.AppendValue(GetJoinOrderImpl(subplans[i])); + } + + return res; +} + +NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan) { NJson::TJsonValue optRoot; NJson::ReadJsonTree(deserializedPlan, &optRoot, true); optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan")); - return CanonizeJoinOrderImpl(SimplifyPlan(optRoot)); + return GetJoinOrderImpl(SimplifyPlan(optRoot)); } + } // namspace NKqp } // namespace 
NKikimr diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index a43d6d14da0d..a02aeafc2b38 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -97,6 +97,7 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { exchangerSettings->SetStartDelayMs(10); exchangerSettings->SetMaxDelayMs(10); AppConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); + FeatureFlags.SetEnableSparsedColumns(true); } TKikimrSettings& SetAppConfig(const NKikimrConfig::TAppConfig& value) { AppConfig = value; return *this; } @@ -337,8 +338,11 @@ void WaitForZeroSessions(const NKqp::TKqpCounters& counters); bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference); -/* Temporary solution to canonize tests */ -NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan); +/* Gets join order with details as: join algo, join type and scan type. */ +NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan); + +/* Gets tables join order without details : only tables. 
*/ +NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan); } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/ut/federated_query/common/common.cpp b/ydb/core/kqp/ut/federated_query/common/common.cpp index 3eedafce2684..c9c071b1ba95 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.cpp +++ b/ydb/core/kqp/ut/federated_query/common/common.cpp @@ -3,6 +3,16 @@ #include namespace NKikimr::NKqp::NFederatedQueryTest { + TString GetSymbolsString(char start, char end, const TString& skip) { + TStringBuilder result; + for (char symbol = start; symbol <= end; ++symbol) { + if (skip.Contains(symbol)) { + continue; + } + result << symbol; + } + return result; + } NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver) { NYdb::NOperation::TOperationClient client(ydbDriver); @@ -27,6 +37,7 @@ namespace NKikimr::NKqp::NFederatedQueryTest { NKikimrConfig::TFeatureFlags featureFlags; featureFlags.SetEnableExternalDataSources(true); featureFlags.SetEnableScriptExecutionOperations(true); + featureFlags.SetEnableExternalSourceSchemaInference(true); if (!appConfig) { appConfig.emplace(); } diff --git a/ydb/core/kqp/ut/federated_query/common/common.h b/ydb/core/kqp/ut/federated_query/common/common.h index 88a9d279875d..02d8783f6608 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.h +++ b/ydb/core/kqp/ut/federated_query/common/common.h @@ -8,6 +8,8 @@ namespace NKikimr::NKqp::NFederatedQueryTest { using namespace NKikimr::NKqp; + TString GetSymbolsString(char start, char end, const TString& skip = ""); + NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation( const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver); diff --git a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp index 175718089600..3baf3fe7bcee 100644 
--- a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp @@ -35,7 +35,7 @@ namespace NKikimr::NKqp { Ydb, }; - NApi::TDataSourceInstance MakeDataSourceInstance(EProviderType providerType) { + NYql::TGenericDataSourceInstance MakeDataSourceInstance(EProviderType providerType) { switch (providerType) { case EProviderType::PostgreSQL: return TConnectorClientMock::TPostgreSQLDataSourceInstanceBuilder<>().GetResult(); @@ -96,7 +96,7 @@ namespace NKikimr::NKqp { // prepare mock auto clientMock = std::make_shared(); - const NApi::TDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); + const NYql::TGenericDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); // step 1: DescribeTable // clang-format off @@ -125,6 +125,7 @@ namespace NKikimr::NKqp { // step 3: ReadSplits std::vector colData = {10, 20, 30, 40, 50}; clientMock->ExpectReadSplits() + .Filtering(NYql::NConnector::NApi::TReadSplitsRequest::FILTERING_OPTIONAL) .Split() .Description("some binary description") .Select() @@ -191,7 +192,7 @@ namespace NKikimr::NKqp { // prepare mock auto clientMock = std::make_shared(); - const NApi::TDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); + const NYql::TGenericDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); constexpr size_t ROWS_COUNT = 5; @@ -221,6 +222,7 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() + .Filtering(NYql::NConnector::NApi::TReadSplitsRequest::FILTERING_OPTIONAL) .Split() .Description("some binary description") .Select() @@ -283,7 +285,7 @@ namespace NKikimr::NKqp { // prepare mock auto clientMock = std::make_shared(); - const NApi::TDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); + const NYql::TGenericDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); constexpr size_t 
ROWS_COUNT = 5; @@ -313,6 +315,7 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() + .Filtering(NYql::NConnector::NApi::TReadSplitsRequest::FILTERING_OPTIONAL) .Split() .Description("some binary description") .Select() @@ -371,7 +374,7 @@ namespace NKikimr::NKqp { // prepare mock auto clientMock = std::make_shared(); - const NApi::TDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); + const NYql::TGenericDataSourceInstance dataSourceInstance = MakeDataSourceInstance(providerType); // clang-format off const NApi::TSelect select = TConnectorClientMock::TSelectBuilder<>() .DataSourceInstance(dataSourceInstance) @@ -418,6 +421,7 @@ namespace NKikimr::NKqp { std::vector filterColumnData = {42, 24}; // clang-format off clientMock->ExpectReadSplits() + .Filtering(NYql::NConnector::NApi::TReadSplitsRequest::FILTERING_OPTIONAL) .Split() .Description("some binary description") .Select(select) diff --git a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp index ff04ada74a18..4ff6d68be40e 100644 --- a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp @@ -24,7 +24,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { const TString externalDataSourceName = "/Root/external_data_source"; const TString externalTableName = "/Root/test_binding_resolve"; const TString bucket = "test_bucket1"; - const TString object = "test_object"; + const TString object = TStringBuilder() << "test_" << GetSymbolsString(' ', '~', "*?{}") << "_object"; CreateBucketWithObject(bucket, object, TEST_CONTENT); @@ -49,7 +49,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { "external_source"_a = externalDataSourceName, "external_table"_a = externalTableName, "location"_a = GetBucketLocation(bucket), - "object"_a = object + "object"_a = EscapeC(object) ); auto result = 
session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); @@ -805,6 +805,82 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { UNIT_ASSERT_EQUAL(GetObjectKeys(writeBucket).size(), 3); } + Y_UNIT_TEST(InsertIntoBucketValuesCast) { + const TString writeDataSourceName = "/Root/write_data_source"; + const TString writeTableName = "/Root/write_binding"; + const TString writeBucket = "test_bucket_values_cast"; + const TString writeObject = "test_object_write/"; + { + Aws::S3::S3Client s3Client = MakeS3Client(); + CreateBucket(writeBucket, s3Client); + } + + auto kikimr = NTestUtils::MakeKikimrRunner(); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + { + const TString query = fmt::format(R"( + CREATE EXTERNAL DATA SOURCE `{write_source}` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{write_location}", + AUTH_METHOD="NONE" + ); + CREATE EXTERNAL TABLE `{write_table}` ( + key Uint64 NOT NULL, + value String NOT NULL + ) WITH ( + DATA_SOURCE="{write_source}", + LOCATION="{write_object}", + FORMAT="tsv_with_names" + ); + )", + "write_source"_a = writeDataSourceName, + "write_table"_a = writeTableName, + "write_location"_a = GetBucketLocation(writeBucket), + "write_object"_a = writeObject); + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToOneLineString()); + } + + auto db = kikimr->GetQueryClient(); + { + const TString query = fmt::format(R"( + INSERT INTO `{write_table}` + (key, value) + VALUES + (1, "#######"), + (4294967295u, "#######"); + + INSERT INTO `{write_source}`.`{write_object}` WITH (FORMAT = "tsv_with_names") + (key, value) + VALUES + (1, "#######"), + (4294967295u, "#######"); + + INSERT INTO `{write_table}` SELECT * FROM AS_TABLE([ + <|key: 1, value: "#####"|>, + <|key: 4294967295u, value: "#####"|> + ]); + + 
INSERT INTO `{write_source}`.`{write_object}` WITH (FORMAT = "tsv_with_names") + SELECT * FROM AS_TABLE([ + <|key: 1, value: "#####"|>, + <|key: 4294967295u, value: "#####"|> + ]); + )", + "write_source"_a = writeDataSourceName, + "write_table"_a = writeTableName, + "write_object"_a = writeObject); + + const auto result = db.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToOneLineString()); + } + + UNIT_ASSERT_EQUAL(GetObjectKeys(writeBucket).size(), 4); + } + Y_UNIT_TEST(UpdateExternalTable) { const TString readDataSourceName = "/Root/read_data_source"; const TString readTableName = "/Root/read_binding"; @@ -930,7 +1006,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { const TString externalDataSourceName = "/Root/external_data_source"; const TString externalTableName = "/Root/test_binding_resolve"; const TString bucket = "test_bucket1"; - const TString object = "year=1/month=2/test_object"; + const TString object = TStringBuilder() << "year=1/month=2/test_" << GetSymbolsString(' ', '~') << "_object"; const TString content = "data,year,month\ntest,1,2"; CreateBucketWithObject(bucket, object, content); @@ -1793,19 +1869,13 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { ExecuteSelectQuery("test_bucket_execute_script_with_large_file", 5_MB, 500000); } - std::shared_ptr CreateSampleDataSource(const TString& externalDataSourceName, const TString& externalTableName) { - const TString bucket = "test_bucket3"; - const TString object = "test_object"; + Y_UNIT_TEST(TestReadEmptyFileWithCsvFormat) { + const TString externalDataSourceName = "/Root/external_data_source"; + const TString bucket = "test_bucket12"; - NKikimrConfig::TAppConfig appConfig; - appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); - appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); - appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); - 
appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); - appConfig.MutableFeatureFlags()->SetEnableTempTables(true); - auto kikimr = NTestUtils::MakeKikimrRunner(appConfig, "TestDomain"); + CreateBucketWithObject(bucket, "test_object", ""); - CreateBucketWithObject(bucket, "test_object", TEST_CONTENT); + auto kikimr = NTestUtils::MakeKikimrRunner(); auto tc = kikimr->GetTableClient(); auto session = tc.CreateSession().GetValueSync().GetSession(); @@ -1814,152 +1884,418 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { SOURCE_TYPE="ObjectStorage", LOCATION="{location}", AUTH_METHOD="NONE" - ); - CREATE EXTERNAL TABLE `{external_table}` ( - key Utf8 NOT NULL, - value Utf8 NOT NULL - ) WITH ( - DATA_SOURCE="{external_source}", - LOCATION="{object}", - FORMAT="json_each_row" );)", "external_source"_a = externalDataSourceName, - "external_table"_a = externalTableName, - "location"_a = GetBucketLocation(bucket), - "object"_a = object + "location"_a = GetBucketLocation(bucket) + ); + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + const TString sql = fmt::format(R"( + SELECT * FROM `{external_source}`.`/` + WITH ( + SCHEMA = ( + data String + ), + FORMAT = "csv_with_names" + ) + )", "external_source"_a=externalDataSourceName); + + auto db = kikimr->GetQueryClient(); + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } + + 
Y_UNIT_TEST(TestWildcardValidation) { + const TString bucket = "test_bucket13"; + + CreateBucket(bucket); + + auto kikimr = NTestUtils::MakeKikimrRunner(); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + const TString query = fmt::format(R"( + CREATE EXTERNAL DATA SOURCE `/Root/external_data_source` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="NONE" + );)", + "location"_a = GetBucketLocation(bucket) ); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); - return kikimr; + auto db = kikimr->GetQueryClient(); + + { // path validation + const TString sql = R"( + SELECT * FROM `/Root/external_data_source`.`/{` WITH ( + SCHEMA = (data String), + FORMAT = "csv_with_names" + ))"; + + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Failed, readyOp.Status().GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(readyOp.Status().GetIssues().ToString(), "Path '/{' contains invalid wildcard:"); + } + + { // file pattern validation + const TString sql = R"( + SELECT * FROM `/Root/external_data_source`.`/` WITH ( + SCHEMA = (data String), + FORMAT = "csv_with_names", + FILE_PATTERN = "{" + ))"; + + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = 
WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Failed, readyOp.Status().GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(readyOp.Status().GetIssues().ToString(), "File pattern '{' contains invalid wildcard:"); + } } - void ValidateResult(const TExecuteQueryResult& result) { - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetResultSets().size(), 1, "Unexpected result sets count"); + Y_UNIT_TEST(TestSecretsExistingValidation) { + const TString bucket = "test_bucket14"; - TResultSetParser resultSet(result.GetResultSet(0)); - UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2); - UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2); + CreateBucket(bucket); - UNIT_ASSERT(resultSet.TryNextRow()); - UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1"); - UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo"); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableFeatureFlags()->SetEnableExternalSourceSchemaInference(true); + auto kikimr = NTestUtils::MakeKikimrRunner(appConfig); - UNIT_ASSERT(resultSet.TryNextRow()); - UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); - UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + const TString query = fmt::format(R"( + GRANT ALL ON `/Root` TO `test@builtin`; + CREATE OBJECT TestSecret (TYPE SECRET) WITH value = `test_value`; + CREATE EXTERNAL DATA SOURCE `/Root/external_data_source` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_ID="TestSa", + SERVICE_ACCOUNT_SECRET_NAME="TestSecret" + );)", + "location"_a = GetBucketLocation(bucket) + ); + auto result = 
session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + auto db = kikimr->GetQueryClient(NYdb::NQuery::TClientSettings().AuthToken("test@builtin")); + + const TString sql = R"( + SELECT * FROM `/Root/external_data_source`.`/` WITH ( + SCHEMA = (data String), + FORMAT = "csv_with_names" + ))"; + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Failed, readyOp.Status().GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(readyOp.Status().GetIssues().ToString(), "secret with name 'TestSecret' not found"); } - void ValidateTables(TQueryClient& client, const TString& oltpTable, const TString& olapTable) { + Y_UNIT_TEST(OverridePlannerDefaults) { + const TString root = "/Root/"; + const TString source = "source"; + const TString table1 = "table1"; + const TString table2 = "table2"; + const TString bucket = "bucket"; + const TString object1 = "object1"; + const TString object2 = "object2"; + const TString content1 = "foo,bar\naaa,0\nbbb,2"; + const TString content2 = "foo,bar\naaa,1\nbbb,3"; + + Aws::S3::S3Client s3Client = MakeS3Client(); + CreateBucket(bucket, s3Client); + UploadObject(bucket, table1 + "/" + object1, content1, s3Client); + UploadObject(bucket, table1 + "/" + object2, content2, s3Client); + UploadObject(bucket, table2 + "/" + object1, content1, s3Client); + UploadObject(bucket, table2 + "/" + object2, content2, s3Client); + + auto kikimr = NTestUtils::MakeKikimrRunner(); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + const TString 
query = fmt::format(R"( + CREATE EXTERNAL DATA SOURCE `{source}` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="NONE" + ); + CREATE EXTERNAL TABLE `{table1}` ( + foo STRING NOT NULL, + bar UINT32 NOT NULL + ) WITH ( + DATA_SOURCE="{source}", + LOCATION="/{location_table1}/", + FORMAT="csv_with_names" + ); + CREATE EXTERNAL TABLE `{table2}` ( + foo STRING NOT NULL, + bar UINT32 NOT NULL + ) WITH ( + DATA_SOURCE="{source}", + LOCATION="/{location_table2}/", + FORMAT="csv_with_names" + ); + )", + "source"_a = root + source, + "table1"_a = root + table1, + "table2"_a = root + table2, + "location_table1"_a = table1, + "location_table2"_a = table2, + "location"_a = TStringBuilder() << GetEnv("S3_ENDPOINT") << '/' << bucket + ); + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + ui32 source1_id = 0; + ui32 source2_id = 0; + ui32 join_id = 0; + ui32 limit_id = 0; + auto queryClient = kikimr->GetQueryClient(); + + { + // default planner values + + const TString sql = fmt::format(R"( + pragma ydb.CostBasedOptimizationLevel = "1"; + + SELECT SUM(t1.bar + t2.bar) as sum FROM `{table1}` as t1 JOIN /*+grace()*/ `{table2}`as t2 ON t1.foo = t2.foo + )", + "table1"_a = root + table1, + "table2"_a = root + table2); + + TExecuteQueryResult queryResult = queryClient.ExecuteQuery( + sql, + TTxControl::BeginTx().CommitTx(), + TExecuteQuerySettings().ExecMode(EExecMode::Execute).StatsMode(EStatsMode::Full)).GetValueSync(); + + UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); + UNIT_ASSERT(queryResult.GetStats()); + UNIT_ASSERT(queryResult.GetStats()->GetPlan()); + NJson::TJsonValue plan; + UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 2); + 
UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][1]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 2); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 4); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); + + source1_id = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["PhysicalStageId"].GetIntegerSafe(); + source2_id = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][1]["Plans"][0]["Stats"]["PhysicalStageId"].GetIntegerSafe(); + join_id = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["PhysicalStageId"].GetIntegerSafe(); + limit_id = plan["Plan"]["Plans"][0]["Plans"][0]["Stats"]["PhysicalStageId"].GetIntegerSafe(); + } + { - const TString query = TStringBuilder() << "SELECT Unwrap(key), Unwrap(value) FROM `" << oltpTable << "`;"; - ValidateResult(client.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync()); + // scale down + + const TString sql = fmt::format(R"( + pragma ydb.CostBasedOptimizationLevel = "1"; + pragma ydb.OverridePlanner = @@ [ + {{ "tx": 0, "stage": {source1_id}, "tasks": 1 }}, + {{ "tx": 0, "stage": {source2_id}, "tasks": 1 }}, + {{ "tx": 0, "stage": {join_id}, "tasks": 1 }}, + {{ "tx": 0, "stage": {limit_id}, "tasks": 1 }} + ] @@; + + SELECT SUM(t1.bar + t2.bar) as sum FROM `{table1}` as t1 JOIN /*+grace()*/ `{table2}`as t2 ON t1.foo = t2.foo + )", + "source1_id"_a = source1_id, + "source2_id"_a = source2_id, + "join_id"_a = join_id, + "limit_id"_a = limit_id, + "table1"_a = root + table1, + "table2"_a = root + table2); + + TExecuteQueryResult queryResult = queryClient.ExecuteQuery( + sql, + TTxControl::BeginTx().CommitTx(), + TExecuteQuerySettings().ExecMode(EExecMode::Execute).StatsMode(EStatsMode::Full)).GetValueSync(); + + 
UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); + UNIT_ASSERT(queryResult.GetStats()); + UNIT_ASSERT(queryResult.GetStats()->GetPlan()); + NJson::TJsonValue plan; + UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][1]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); } { - const TString query = TStringBuilder() << "SELECT key, value FROM `" << olapTable << "` ORDER BY key;"; - ValidateResult(client.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync()); + // scale up + + const TString sql = fmt::format(R"( + pragma ydb.CostBasedOptimizationLevel = "1"; + pragma ydb.OverridePlanner = @@ [ + {{ "tx": 0, "stage": {source1_id}, "tasks": 10 }}, + {{ "tx": 0, "stage": {source2_id}, "tasks": 10 }}, + {{ "tx": 0, "stage": {join_id}, "tasks": 10 }}, + {{ "tx": 0, "stage": {limit_id}, "tasks": 10 }} + ] @@; + + SELECT SUM(t1.bar + t2.bar) as sum FROM `{table1}` as t1 JOIN /*+grace()*/ `{table2}`as t2 ON t1.foo = t2.foo + )", + "source1_id"_a = source1_id, + "source2_id"_a = source2_id, + "join_id"_a = join_id, + "limit_id"_a = limit_id, + "table1"_a = root + table1, + "table2"_a = root + table2); + + TExecuteQueryResult queryResult = queryClient.ExecuteQuery( + sql, + TTxControl::BeginTx().CommitTx(), + TExecuteQuerySettings().ExecMode(EExecMode::Execute).StatsMode(EStatsMode::Full)).GetValueSync(); + + UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); + 
UNIT_ASSERT(queryResult.GetStats()); + UNIT_ASSERT(queryResult.GetStats()->GetPlan()); + NJson::TJsonValue plan; + UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); + // only 2 files => sources stay with 2 tasks + // join scales to 10 tasks + // limit ignores hint and keeps being in the only task + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 2); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][1]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 2); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 10); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Stats"]["Tasks"].GetIntegerSafe(), 1); } } - Y_UNIT_TEST(CreateTableAsSelectFromExternalDataSource) { - const TString externalDataSourceName = "external_data_source"; - const TString externalTableName = "test_binding_resolve"; + Y_UNIT_TEST(TestOlapToS3Insert) { + const TString root = "/Root/"; + const TString source = "source"; + const TString table1 = "table1"; + const TString table2 = "table2"; + const TString bucket = "bucket"; - auto kikimr = CreateSampleDataSource(externalDataSourceName, externalTableName); - auto client = kikimr->GetQueryClient(); + CreateBucket(bucket); - const TString oltpTable = "DestinationOltp"; - { - const TString query = fmt::format(R"( - PRAGMA TablePathPrefix = "TestDomain"; - CREATE TABLE `{destination}` ( - PRIMARY KEY (key, value) - ) - AS SELECT * - FROM `{external_source}`.`/` WITH ( - format="json_each_row", - schema( - key Utf8 NOT NULL, - value Utf8 NOT NULL - ) - );)", - "destination"_a = oltpTable, - "external_source"_a = externalDataSourceName - ); - auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); - } + auto kikimr = NTestUtils::MakeKikimrRunner(); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); const TString olapTable = "DestinationOlap"; - { - const TString query = fmt::format(R"( - PRAGMA TablePathPrefix = "TestDomain"; - CREATE TABLE `{destination}` ( - PRIMARY KEY (key, value) - ) - WITH (STORE = COLUMN) - AS SELECT * - FROM `{external_source}`.`/` WITH ( - format="json_each_row", - schema( - key Utf8 NOT NULL, - value Utf8 NOT NULL - ) - );)", - "destination"_a = olapTable, - "external_source"_a = externalDataSourceName + + const TString query = fmt::format(R"( + CREATE EXTERNAL DATA SOURCE `{source}` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="NONE" ); - auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - } + CREATE EXTERNAL TABLE `{table1}` ( + key Int64 NOT NULL, + value String NOT NULL, + ) WITH ( + DATA_SOURCE="{source}", + LOCATION="/{location_table1}/", + FORMAT="csv_with_names" + ); + CREATE EXTERNAL TABLE `{table2}` ( + key Int64 NOT NULL, + value String NOT NULL, + year String NOT NULL + ) WITH ( + DATA_SOURCE="{source}", + LOCATION="/{location_table2}/", + FORMAT="csv_with_names", + PARTITIONED_BY="['year']" + ); + CREATE TABLE `{olap_table}` ( + key Int64 NOT NULL, + value String NOT NULL, + PRIMARY KEY (key) + ) + WITH (STORE = COLUMN);)", + "location"_a = GetBucketLocation(bucket), + "source"_a = root + source, + "table1"_a = root + table1, + "table2"_a = root + table2, + "location_table1"_a = table1, + "location_table2"_a = table2, + "olap_table"_a = olapTable + ); + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); - ValidateTables(client, oltpTable, 
olapTable); - } + auto db = kikimr->GetQueryClient(); - Y_UNIT_TEST(CreateTableAsSelectFromExternalTable) { - const TString externalDataSourceName = "external_data_source"; - const TString externalTableName = "test_binding_resolve"; + { + const TString sql = fmt::format(R"( + INSERT INTO {destination} + SELECT key, value FROM {source};)", + "destination"_a = table1, + "source"_a = olapTable); - auto kikimr = CreateSampleDataSource(externalDataSourceName, externalTableName); - auto client = kikimr->GetQueryClient(); + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } - const TString oltpTable = "DestinationOltp"; { - const TString query = fmt::format(R"( - PRAGMA TablePathPrefix = "TestDomain"; - CREATE TABLE `{destination}` ( - PRIMARY KEY (key, value) - ) - AS SELECT * - FROM `{external_table}`;)", - "destination"_a = oltpTable, - "external_table"_a = externalTableName - ); - auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + const TString sql = fmt::format(R"( + INSERT INTO {destination} + SELECT key, value FROM {source} LIMIT 1;)", + "destination"_a = table1, + "source"_a = olapTable); + + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + 
UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); } - const TString olapTable = "DestinationOlap"; { - const TString query = fmt::format(R"( - PRAGMA TablePathPrefix = "TestDomain"; - CREATE TABLE `{destination}` ( - PRIMARY KEY (key, value) - ) - WITH (STORE = COLUMN) - AS SELECT * - FROM `{external_table}`;)", - "destination"_a = olapTable, - "external_table"_a = externalTableName - ); - auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + const TString sql = fmt::format(R"( + INSERT INTO {destination} + SELECT key, value, "2024" AS year FROM {source};)", + "destination"_a = table2, + "source"_a = olapTable); + + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); } - ValidateTables(client, oltpTable, olapTable); + { + const TString sql = fmt::format(R"( + INSERT INTO {destination} + SELECT key, value, "2024" AS year FROM {source} LIMIT 1;)", + "destination"_a = table2, + "source"_a = olapTable); + + auto scriptExecutionOperation = db.ExecuteScript(sql).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, 
scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } } } diff --git a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_scheme_ut.cpp b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_scheme_ut.cpp index 39e53fd71eef..57f1694e1ea4 100644 --- a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_scheme_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_scheme_ut.cpp @@ -215,6 +215,28 @@ Y_UNIT_TEST_SUITE(KqpFederatedSchemeTest) { }; TestInvalidDropForExternalTableWithAuth(queryClientExecutor, "generic_query"); } + + Y_UNIT_TEST(ExternalTableDdlLocationValidation) { + auto kikimr = NTestUtils::MakeKikimrRunner(); + auto db = kikimr->GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + auto query = TStringBuilder() << R"( + CREATE EXTERNAL DATA SOURCE `/Root/ExternalDataSource` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + CREATE EXTERNAL TABLE `/Root/ExternalTable` ( + Key Uint64, + Value String + ) WITH ( + DATA_SOURCE="/Root/ExternalDataSource", + LOCATION="{" + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SCHEME_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Location '{' contains invalid wildcard:"); + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/federated_query/s3/kqp_s3_plan_ut.cpp b/ydb/core/kqp/ut/federated_query/s3/kqp_s3_plan_ut.cpp index 423fdaeb14c3..b88a1c47e988 100644 --- a/ydb/core/kqp/ut/federated_query/s3/kqp_s3_plan_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/s3/kqp_s3_plan_ut.cpp @@ -143,14 +143,15 @@ 
Y_UNIT_TEST_SUITE(KqpS3PlanTest) { UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); const auto& writeStagePlan = plan["Plan"]["Plans"][0]["Plans"][0]; - UNIT_ASSERT_VALUES_EQUAL(writeStagePlan["Node Type"].GetStringSafe(), "Limit-Sink"); - UNIT_ASSERT(writeStagePlan["Operators"].GetArraySafe().size() >= 2); - const auto& sinkOp = writeStagePlan["Operators"].GetArraySafe()[1]; + UNIT_ASSERT_VALUES_EQUAL(writeStagePlan["Node Type"].GetStringSafe(), "Stage-Sink"); + UNIT_ASSERT(writeStagePlan["Operators"].GetArraySafe().size() >= 1); + const auto& sinkOp = writeStagePlan["Operators"].GetArraySafe()[0]; UNIT_ASSERT_VALUES_EQUAL(sinkOp["ExternalDataSource"].GetStringSafe(), "write_data_source"); UNIT_ASSERT_VALUES_EQUAL(sinkOp["Compression"].GetStringSafe(), "gzip"); - const auto& readStagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; + const auto& readStagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; UNIT_ASSERT_VALUES_EQUAL(readStagePlan["Node Type"].GetStringSafe(), "Source"); + UNIT_ASSERT(readStagePlan["Operators"].GetArraySafe().size() >= 1); const auto& sourceOp = readStagePlan["Operators"].GetArraySafe()[0]; UNIT_ASSERT_VALUES_EQUAL(sourceOp["ExternalDataSource"].GetStringSafe(), "read_data_source"); diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index 0903b929a4b5..38de83491cbb 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -35,7 +35,7 @@ TIntrusivePtr GetIcGateway(Tests::TServer& server) { counters->Counters = new TKqpCounters(server.GetRuntime()->GetAppData(0).Counters); counters->TxProxyMon = new NTxProxy::TTxProxyMon(server.GetRuntime()->GetAppData(0).Counters); std::shared_ptr loader = std::make_shared(TestCluster, server.GetRuntime()->GetAnyNodeActorSystem(),TIntrusivePtr(nullptr),false); - return 
NKqp::CreateKikimrIcGateway(TestCluster, NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY, "/Root", std::move(loader), server.GetRuntime()->GetAnyNodeActorSystem(), + return NKqp::CreateKikimrIcGateway(TestCluster, NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY, "/Root", "/Root", std::move(loader), server.GetRuntime()->GetAnyNodeActorSystem(), server.GetRuntime()->GetNodeId(0), counters, server.GetSettings().AppConfig->GetQueryServiceConfig()); } @@ -53,9 +53,9 @@ TIntrusivePtr CreateKikimrQueryProcessor(TIntrusivePtr ga UNIT_ASSERT(TryParseFromTextFormat(defaultSettingsStream, defaultSettings)); kikimrConfig->Init(defaultSettings.GetDefaultSettings(), cluster, settings, true); - auto federatedQuerySetup = std::make_optional({NYql::IHTTPGateway::Make(), nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr}); + auto federatedQuerySetup = std::make_optional({NYql::IHTTPGateway::Make(), nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr, {}}); return NKqp::CreateKqpHost(gateway, cluster, "/Root", kikimrConfig, moduleResolver, - federatedQuerySetup, nullptr, nullptr, {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem); + federatedQuerySetup, nullptr, nullptr, NKikimrConfig::TQueryServiceConfig(), {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem, nullptr); } NYql::NNodes::TExprBase GetExpr(const TString& ast, NYql::TExprContext& ctx, NYql::IModuleResolver* moduleResolver) { @@ -3500,14 +3500,17 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda } } - Y_UNIT_TEST(SecondaryIndexUsingInJoin) { + Y_UNIT_TEST_TWIN(SecondaryIndexUsingInJoin, UseStreamJoin) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(UseStreamJoin); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() - .SetKqpSettings({setting}); + 
.SetKqpSettings({setting}) + .SetAppConfig(appConfig); TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - bool streamLookupEnabled = serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamLookup(); { auto tableBuilder = db.GetTableBuilder(); @@ -3575,16 +3578,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - 
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3605,16 +3622,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), 
"/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3636,17 +3667,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = 1; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); - indexPhaseId = streamLookupEnabled ? 1 : 2; + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - 
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3667,27 +3711,44 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 
1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } } - Y_UNIT_TEST(SecondaryIndexUsingInJoin2) { + Y_UNIT_TEST_TWIN(SecondaryIndexUsingInJoin2, UseStreamJoin) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(UseStreamJoin); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() - .SetKqpSettings({setting}); + .SetKqpSettings({setting}) + .SetAppConfig(appConfig); TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - bool streamLookupEnabled = serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamLookup(); NYdb::NTable::TExecDataQuerySettings execSettings; execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); @@ -3756,29 +3817,36 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 
2 : 4); - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - - if (streamLookupEnabled) { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).reads().rows(), 2); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 3); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } else { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - - indexPhaseId++; - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); 
+ UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 2); + for (const auto& ta : stats.query_phases(1).table_access()) { + if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } } @@ -3801,29 +3869,36 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 
2 : 4); - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - - if (streamLookupEnabled) { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).reads().rows(), 2); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 3); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } else { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - - indexPhaseId++; - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); 
+ UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 2); + for (const auto& ta : stats.query_phases(1).table_access()) { + if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } } } diff --git a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json new file mode 100644 index 000000000000..b57351147ecf --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json @@ -0,0 +1,62 @@ +{ + "op_name":"LeftJoin (MapJoin)", + "args": + [ + { + "op_name":"LeftJoin (MapJoin)", + "args": + [ + { + "op_name":"LeftJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"quotas_browsers_relation" + }, + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"TableFullScan", + "table":"quotas_browsers_relation" + } + ] + } + ] + }, + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"LeftJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"quotas_browsers_relation" + }, + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"TableFullScan", + "table":"quotas_browsers_relation" + } + ] + } + ] + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"quota" + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcc.json b/ydb/core/kqp/ut/join/data/join_order/tpcc.json new file mode 100644 index 000000000000..54ab7ad3c989 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpcc.json @@ -0,0 +1,26 @@ +{ + 
"op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"Filter", + "args": + [ + { + "op_name":"TableRangeScan", + "table":"test\/tpcc\/order_line" + } + ] + } + ] + }, + { + "op_name":"TableRangeScan", + "table":"test\/tpcc\/order_line" + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json index 45076827e81f..10e7220a79d7 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json @@ -1 +1,374 @@ -{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/income_band"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/income_band"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/promotion"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_address"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/household_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"InnerJoin (Grace)","args":[{"op_name":"TableFullScan","table":"test\/ds\/catalog_sales"},{"op_name":"TableFullScan","table":"test\/ds\/catalog_returns"}]},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/store_returns"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/item"},{"op_name":"InnerJoin 
(MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_address"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/household_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/store_sales"},{"op_name":"TableFullScan","table":"test\/ds\/store"}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/income_band"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/income_band"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/promotion"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_address"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/household_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"InnerJoin (Grace)","args":[{"op_name":"TableFullScan","table":"test\/ds\/catalog_sales"},{"op_name":"TableFullScan","table":"test\/ds\/catalog_returns"}]},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/store_returns"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/item"},{"op_name":"InnerJoin 
(MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_address"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/household_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer_demographics"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/customer"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/date_dim"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"test\/ds\/store_sales"},{"op_name":"TableFullScan","table":"test\/ds\/store"}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]} \ No newline at end of file +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + 
"op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/promotion" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + } + ] + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + 
"op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/promotion" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json new file mode 100644 index 000000000000..10e7220a79d7 --- /dev/null +++ 
b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json @@ -0,0 +1,374 @@ +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/promotion" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { 
+ "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + } + ] + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/promotion" + } + ] + }, + { 
+ "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/income_band" + } + ] + } + ] + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json new file mode 100644 index 000000000000..5ce49b9af11a --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json @@ -0,0 +1,54 @@ +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + 
}, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json new file mode 100644 index 000000000000..5ce49b9af11a --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json @@ -0,0 +1,54 @@ +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"CrossJoin", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json index 7b3f2cd87a16..0b7c1e407f39 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json @@ -1,76 +1,84 @@ { - "op_name": "LeftJoin (Grace)", - "args": [ - { - "op_name": "LeftJoin (Grace)", - "args": [ - { - "op_name": "LeftJoin (Grace)", - "args": [ + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + "args": + [ { - "op_name": "InnerJoin (MapJoin)", - "args": [ - { - "op_name": "TableFullScan", - "table": "test/ds/store_sales" - }, - { - "op_name": "TableFullScan", - "table": "test/ds/date_dim" - } - ] + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + 
{ + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] }, { - "op_name": "TableFullScan", - "table": "test/ds/store_returns" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/web_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/web_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] } ] - }, - { - "op_name": "InnerJoin (MapJoin)", - "args": [ + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ { - "op_name": "LeftJoin (Grace)", - "args": [ - { - "op_name": "TableFullScan", - "table": "test/ds/web_sales" - }, - { - "op_name": "TableFullScan", - "table": "test/ds/web_returns" - } - ] + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] }, { - "op_name": "TableFullScan", - "table": "test/ds/date_dim" + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" } ] - } - ] - }, - { - "op_name": "InnerJoin (MapJoin)", - "args": [ - { - "op_name": "LeftJoin (Grace)", - "args": [ - { - "op_name": "TableFullScan", - "table": "test/ds/catalog_sales" - }, - { - "op_name": "TableFullScan", - "table": "test/ds/catalog_returns" - } - ] - }, - { - "op_name": "TableFullScan", - "table": "test/ds/date_dim" - } - ] - } - ] + } + ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json new file mode 100644 index 000000000000..0b7c1e407f39 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json @@ -0,0 +1,84 @@ +{ + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + 
"args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/web_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/web_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"LeftJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json index 61e524b69def..44cb435d8771 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json @@ -1 +1,96 @@ -{"op_name":"InnerJoin (Grace)","args":[{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"partsupp"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"supplier"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"nation"},{"op_name":"TableFullScan","table":"region"}]}]}]},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"partsupp"},{"op_name":"InnerJoin (MapJoin)","args":[{"op_name":"TableFullScan","table":"supplier"},{"op_name":"InnerJoin 
(MapJoin)","args":[{"op_name":"TableFullScan","table":"nation"},{"op_name":"TableFullScan","table":"region"}]}]}]},{"op_name":"TableFullScan","table":"part"}]}]} +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json new file mode 100644 index 000000000000..827ca629c612 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json @@ -0,0 +1,84 @@ +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + }, + { + "op_name":"TableFullScan", + 
"table":"region" + } + ] + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + }, + { + "op_name":"TableFullScan", + "table":"region" + } + ] + } + ] + } + ] + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json index 56e74720fbcd..606c72699f4f 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json @@ -1,49 +1,54 @@ { - "op_name": "InnerJoin (Grace)", - "args": [ - { - "op_name": "InnerJoin (Grace)", - "args": [ - { - "op_name": "TableFullScan", - "table": "orders" - }, - { - "op_name": "InnerJoin (Grace)", - "args": [ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ { - "op_name": "TableFullScan", - "table": "lineitem" + "op_name":"TableFullScan", + "table":"orders" }, { - "op_name": "InnerJoin (Grace)", - "args": [ - { - "op_name": "TableFullScan", - "table": "partsupp" - }, - { - "op_name": "TableFullScan", - "table": "part" - } - ] + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"lineitem" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] + } + ] } ] - } - ] - }, - { - "op_name": "InnerJoin (MapJoin)", - "args": [ - { - "op_name": "TableFullScan", - "table": "supplier" - }, - { - "op_name": "TableFullScan", - "table": "nation" - } - ] - } - ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + 
[ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + } + ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json new file mode 100644 index 000000000000..606c72699f4f --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json @@ -0,0 +1,54 @@ +{ + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"orders" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"lineitem" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] + } + ] + } + ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + } + ] +} diff --git a/ydb/core/kqp/ut/join/data/queries/five_way_join_stats_override.sql b/ydb/core/kqp/ut/join/data/queries/five_way_join_stats_override.sql index f21b9f8edf32..b97127a3097b 100644 --- a/ydb/core/kqp/ut/join/data/queries/five_way_join_stats_override.sql +++ b/ydb/core/kqp/ut/join/data/queries/five_way_join_stats_override.sql @@ -1,4 +1,4 @@ -PRAGMA ydb.OverrideStatistics = '{"/Root/R":{"n_rows":100500, "key_columns":["id"], "columns":[{"name":"id", "n_unique_vals":50}]}}'; +PRAGMA ydb.OptOverrideStatistics = '{"/Root/R":{"n_rows":100500, "key_columns":["id"], "columns":[{"name":"id", "n_unique_vals":50}]}}'; SELECT * FROM `/Root/R` as R INNER JOIN diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql new file mode 100644 index 000000000000..cba3a99a1d37 --- /dev/null +++ 
b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql @@ -0,0 +1,22 @@ +PRAGMA TablePathPrefix='/Root'; + +PRAGMA ydb.OptJoinOrderHints= + '[ + [["R", "S"], ["T", "U"]] + ]'; + +PRAGMA ydb.OptCardinalityHints = + '[ + {"labels":["R"], "op":"#", "value":10e8}, + {"labels":["T"], "op":"#", "value":1}, + {"labels":["R", "T"], "op":"#", "value":1}, + {"labels":["R", "S"], "op":"#", "value":10e8}, + {"labels":["T", "U"], "op":"#", "value":10e8}, + {"labels":["V"], "op":"#", "value":1} + ]'; + +SELECT * FROM + R INNER JOIN S on R.id = S.id + INNER JOIN T on R.id = T.id + INNER JOIN U on T.id = U.id + INNER JOIN V on U.id = V.id; diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql new file mode 100644 index 000000000000..5ddee5c2e543 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql @@ -0,0 +1,22 @@ +PRAGMA TablePathPrefix='/Root'; + +PRAGMA ydb.OptJoinOrderHints= + '[ + ["R", "S"], + ["T", "U"] + ]'; +PRAGMA ydb.OptCardinalityHints = + '[ + {"labels":["R"], "op":"#", "value":10e8}, + {"labels":["T"], "op":"#", "value":1}, + {"labels":["R", "T"], "op":"#", "value":1}, + {"labels":["R", "S"], "op":"#", "value":10e8}, + {"labels":["T", "U"], "op":"#", "value":10e8}, + {"labels":["V"], "op":"#", "value":1} + ]'; + +SELECT * FROM + R INNER JOIN S on R.id = S.id + INNER JOIN T on R.id = T.id + INNER JOIN U on T.id = U.id + INNER JOIN V on U.id = V.id; diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql new file mode 100644 index 000000000000..7482d192cea3 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql @@ -0,0 +1,18 @@ +PRAGMA TablePathPrefix='/Root'; + +PRAGMA ydb.OptCardinalityHints = + '[ + {"labels":["R"], "op":"#", "value":10e8}, + {"labels":["T"], "op":"#", "value":1}, + {"labels":["S"], "op":"#", 
"value":10e8}, + {"labels":["R", "T"], "op":"#", "value":1}, + {"labels":["R", "S"], "op":"#", "value":10e8} + ]'; +PRAGMA ydb.OptJoinOrderHints= + '[ + ["T", ["R", "S"]] + ]'; + +SELECT * FROM + R INNER JOIN S on R.id = S.id + INNER JOIN T on R.id = T.id \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/lookupbug.sql b/ydb/core/kqp/ut/join/data/queries/lookupbug.sql new file mode 100644 index 000000000000..ec88e372986a --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/lookupbug.sql @@ -0,0 +1,38 @@ +DECLARE $quotaName as Utf8?; +DECLARE $browserGroup as Utf8?; +DECLARE $limit as Uint32; +DECLARE $offset as Uint32; +PRAGMA TablePathPrefix ="/Root/"; + +$browsers = ( + SELECT + b.id as id, + q.name AS quota_name, + b.name AS name, + b.version AS version, + b.group AS group, + b.description AS description, + bg.browser_platform AS platform, + MAX_OF(qb.created_at, b.created_at) AS created_at, + qb.deleted_at AS deleted_at + FROM + quotas_browsers_relation AS qb + LEFT JOIN + browsers AS b + ON qb.browser_id = b.id + LEFT JOIN + browser_groups AS bg + ON + b.group = bg.name + LEFT JOIN + quota as q + ON + qb.quota_id = q.id + WHERE + ( + ($quotaName IS NOT NULL AND q.name = $quotaName) OR + $quotaName IS NULL ) AND + ( ($browserGroup IS NOT NULL AND b.group = $browserGroup) OR $browserGroup IS NULL + ) AND ( group IS NOT NULL )); + + SELECT * FROM $browsers ORDER BY created_at LIMIT $limit OFFSET $offset; \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql new file mode 100644 index 000000000000..b58b49e3e3f7 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql @@ -0,0 +1,8 @@ +PRAGMA ydb.OptCardinalityHints = '[{"labels":["R"], "op":"#", "value":1}, {"labels":["R","S"], "op":"#", "value":10e6}]'; +PRAGMA ydb.OptJoinAlgoHints = '[{"labels":["R","S"], "algo":"GraceJoin"}]'; + +SELECT * +FROM `/Root/R` as R + INNER JOIN + 
`/Root/S` as S + ON R.id = S.id \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql new file mode 100644 index 000000000000..1e56ae6aef04 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql @@ -0,0 +1,16 @@ +PRAGMA ydb.OptCardinalityHints = '[{"labels":["R","S"], "op":"#", "value":1.0}]'; + +SELECT * +FROM `/Root/R` as R + INNER JOIN + `/Root/S` as S + ON R.id = S.id + INNER JOIN + `/Root/T` as T + ON S.id = T.id + INNER JOIN + `/Root/U` as U + ON T.id = U.id + INNER JOIN + `/Root/V` as V + ON U.id = V.id diff --git a/ydb/core/kqp/ut/join/data/queries/tpcc.sql b/ydb/core/kqp/ut/join/data/queries/tpcc.sql new file mode 100644 index 000000000000..38daad0a176e --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpcc.sql @@ -0,0 +1,8 @@ +SELECT COUNT(DISTINCT (s.S_I_ID)) AS STOCK_COUNT +FROM `/Root/test/tpcc/order_line` as ol INNER JOIN `/Root/test/tpcc/stock` as s ON s.S_I_ID = ol.OL_I_ID +WHERE ol.OL_W_ID = 1 +AND ol.OL_D_ID = 10 +AND ol.OL_O_ID < 3000 +AND ol.OL_O_ID >= 2900 +AND s.S_W_ID = 1 +AND s.S_QUANTITY < 15 diff --git a/ydb/core/kqp/ut/join/data/queries/tpcds34.sql b/ydb/core/kqp/ut/join/data/queries/tpcds34.sql new file mode 100644 index 000000000000..782b5c6d92d9 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpcds34.sql @@ -0,0 +1,38 @@ +pragma TablePathPrefix = "/Root/test/ds/"; + +-- NB: Subquerys +-- start query 1 in stream 0 using template query34.tpl and seed 1971067816 +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select store_sales.ss_ticket_number ss_ticket_number + ,store_sales.ss_customer_sk ss_customer_sk + ,count(*) cnt + from store_sales as store_sales + cross join date_dim as date_dim + cross join store as store + cross join household_demographics as household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and 
store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Salem County','Terrell County','Arthur County','Oglethorpe County', + 'Lunenburg County','Perry County','Halifax County','Sumner County') + group by store_sales.ss_ticket_number,store_sales.ss_customer_sk) dn + cross join customer as customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; + +-- end query 1 in stream 0 using template query34.tpl \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/tpcds64_small.sql b/ydb/core/kqp/ut/join/data/queries/tpcds64_small.sql new file mode 100644 index 000000000000..ec9d4b872532 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpcds64_small.sql @@ -0,0 +1,42 @@ +PRAGMA TablePathPrefix='/Root/test/ds'; + +--- NB: Subquerys + +$cs_ui = + + (select catalog_sales.cs_item_sk + + from catalog_sales as catalog_sales + + group by catalog_sales.cs_item_sk +); + +$cross_sales = + + (select item.i_product_name product_name + + ,store.s_store_name store_name + + FROM $cs_ui cs_ui + + cross join store as store + + cross join item as item + +group by item.i_product_name + + ,store.s_store_name + +); + +-- start query 1 in stream 0 using template query64.tpl and seed 1220860970 + +select cs1.product_name + +from $cross_sales cs1 cross join $cross_sales cs2 + +where cs1.store_name = cs2.store_name; + + + +-- end query 1 
in stream 0 using template query64.tpl \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/tpcds87.sql b/ydb/core/kqp/ut/join/data/queries/tpcds87.sql new file mode 100644 index 000000000000..cefa42e92043 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpcds87.sql @@ -0,0 +1,42 @@ +PRAGMA TablePathPrefix='/Root/test/ds'; + +-- NB: Subquerys +$bla1 = (select distinct + COALESCE(c_last_name,'') as c_last_name, + COALESCE(c_first_name,'') as c_first_name, + COALESCE(cast(d_date as date), cast(0 as Date)) as d_date + from store_sales as store_sales + cross join date_dim as date_dim + cross join customer as customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11); + +$bla2 = ((select distinct + COALESCE(c_last_name,'') as c_last_name, + COALESCE(c_first_name,'') as c_first_name, + COALESCE(cast(d_date as date), cast(0 as Date)) as d_date + from catalog_sales as catalog_sales + cross join date_dim as date_dim + cross join customer as customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) + union all + (select distinct + COALESCE(c_last_name,'') as c_last_name, + COALESCE(c_first_name,'') as c_first_name, + COALESCE(cast(d_date as date), cast(0 as Date)) as d_date + from web_sales as web_sales + cross join date_dim as date_dim + cross join customer as customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11)); + +-- start query 1 in stream 0 using template query87.tpl and seed 1819994127 +select count(*) +from $bla1 bla1 left only join $bla2 bla2 using (c_last_name, c_first_name, d_date) +; + +-- end query 1 in stream 0 using template query87.tpl \ No newline at end of file diff --git 
a/ydb/core/kqp/ut/join/data/schema/lookupbug.sql b/ydb/core/kqp/ut/join/data/schema/lookupbug.sql new file mode 100644 index 000000000000..efb7a5af7af8 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/schema/lookupbug.sql @@ -0,0 +1,46 @@ +CREATE TABLE `/Root/quotas_browsers_relation` ( + quota_id Utf8 NOT NULL, + browser_id Utf8 NOT NULL, + id Utf8, + created_at Timestamp, + deleted_at Timestamp, + primary key (quota_id, browser_id) +); + +CREATE TABLE `/Root/browsers` ( + id Utf8 NOT NULL, + name Utf8, + version Utf8, + group Utf8, + created_at Timestamp, + deleted_at Timestamp, + description Utf8, + primary key (id) +); + +CREATE TABLE `/Root/browser_groups` ( + name Utf8 NOT NULL, + platform Utf8, + sessions_per_agent_limit Uint32 , + cpu_cores_per_session Double , + ramdrive_gb_per_session Double , + ram_gb_per_session Double , + ramdrive_size_gb Double , + session_request_timeout_ms Uint32 , + browser_platform Utf8 , + service_startup_timeout_ms Uint32 , + session_attempt_timeout_ms Uint32, + primary key (name) +); + +CREATE TABLE `/Root/quota` ( + name Utf8 NOT NULL, + created_at Timestamp , + owner Utf8 , + agents_max_limit Uint32 , + agent_kill_timeout_ms Uint32 , + agent_queue_time_limit_ms Uint32 , + agent_secret_id Utf8 , + id Utf8 , + primary key(name) +); diff --git a/ydb/core/kqp/ut/join/data/schema/tpcc.sql b/ydb/core/kqp/ut/join/data/schema/tpcc.sql new file mode 100644 index 000000000000..816ef71ad5d9 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/schema/tpcc.sql @@ -0,0 +1,133 @@ +CREATE TABLE`/Root/test/tpcc/warehouse` ( + W_ID Int32 NOT NULL, + W_YTD Double, + W_TAX Double, + W_NAME Utf8, + W_STREET_1 Utf8, + W_STREET_2 Utf8, + W_CITY Utf8, + W_STATE Utf8, + W_ZIP Utf8, + PRIMARY KEY (W_ID) + ); + +CREATE TABLE `/Root/test/tpcc/item` ( + I_ID Int32 NOT NULL, + I_NAME Utf8, + I_PRICE Double, + I_DATA Utf8, + I_IM_ID Int32, + PRIMARY KEY (I_ID) + ); + +CREATE TABLE `/Root/test/tpcc/stock` ( + S_W_ID Int32 NOT NULL, + S_I_ID Int32 NOT NULL, + 
S_QUANTITY Int32, + S_YTD Double, + S_ORDER_CNT Int32, + S_REMOTE_CNT Int32, + S_DATA Utf8, + S_DIST_01 Utf8, + S_DIST_02 Utf8, + S_DIST_03 Utf8, + S_DIST_04 Utf8, + S_DIST_05 Utf8, + S_DIST_06 Utf8, + S_DIST_07 Utf8, + S_DIST_08 Utf8, + S_DIST_09 Utf8, + S_DIST_10 Utf8, + PRIMARY KEY (S_W_ID, S_I_ID) + ); + +CREATE TABLE `/Root/test/tpcc/district` ( + D_W_ID Int32 NOT NULL, + D_ID Int32 NOT NULL, + D_YTD Double, + D_TAX Double, + D_NEXT_O_ID Int32, + D_NAME Utf8, + D_STREET_1 Utf8, + D_STREET_2 Utf8, + D_CITY Utf8, + D_STATE Utf8, + D_ZIP Utf8, + PRIMARY KEY (D_W_ID, D_ID) + ); + +CREATE TABLE `/Root/test/tpcc/customer` ( + C_W_ID Int32 NOT NULL, + C_D_ID Int32 NOT NULL, + C_ID Int32 NOT NULL, + C_DISCOUNT Double, + C_CREDIT Utf8, + C_LAST Utf8, + C_FIRST Utf8, + C_CREDIT_LIM Double, + C_BALANCE Double, + C_YTD_PAYMENT Double, + C_PAYMENT_CNT Int32, + C_DELIVERY_CNT Int32, + C_STREET_1 Utf8, + C_STREET_2 Utf8, + C_CITY Utf8, + C_STATE Utf8, + C_ZIP Utf8, + C_PHONE Utf8, + C_SINCE Timestamp, + C_MIDDLE Utf8, + C_DATA Utf8, + + PRIMARY KEY (C_W_ID, C_D_ID, C_ID) + ); + +CREATE TABLE `/Root/test/tpcc/history` ( + H_C_W_ID Int32 NOT NULL, + H_C_ID Int32, + H_C_D_ID Int32, + H_D_ID Int32, + H_W_ID Int32, + H_DATE Timestamp, + H_AMOUNT Double, + H_DATA Utf8, + H_C_NANO_TS Int64 NOT NULL, + + PRIMARY KEY (H_C_W_ID, H_C_NANO_TS) + ); + +CREATE TABLE `/Root/test/tpcc/oorder` ( + O_W_ID Int32 NOT NULL, + O_D_ID Int32 NOT NULL, + O_ID Int32 NOT NULL, + O_C_ID Int32, + O_CARRIER_ID Int32, + O_OL_CNT Int32, + O_ALL_LOCAL Int32, + O_ENTRY_D Timestamp, + + PRIMARY KEY (O_W_ID, O_D_ID, O_ID) + ); + + CREATE TABLE `/Root/test/tpcc/new_order` ( + NO_W_ID Int32 NOT NULL, + NO_D_ID Int32 NOT NULL, + NO_O_ID Int32 NOT NULL, + + PRIMARY KEY (NO_W_ID, NO_D_ID, NO_O_ID) + ); + +CREATE TABLE `/Root/test/tpcc/order_line` ( + OL_W_ID Int32 NOT NULL, + OL_D_ID Int32 NOT NULL, + OL_O_ID Int32 NOT NULL, + OL_NUMBER Int32 NOT NULL, + OL_I_ID Int32, + OL_DELIVERY_D Timestamp, + OL_AMOUNT Double, 
+ OL_SUPPLY_W_ID Int32, + OL_QUANTITY Double, + OL_DIST_INFO Utf8, + + PRIMARY KEY (OL_W_ID, OL_D_ID, OL_O_ID, OL_NUMBER) + ); diff --git a/ydb/core/kqp/ut/join/data/schema/tpcds.sql b/ydb/core/kqp/ut/join/data/schema/tpcds.sql index 07971328f51f..5e7894b62f17 100644 --- a/ydb/core/kqp/ut/join/data/schema/tpcds.sql +++ b/ydb/core/kqp/ut/join/data/schema/tpcds.sql @@ -2,23 +2,23 @@ CREATE TABLE `/Root/test/ds/customer_address` ( ca_address_sk Int64 not null, ca_address_id String not null, ca_street_number String , - ca_street_name String , + ca_street_name String , ca_street_type String , ca_suite_number String , - ca_city String , - ca_county String , - ca_state String , + ca_city String , + ca_county String , + ca_state String , ca_zip String , - ca_country String , - ca_gmt_offset Double , + ca_country String , + ca_gmt_offset Double , ca_location_type String , primary key (ca_address_sk) ); CREATE TABLE `/Root/test/ds/customer_demographics` ( cd_demo_sk Int64 not null, - cd_gender String , - cd_marital_status String , + cd_gender String , + cd_marital_status String , cd_education_status String , cd_purchase_estimate Int64 , cd_credit_rating String , @@ -31,7 +31,7 @@ CREATE TABLE `/Root/test/ds/customer_demographics` ( CREATE TABLE `/Root/test/ds/date_dim` ( d_date_sk Int64 not null, d_date_id String not null, - d_date Date , + d_date Date , d_month_seq Int64 , d_week_seq Int64 , d_quarter_seq Int64 , @@ -43,20 +43,20 @@ CREATE TABLE `/Root/test/ds/date_dim` ( d_fy_year Int64 , d_fy_quarter_seq Int64 , d_fy_week_seq Int64 , - d_day_name String , - d_quarter_name String , - d_holiday String , - d_weekend String , - d_following_holiday String , + d_day_name String , + d_quarter_name String , + d_holiday String , + d_weekend String , + d_following_holiday String , d_first_dom Int64 , d_last_dom Int64 , d_same_day_ly Int64 , d_same_day_lq Int64 , - d_current_day String , - d_current_week String , - d_current_month String , - d_current_quarter String , - 
d_current_year String , + d_current_day String , + d_current_week String , + d_current_month String , + d_current_quarter String , + d_current_year String , primary key (d_date_sk) ); diff --git a/ydb/core/kqp/ut/join/data/schema/tpch.sql b/ydb/core/kqp/ut/join/data/schema/tpch.sql index 117b6008a54b..72a05a64e21c 100644 --- a/ydb/core/kqp/ut/join/data/schema/tpch.sql +++ b/ydb/core/kqp/ut/join/data/schema/tpch.sql @@ -2,7 +2,7 @@ CREATE TABLE `/Root/customer` ( c_acctbal Double, c_address String, c_comment String, - c_custkey Int32, -- Identifier + c_custkey Int32 NOT NULL, -- Identifier c_mktsegment String , c_name String , c_nationkey Int32 , -- FK to N_NATIONKEY @@ -15,9 +15,9 @@ CREATE TABLE `/Root/lineitem` ( l_commitdate Date , l_discount Double , -- it should be Decimal(12, 2) l_extendedprice Double , -- it should be Decimal(12, 2) - l_linenumber Int32 , + l_linenumber Int32 NOT NULL, l_linestatus String , - l_orderkey Int32 , -- FK to O_ORDERKEY + l_orderkey Int32 NOT NULL, -- FK to O_ORDERKEY l_partkey Int32 , -- FK to P_PARTKEY, first part of the compound FK to (PS_PARTKEY, PS_SUPPKEY) with L_SUPPKEY l_quantity Double , -- it should be Decimal(12, 2) l_receiptdate Date , @@ -33,7 +33,7 @@ CREATE TABLE `/Root/lineitem` ( CREATE TABLE `/Root/nation` ( n_comment String , n_name String , - n_nationkey Int32 , -- Identifier + n_nationkey Int32 NOT NULL, -- Identifier n_regionkey Int32 , -- FK to R_REGIONKEY PRIMARY KEY(n_nationkey) ); @@ -43,7 +43,7 @@ CREATE TABLE `/Root/orders` ( o_comment String , o_custkey Int32 , -- FK to C_CUSTKEY o_orderdate Date , - o_orderkey Int32 , -- Identifier + o_orderkey Int32 NOT NULL, -- Identifier o_orderpriority String , o_orderstatus String , o_shippriority Int32 , @@ -57,7 +57,7 @@ CREATE TABLE `/Root/part` ( p_container String , p_mfgr String , p_name String , - p_partkey Int32 , -- Identifier + p_partkey Int32 NOT NULL, -- Identifier p_retailprice Double , -- it should be Decimal(12, 2) p_size Int32 , p_type String , @@ 
-67,8 +67,8 @@ CREATE TABLE `/Root/part` ( CREATE TABLE `/Root/partsupp` ( ps_availqty Int32 , ps_comment String , - ps_partkey Int32 , -- FK to P_PARTKEY - ps_suppkey Int32 , -- FK to S_SUPPKEY + ps_partkey Int32 NOT NULL, -- FK to P_PARTKEY + ps_suppkey Int32 NOT NULL, -- FK to S_SUPPKEY ps_supplycost Double , -- it should be Decimal(12, 2) PRIMARY KEY(ps_partkey, ps_suppkey) ); @@ -76,7 +76,7 @@ CREATE TABLE `/Root/partsupp` ( CREATE TABLE `/Root/region` ( r_comment String , r_name String , - r_regionkey Int32 , -- Identifier + r_regionkey Int32 NOT NULL, -- Identifier PRIMARY KEY(r_regionkey) ); @@ -87,6 +87,6 @@ CREATE TABLE `/Root/supplier` ( s_name String , s_nationkey Int32 , -- FK to N_NATIONKEY s_phone String , - s_suppkey Int32 , -- Identifier + s_suppkey Int32 NOT NULL, -- Identifier PRIMARY KEY(s_suppkey) ); diff --git a/ydb/core/kqp/ut/join/data/stats/basic.json b/ydb/core/kqp/ut/join/data/stats/basic.json new file mode 100644 index 000000000000..29c63ecd14b9 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/basic.json @@ -0,0 +1,22 @@ +{ + "/Root/R": { + "n_rows": 10, + "byte_size": 100 + }, + "/Root/S": { + "n_rows": 10, + "byte_size": 100 + }, + "/Root/T": { + "n_rows": 10, + "byte_size": 100 + }, + "/Root/U": { + "n_rows": 10, + "byte_size": 100 + }, + "/Root/V": { + "n_rows": 10, + "byte_size": 100 + } +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/stats/lookupbug.json b/ydb/core/kqp/ut/join/data/stats/lookupbug.json new file mode 100644 index 000000000000..9857d867f2fd --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/lookupbug.json @@ -0,0 +1,18 @@ +{ + "/Root/quotas_browsers_relation": { + "n_rows": 222, + "byte_size": 28140 + }, + "/Root/browsers": { + "n_rows": 87, + "byte_size": 26719 + }, + "/Root/browser_groups": { + "n_rows": 17, + "byte_size": 1905 + }, + "/Root/quota": { + "n_rows": 55, + "byte_size": 9241 + } +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/stats/tpcc.json 
b/ydb/core/kqp/ut/join/data/stats/tpcc.json new file mode 100644 index 000000000000..65216bf8d3f0 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/tpcc.json @@ -0,0 +1,1126 @@ +{ + "/Root/test/tpcc/customer": { + "byte_size": 364073640263, + "n_rows": 502385014, + "n_attrs": 21, + "key_columns": [ + "C_W_ID", + "C_D_ID", + "C_ID" + ], + "columns": [ + { + "name": "C_BALANCE", + "most_common_value": [ + { + "frequency": 446216512, + "value": -10, + "type": "Double" + } + ], + "hyperloglog": 11726893 + }, + { + "name": "C_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "oqfvodrxknyozpvlho", + "type": "Utf8" + } + ], + "hyperloglog": 483844714 + }, + { + "name": "C_CREDIT", + "most_common_value": [ + { + "frequency": 431991877, + "value": "GC", + "type": "Utf8" + } + ], + "hyperloglog": 2 + }, + { + "name": "C_CREDIT_LIM", + "most_common_value": [ + { + "frequency": 480000000, + "value": 50000, + "type": "Double" + } + ], + "hyperloglog": 1 + }, + { + "name": "C_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "ycrhjuzbhdemdsiwsluqzicrbxphbdkovldvdpgvcpkivexexhncyumaqwqajquaowrbbbcfbhfiuubydkpugamrphgrimpsmiftsttnnpmsrowhrrrryohjwbuuejmqeqgysxuhkocmqyxakghxwcqzuwxkxpknrmtagpukwzvgfxoadevtxlewyealzcubstrritucaqrkyckwrwmnknlnxjtotmtoqpvqofqfulciwqwjckxdatsjceuzzieuqevzrdjmchwrjpfhovmwxwqoinhqqxkzqinidghnswrvlbaaedbgnhkqfvrtjtowfohbgylyisbeuicfdqdlrinbvwhocjkegffnzghtybqovkbjfropomanqvkjnthtszlldjsxszutqfnyqdgjhhgytiaof", + "type": "Utf8" + } + ], + "hyperloglog": 474174349 + }, + { + "name": "C_DELIVERY_CNT", + "most_common_value": [ + { + "frequency": 462671200, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "C_DISCOUNT", + "most_common_value": [ + { + "frequency": 8885, + "value": 0.2019, + "type": "Double" + } + ], + "hyperloglog": 4976 + }, + { + "name": "C_D_ID", + "most_common_value": [ + { + "frequency": 48000000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": 
"C_FIRST", + "most_common_value": [ + { + "frequency": 1, + "value": "tmrpgxzvej", + "type": "Utf8" + } + ], + "hyperloglog": 474656250 + }, + { + "name": "C_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "C_LAST", + "most_common_value": [ + { + "frequency": 8361629, + "value": "ANTIANTIATION", + "type": "Utf8" + } + ], + "hyperloglog": 1000 + }, + { + "name": "C_MIDDLE", + "most_common_value": [ + { + "frequency": 480000000, + "value": "OE", + "type": "Utf8" + } + ], + "hyperloglog": 1 + }, + { + "name": "C_PAYMENT_CNT", + "most_common_value": [ + { + "frequency": 462927566, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 14 + }, + { + "name": "C_PHONE", + "most_common_value": [ + { + "frequency": 1, + "value": "0071372729356440", + "type": "Utf8" + } + ], + "hyperloglog": 477456883 + }, + { + "name": "C_SINCE", + "most_common_value": [ + { + "frequency": 1313, + "value": "2023-11-28T12:27:16.886000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 1348820 + }, + { + "name": "C_STATE", + "most_common_value": [ + { + "frequency": 178037, + "value": "TR", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "C_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "qdxmgxsnnneky", + "type": "Utf8" + } + ], + "hyperloglog": 475295227 + }, + { + "name": "C_STREET_2", + "most_common_value": [ + { + "frequency": 1, + "value": "msvtqyfximgibejeklk", + "type": "Utf8" + } + ], + "hyperloglog": 472307567 + }, + { + "name": "C_W_ID", + "most_common_value": [ + { + "frequency": 30000, + "value": 4301, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "C_YTD_PAYMENT", + "most_common_value": [ + { + "frequency": 462927566, + "value": 10, + "type": "Double" + } + ], + "hyperloglog": 1253170 + }, + { + "name": "C_ZIP", + "most_common_value": [ + { + "frequency": 3303, + "value": "139311111", + "type": "Utf8" + } + ], + "hyperloglog": 
10087 + } + ] + }, + "/Root/test/tpcc/district": { + "byte_size": 30807162, + "n_rows": 160000, + "n_attrs": 11, + "key_columns": [ + "D_W_ID", + "D_ID" + ], + "columns": [ + { + "name": "D_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "ckhktweebpdmslxp", + "type": "Utf8" + } + ], + "hyperloglog": 160227 + }, + { + "name": "D_ID", + "most_common_value": [ + { + "frequency": 16000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "D_NAME", + "most_common_value": [ + { + "frequency": 1, + "value": "dofbomzm", + "type": "Utf8" + } + ], + "hyperloglog": 159468 + }, + { + "name": "D_NEXT_O_ID", + "most_common_value": [ + { + "frequency": 4598, + "value": 3125, + "type": "Int32" + } + ], + "hyperloglog": 114 + }, + { + "name": "D_STATE", + "most_common_value": [ + { + "frequency": 95, + "value": "TK", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "D_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "ipbwydczjge", + "type": "Utf8" + } + ], + "hyperloglog": 159930 + }, + { + "name": "D_STREET_2", + "most_common_value": [ + { + "frequency": 1, + "value": "jypxlaanveayjik", + "type": "Utf8" + } + ], + "hyperloglog": 160038 + }, + { + "name": "D_TAX", + "most_common_value": [ + { + "frequency": 27, + "value": 0.1475, + "type": "Double" + } + ], + "hyperloglog": 2001 + }, + { + "name": "D_W_ID", + "most_common_value": [ + { + "frequency": 10, + "value": 301, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "D_YTD", + "most_common_value": [ + { + "frequency": 1, + "value": 326749.86, + "type": "Double" + } + ], + "hyperloglog": 160208 + }, + { + "name": "D_ZIP", + "most_common_value": [ + { + "frequency": 160000, + "value": "123456789", + "type": "Utf8" + } + ], + "hyperloglog": 1 + } + ] + }, + "/Root/test/tpcc/history": { + "byte_size": 41478646661, + "n_rows": 499625922, + "n_attrs": 9, + "key_columns": [ + "H_C_NANO_TS" + ], + "columns": [ + { + "name": "H_AMOUNT", + 
"most_common_value": [ + { + "frequency": 480000040, + "value": 10, + "type": "Double" + } + ], + "hyperloglog": 501152 + }, + { + "name": "H_C_D_ID", + "most_common_value": [ + { + "frequency": 49965889, + "value": 2, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "H_C_ID", + "most_common_value": [ + { + "frequency": 213359, + "value": 75, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "H_C_NANO_TS", + "most_common_value": [ + { + "frequency": 1, + "value": 3543847373496118, + "type": "Int64" + } + ], + "hyperloglog": 497158709 + }, + { + "name": "H_C_W_ID", + "most_common_value": [ + { + "frequency": 31756, + "value": 5278, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "H_DATA", + "most_common_value": [ + { + "frequency": 161, + "value": "lgxdat dblfxfb", + "type": "Utf8" + } + ], + "hyperloglog": 485105795 + }, + { + "name": "H_DATE", + "most_common_value": [ + { + "frequency": 7639, + "value": "2023-11-28T12:22:10.829000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 7299624 + }, + { + "name": "H_D_ID", + "most_common_value": [ + { + "frequency": 49964655, + "value": 2, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "H_W_ID", + "most_common_value": [ + { + "frequency": 31259, + "value": 6050, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/item": { + "byte_size": 9762242, + "n_rows": 100000, + "n_attrs": 5, + "key_columns": [ + "I_ID" + ], + "columns": [ + { + "name": "I_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "myudbheisdtjypfsbaijhlnvzmvwa", + "type": "Utf8" + } + ], + "hyperloglog": 100201 + }, + { + "name": "I_ID", + "most_common_value": [ + { + "frequency": 1, + "value": 20625, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "I_IM_ID", + "most_common_value": [ + { + "frequency": 11, + "value": 8262, + "type": "Int32" + } + ], + "hyperloglog": 9983 + }, + { + "name": "I_NAME", + 
"most_common_value": [ + { + "frequency": 1, + "value": "livzdcddkzovol", + "type": "Utf8" + } + ], + "hyperloglog": 102336 + }, + { + "name": "I_PRICE", + "most_common_value": [ + { + "frequency": 9, + "value": 51.89, + "type": "Double" + } + ], + "hyperloglog": 9831 + } + ] + }, + "/Root/test/tpcc/new_order": { + "byte_size": 3736621547, + "n_rows": 181109395, + "n_attrs": 3, + "key_columns": [ + "NO_W_ID", + "NO_D_ID", + "NO_O_ID" + ], + "columns": [ + { + "name": "NO_D_ID", + "most_common_value": [ + { + "frequency": 14661893, + "value": 10, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "NO_O_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 2285, + "type": "Int32" + } + ], + "hyperloglog": 1019 + }, + { + "name": "NO_W_ID", + "most_common_value": [ + { + "frequency": 9586, + "value": 8460, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/oorder": { + "byte_size": 38933878538, + "n_rows": 514367988, + "n_attrs": 8, + "key_columns": [ + "O_W_ID", + "O_D_ID", + "O_ID" + ], + "columns": [ + { + "name": "O_ALL_LOCAL", + "most_common_value": [ + { + "frequency": 498049256, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "O_CARRIER_ID", + "most_common_value": [ + { + "frequency": 126671200, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 11 + }, + { + "name": "O_C_ID", + "most_common_value": [ + { + "frequency": 456301, + "value": 1283, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "O_D_ID", + "most_common_value": [ + { + "frequency": 49994773, + "value": 10, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "O_ENTRY_D", + "most_common_value": [ + { + "frequency": 8186, + "value": "2023-11-28T12:22:40.484000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 7973722 + }, + { + "name": "O_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 3166 + }, + { + 
"name": "O_OL_CNT", + "most_common_value": [ + { + "frequency": 45519522, + "value": 7, + "type": "Int32" + } + ], + "hyperloglog": 11 + }, + { + "name": "O_W_ID", + "most_common_value": [ + { + "frequency": 31489, + "value": 8086, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/order_line": { + "byte_size": 479855221145, + "n_rows": 5053103449, + "n_attrs": 10, + "key_columns": [ + "OL_W_ID", + "OL_D_ID", + "OL_O_ID", + "OL_NUMBER" + ], + "columns": [ + { + "name": "OL_AMOUNT", + "most_common_value": [ + { + "frequency": 100804305, + "value": 0, + "type": "Double" + } + ], + "hyperloglog": 998151 + }, + { + "name": "OL_DELIVERY_D", + "most_common_value": [ + { + "frequency": 4529362528, + "value": "1970-01-01T00:00:00.000000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 10386126 + }, + { + "name": "OL_DIST_INFO", + "most_common_value": [ + { + "frequency": 11, + "value": "iodfnhufrvksbxsgvolhfiv", + "type": "Utf8" + } + ], + "hyperloglog": 4969823023 + }, + { + "name": "OL_D_ID", + "most_common_value": [ + { + "frequency": 499905256, + "value": 8, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "OL_I_ID", + "most_common_value": [ + { + "frequency": 260692, + "value": 32487, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "OL_NUMBER", + "most_common_value": [ + { + "frequency": 499935520, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 15 + }, + { + "name": "OL_O_ID", + "most_common_value": [ + { + "frequency": 332407, + "value": 505, + "type": "Int32" + } + ], + "hyperloglog": 3166 + }, + { + "name": "OL_QUANTITY", + "most_common_value": [ + { + "frequency": 4819789043, + "value": 5, + "type": "Double" + } + ], + "hyperloglog": 10 + }, + { + "name": "OL_SUPPLY_W_ID", + "most_common_value": [ + { + "frequency": 315544, + "value": 8740, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "OL_W_ID", + "most_common_value": [ + { + "frequency": 315679, + "value": 
8740, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/stock": { + "byte_size": 671217489018, + "n_rows": 1709535853, + "n_attrs": 17, + "key_columns": [ + "S_W_ID", + "S_I_ID" + ], + "columns": [ + { + "name": "S_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "ewwmspkramcqkhjoxdvpbnucibhbbmzsnfwbmlxwvvtiaaizl", + "type": "Utf8" + } + ], + "hyperloglog": 1601496867 + }, + { + "name": "S_DIST_01", + "most_common_value": [ + { + "frequency": 1, + "value": "zwuoyreziuyfqanfepbyutn", + "type": "Utf8" + } + ], + "hyperloglog": 1619141249 + }, + { + "name": "S_DIST_02", + "most_common_value": [ + { + "frequency": 1, + "value": "cftgvsxxfatlhioqkjdcifz", + "type": "Utf8" + } + ], + "hyperloglog": 1605043436 + }, + { + "name": "S_DIST_03", + "most_common_value": [ + { + "frequency": 1, + "value": "kqysaivijxddogkugeppkuw", + "type": "Utf8" + } + ], + "hyperloglog": 1593035873 + }, + { + "name": "S_DIST_04", + "most_common_value": [ + { + "frequency": 1, + "value": "jhdriqzhdqsxesmefzscmcs", + "type": "Utf8" + } + ], + "hyperloglog": 1614620856 + }, + { + "name": "S_DIST_05", + "most_common_value": [ + { + "frequency": 1, + "value": "bsvfsdoshzqbqpamjhuugsd", + "type": "Utf8" + } + ], + "hyperloglog": 1608127910 + }, + { + "name": "S_DIST_06", + "most_common_value": [ + { + "frequency": 1, + "value": "liommwinzjcuhqgqrxjuqch", + "type": "Utf8" + } + ], + "hyperloglog": 1608998551 + }, + { + "name": "S_DIST_07", + "most_common_value": [ + { + "frequency": 1, + "value": "hrzncabpbzsllvwhpxcangt", + "type": "Utf8" + } + ], + "hyperloglog": 1595808444 + }, + { + "name": "S_DIST_08", + "most_common_value": [ + { + "frequency": 1, + "value": "jjanqwevafndsidydpylvlg", + "type": "Utf8" + } + ], + "hyperloglog": 1603549619 + }, + { + "name": "S_DIST_09", + "most_common_value": [ + { + "frequency": 1, + "value": "opmoxfjaqcaapumfpalsycu", + "type": "Utf8" + } + ], + "hyperloglog": 1596371895 + }, + { + "name": "S_DIST_10", + 
"most_common_value": [ + { + "frequency": 1, + "value": "eegmvopbvshfxchhpaxpalk", + "type": "Utf8" + } + ], + "hyperloglog": 1587912389 + }, + { + "name": "S_I_ID", + "most_common_value": [ + { + "frequency": 16000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "S_ORDER_CNT", + "most_common_value": [ + { + "frequency": 1473756452, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "S_QUANTITY", + "most_common_value": [ + { + "frequency": 17595758, + "value": 96, + "type": "Int32" + } + ], + "hyperloglog": 91 + }, + { + "name": "S_REMOTE_CNT", + "most_common_value": [ + { + "frequency": 1598736389, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "S_W_ID", + "most_common_value": [ + { + "frequency": 100000, + "value": 721, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "S_YTD", + "most_common_value": [ + { + "frequency": 12632304, + "value": 7, + "type": "Double" + } + ], + "hyperloglog": 10 + } + ] + }, + "/Root/test/tpcc/warehouse": { + "byte_size": 2527068, + "n_rows": 16000, + "n_attrs": 9, + "key_columns": [ + "W_ID" + ], + "columns": [ + { + "name": "W_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "syzobegklbrsy", + "type": "Utf8" + } + ], + "hyperloglog": 16131 + }, + { + "name": "W_ID", + "most_common_value": [ + { + "frequency": 1, + "value": 13601, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "W_NAME", + "most_common_value": [ + { + "frequency": 1, + "value": "exucl", + "type": "Utf8" + } + ], + "hyperloglog": 15903 + }, + { + "name": "W_STATE", + "most_common_value": [ + { + "frequency": 26, + "value": "GR", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "W_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "yvowutfetbmznmfpo", + "type": "Utf8" + } + ], + "hyperloglog": 16226 + }, + { + "name": "W_STREET_2", + "most_common_value": [ + { + "frequency": 1, + 
"value": "wsdlnkymvefrv", + "type": "Utf8" + } + ], + "hyperloglog": 15987 + }, + { + "name": "W_TAX", + "most_common_value": [ + { + "frequency": 11, + "value": 0.0868, + "type": "Double" + } + ], + "hyperloglog": 2000 + }, + { + "name": "W_YTD", + "most_common_value": [ + { + "frequency": 1, + "value": 3195732.239999999, + "type": "Double" + } + ], + "hyperloglog": 16124 + }, + { + "name": "W_ZIP", + "most_common_value": [ + { + "frequency": 16000, + "value": "123456789", + "type": "Utf8" + } + ], + "hyperloglog": 1 + } + ] + } +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp index b1a51610441d..73ababf42e77 100644 --- a/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp @@ -5,6 +5,45 @@ namespace NKikimr::NKqp { using namespace NYdb; using namespace NYdb::NTable; +TKikimrRunner GetKikimrRunnerWithStats() { + static TString STATS = R"( + { + "/Root/FJ_Table_1": + { + "n_rows": 4 + }, + "/Root/FJ_Table_2": + { + "n_rows": 2 + }, + "/Root/FJ_Table_3": + { + "n_rows": 4 + }, + "/Root/FJ_Table_4": + { + "n_rows": 3 + } + } + )"; + + TVector settings; + + NKikimrKqp::TKqpSetting setting; + setting.SetName("OptOverrideStatistics"); + setting.SetValue(STATS); + settings.push_back(setting); + + setting.SetName("CostBasedOptimizationLevel"); + setting.SetValue("4"); + settings.push_back(setting); + + TKikimrSettings serverSettings; + serverSettings.SetKqpSettings(settings); + + return TKikimrRunner(serverSettings); +} + static void CreateSampleTables(TSession session) { UNIT_ASSERT(session.ExecuteSchemeQuery(R"( CREATE TABLE `/Root/FJ_Table_1` ( @@ -52,7 +91,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // simple inner join, only 2 tables Y_UNIT_TEST(Inner_1) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -69,13 +108,15 @@ 
Y_UNIT_TEST_SUITE(KqpFlipJoin) { auto result = ExecQueryAndTestResult(session, query, NoParams, R"([[["Value11"];["Value21"]];[["Value12"];["Value22"]]])"); - AssertTableReads(result, "/Root/FJ_Table_1", 4); + Cerr << result.GetQueryPlan() << Endl; + + AssertTableReads(result, "/Root/FJ_Table_1", 2); AssertTableReads(result, "/Root/FJ_Table_2", 2); } // hierarchy of joins, flip on the last layer Y_UNIT_TEST(Inner_2) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -93,13 +134,13 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { R"([[["Value31"];["Value21"];["Value4_101"]]])"); AssertTableReads(result, "/Root/FJ_Table_2", 2); - AssertTableReads(result, "/Root/FJ_Table_3", 4); + AssertTableReads(result, "/Root/FJ_Table_3", 1); AssertTableReads(result, "/Root/FJ_Table_4", 1); } // hierarchy of joins, flip on the top layer Y_UNIT_TEST(Inner_3) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -122,12 +163,12 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { AssertTableReads(result, "/Root/FJ_Table_1", 4); AssertTableReads(result, "/Root/FJ_Table_2", 2); - AssertTableReads(result, "/Root/FJ_Table_3", 4); + AssertTableReads(result, "/Root/FJ_Table_3", 2); } // simple left semi join, only 2 tables Y_UNIT_TEST(LeftSemi_1) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -149,7 +190,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the last layer Y_UNIT_TEST(LeftSemi_2) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -166,14 +207,14 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { auto result = 
ExecQueryAndTestResult(session, query, NoParams, R"([[[1];["Value11"]];[[2];["Value12"]]])"); - AssertTableReads(result, "/Root/FJ_Table_1", 2); + AssertTableReads(result, "/Root/FJ_Table_1", 3); AssertTableReads(result, "/Root/FJ_Table_2", 2); AssertTableReads(result, "/Root/FJ_Table_3", 4); } // hierarchy of joins, flip on the top layer Y_UNIT_TEST(LeftSemi_3) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -201,7 +242,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // simple right semi join, only 2 tables Y_UNIT_TEST(RightSemi_1) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -222,7 +263,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the last layer Y_UNIT_TEST(RightSemi_2) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -246,7 +287,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the top layer Y_UNIT_TEST(RightSemi_3) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -274,7 +315,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // simple right join, only 2 tables Y_UNIT_TEST(Right_1) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -295,7 +336,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the last layer Y_UNIT_TEST(Right_2) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -347,7 +388,7 @@ 
Y_UNIT_TEST_SUITE(KqpFlipJoin) { // simple right only join, only 2 tables Y_UNIT_TEST(RightOnly_1) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -368,7 +409,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the last layer Y_UNIT_TEST(RightOnly_2) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -395,7 +436,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { // hierarchy of joins, flip on the top layer Y_UNIT_TEST(RightOnly_3) { - TKikimrRunner kikimr; + auto kikimr = GetKikimrRunnerWithStats(); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); diff --git a/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp index 1ff3126cb00b..107a4935a533 100644 --- a/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp @@ -1008,7 +1008,21 @@ Y_UNIT_TEST_TWIN(JoinByComplexKeyWithNullComponents, StreamLookupJoin) { Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(StreamLookupJoin); - auto settings = TKikimrSettings().SetAppConfig(appConfig); + + TString stats = R"( + {"/Root/Left":{"n_rows":3}, "/Root/Right":{"n_rows":3}} + )"; + + TVector settings; + + NKikimrKqp::TKqpSetting setting; + setting.SetName("OptOverrideStatistics"); + setting.SetValue(stats); + settings.push_back(setting); + + TKikimrSettings serverSettings = TKikimrSettings().SetAppConfig(appConfig);; + serverSettings.SetKqpSettings(settings); + TKikimrRunner kikimr(settings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -1068,7 +1082,7 @@ 
Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { [[1];[1];[1];[1]] ])", FormatResultSetYson(result.GetResultSet(0))); - const ui32 index = (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); + const ui32 index = (serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); for (const auto& tableStats : stats.query_phases(index).table_access()) { if (tableStats.name() == "/Root/Right") { @@ -1098,7 +1112,7 @@ Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { [[2];[2];[20];#] ])", FormatResultSetYson(result.GetResultSet(0))); - const ui32 index = (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); + const ui32 index = (serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); for (const auto& tableStats : stats.query_phases(index).table_access()) { if (tableStats.name() == "/Root/Right") { @@ -1127,7 +1141,7 @@ Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { [[2];[2];[2];["two"];["two"];["two"]] ])", FormatResultSetYson(result.GetResultSet(0))); - const ui32 index = (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); + const ui32 index = (serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 
0 : 1); auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); for (const auto& tableStats : stats.query_phases(index).table_access()) { if (tableStats.name() == "/Root/Right") { @@ -1158,7 +1172,7 @@ Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { [[2];[2];[2];["two"];["two"];["two"]] ])", FormatResultSetYson(result.GetResultSet(0))); - const ui32 index = (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); + const ui32 index = (serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin() ? 0 : 1); auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); for (const auto& tableStats : stats.query_phases(index).table_access()) { if (tableStats.name() == "/Root/Right") { @@ -1168,6 +1182,69 @@ Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { } } +Y_UNIT_TEST_TWIN(LeftSemiJoinWithDuplicatesInRightTable, StreamLookupJoin) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(StreamLookupJoin); + auto settings = TKikimrSettings().SetAppConfig(appConfig); + TKikimrRunner kikimr(settings); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { // create tables + const TString query = R"( + CREATE TABLE `/Root/Left` ( + Key1 Int64, + Key2 Int64, + Value String, + PRIMARY KEY (Key1, Key2) + ); + + CREATE TABLE `/Root/Right` ( + Key1 Int64, + Key2 Int64, + Value String, + PRIMARY KEY (Key1, Key2) + ); + )"; + UNIT_ASSERT(session.ExecuteSchemeQuery(query).GetValueSync().IsSuccess()); + } + + { // fill tables + const TString query = R"( + REPLACE INTO `/Root/Left` (Key1, Key2, Value) VALUES + (1, 10, "value1"), + (2, 20, "value2"), + (3, 30, "value3"); + + REPLACE INTO `/Root/Right` (Key1, Key2, Value) VALUES + (10, 100, "value1"), + (10, 101, "value1"), 
+ (10, 102, "value1"), + (20, 200, "value2"), + (20, 201, "value2"), + (30, 300, "value3"); + )"; + UNIT_ASSERT(session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).GetValueSync().IsSuccess()); + } + + { + const TString query = R"( + SELECT l.Key1, l.Key2, l.Value + FROM `/Root/Left` AS l + LEFT SEMI JOIN `/Root/Right` AS r + ON l.Key2 = r.Key1 ORDER BY l.Key1, l.Key2, l.Value + )"; + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + CompareYson(R"([ + [[1];[10];["value1"]]; + [[2];[20];["value2"]]; + [[3];[30];["value3"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } +} + } // suite } // namespace NKqp diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index c0e8bd96aebd..15f906eb6f09 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKikimr { namespace NKqp { @@ -12,6 +13,19 @@ namespace NKqp { using namespace NYdb; using namespace NYdb::NTable; +#define Y_UNIT_TEST_XOR_OR_BOTH_FALSE(N, OPT1, OPT2) \ + template void N(NUnitTest::TTestContext&); \ + struct TTestRegistration##N { \ + TTestRegistration##N() { \ + TCurrentTest::AddTest(#N "-" #OPT1 "-" #OPT2, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "+" #OPT1 "-" #OPT2, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT1 "+" #OPT2, static_cast(&N), false); \ + } \ + }; \ + static TTestRegistration##N testRegistration##N; \ + template \ + void N(NUnitTest::TTestContext&) + TString GetStatic(const TString& filePath) { TString fullPath = SRC_("data/" + filePath); @@ -27,47 +41,52 @@ TString GetStatic(const TString& filePath) { return buffer.str(); } +void CreateTables(TSession session, const TString& schemaPath, bool useColumnStore) { + std::string query = GetStatic(schemaPath); + + if (useColumnStore) { + std::regex pattern(R"(CREATE TABLE [^\(]+ \([^;]*\))", 
std::regex::multiline); + query = std::regex_replace(query, pattern, "$& WITH (STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 16);"); + } + + auto res = session.ExecuteSchemeQuery(TString(query)).GetValueSync(); + res.GetIssues().PrintTo(Cerr); + UNIT_ASSERT(res.IsSuccess()); +} /* * A basic join order test. We define 5 tables sharing the same * key attribute and construct various full clique join queries */ -static void CreateSampleTable(TSession session) { - UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/rstuv.sql")).GetValueSync().IsSuccess()); +static void CreateSampleTable(TSession session, bool useColumnStore) { + CreateTables(session, "schema/rstuv.sql", useColumnStore); + + CreateTables(session, "schema/tpch.sql", useColumnStore); - UNIT_ASSERT(session.ExecuteDataQuery(R"( - REPLACE INTO `/Root/R` (id, payload1, ts) VALUES (1, "blah", CAST("1998-12-01" AS Date) ); - REPLACE INTO `/Root/S` (id, payload2) VALUES (1, "blah"); - REPLACE INTO `/Root/T` (id, payload3) VALUES (1, "blah"); - REPLACE INTO `/Root/U` (id, payload4) VALUES (1, "blah"); - REPLACE INTO `/Root/V` (id, payload5) VALUES (1, "blah"); - )", TTxControl::BeginTx().CommitTx()).GetValueSync().IsSuccess()); + CreateTables(session, "schema/tpcds.sql", useColumnStore); - UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpch.sql")).GetValueSync().IsSuccess()); + CreateTables(session, "schema/tpcc.sql", useColumnStore); - UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcds.sql")).GetValueSync().IsSuccess()); + CreateTables(session, "schema/lookupbug.sql", useColumnStore); } static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, TString stats = ""){ TVector settings; NKikimrKqp::TKqpSetting setting; - - setting.SetName("CostBasedOptimizationLevel"); - setting.SetValue("3"); - settings.push_back(setting); - setting.SetName("OptEnableConstantFolding"); - setting.SetValue("true"); + setting.SetName("CostBasedOptimizationLevel"); + 
setting.SetValue("4"); settings.push_back(setting); if (stats != "") { - setting.SetName("OverrideStatistics"); + setting.SetName("OptOverrideStatistics"); setting.SetValue(stats); settings.push_back(setting); } NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(useStreamLookupJoin); + appConfig.MutableTableServiceConfig()->SetEnableConstantFolding(true); appConfig.MutableTableServiceConfig()->SetCompileTimeoutMs(TDuration::Minutes(10).MilliSeconds()); auto serverSettings = TKikimrSettings().SetAppConfig(appConfig); @@ -75,32 +94,53 @@ static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, return TKikimrRunner(serverSettings); } -class TChainConstructor { +void PrintPlan(const TString& plan) { + Cout << plan << Endl; +} + +class TChainTester { public: - TChainConstructor(size_t chainSize) - : Kikimr_(GetKikimrWithJoinSettings()) - , TableClient_(Kikimr_.GetTableClient()) - , Session_(TableClient_.CreateSession().GetValueSync().GetSession()) - , ChainSize_(chainSize) + TChainTester(size_t chainSize) + : Kikimr(GetKikimrWithJoinSettings(false, GetStats(chainSize))) + , TableClient(Kikimr.GetTableClient()) + , Session(TableClient.CreateSession().GetValueSync().GetSession()) + , ChainSize(chainSize) {} +public: + void Test() { + CreateTables(); + JoinTables(); + } + + static TString GetStats(size_t chainSize) { + srand(228); + NJson::TJsonValue stats; + for (size_t i = 0; i < chainSize; ++i) { + ui64 nRows = rand(); + NJson::TJsonValue tableStat; + tableStat["n_rows"] = nRows; + tableStat["byte_size"] = nRows * 10; + + TString table = Sprintf("/Root/table_%ld", i); + stats[table] = std::move(tableStat); + } + return stats.GetStringRobust(); + } + +private: void CreateTables() { - for (size_t i = 0; i < ChainSize_; ++i) { - TString tableName; - - tableName - .append("/Root/table_").append(ToString(i));; - - TString createTable; - createTable - += "CREATE TABLE `" + tableName + "` 
(id" - + ToString(i) + " Int32, " - + "PRIMARY KEY (id" + ToString(i) + "));"; - - std::cout << createTable << std::endl; - auto res = Session_.ExecuteSchemeQuery(createTable).GetValueSync(); - std::cout << res.GetIssues().ToString() << std::endl; - UNIT_ASSERT(res.IsSuccess()); + for (size_t i = 0; i < ChainSize; ++i) { + TString tableName = Sprintf("/Root/table_%ld", i); + + TString createTable = Sprintf( + "CREATE TABLE `%s` (id%ld Int32, PRIMARY KEY (id%ld));", + tableName.c_str(), i, i + ); + + auto result = Session.ExecuteSchemeQuery(createTable).GetValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } } @@ -109,38 +149,37 @@ class TChainConstructor { joinRequest.append("SELECT * FROM `/Root/table_0` as t0 "); - for (size_t i = 1; i < ChainSize_; ++i) { - TString table = "/Root/table_" + ToString(i); + for (size_t i = 1; i < ChainSize; ++i) { + TString table = Sprintf("/Root/table_%ld", i); - TString prevAliasTable = "t" + ToString(i - 1); - TString aliasTable = "t" + ToString(i); + TString prevAliasTable = Sprintf("t%ld", i - 1); + TString aliasTable = Sprintf("t%ld", i); - joinRequest - += "INNER JOIN `" + table + "`" + " AS " + aliasTable + " ON " - + aliasTable + ".id" + ToString(i) + "=" + prevAliasTable + ".id" - + ToString(i-1) + " "; + joinRequest += + Sprintf( + "INNER JOIN `%s` AS %s ON %s.id%ld = %s.id%ld ", + table.c_str(), aliasTable.c_str(), aliasTable.c_str(), i, prevAliasTable.c_str(), i - 1 + ); } - auto result = Session_.ExecuteDataQuery(joinRequest, TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - - std::cout << result.GetIssues().ToString() << std::endl; - std::cout << joinRequest << std::endl; + auto result = Session.ExplainDataQuery(joinRequest).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + PrintPlan(result.GetPlan()); } -private: - TKikimrRunner Kikimr_; - NYdb::NTable::TTableClient 
TableClient_; - TSession Session_; - size_t ChainSize_; + TKikimrRunner Kikimr; + NYdb::NTable::TTableClient TableClient; + TSession Session; + size_t ChainSize; }; -void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLookupJoin) { +void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLookupJoin, bool useColumnStore) { auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTable(session); + CreateSampleTable(session, useColumnStore); /* join with parameters */ { @@ -152,21 +191,21 @@ void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku NJson::TJsonValue plan; NJson::ReadJsonTree(result.GetPlan(), &plan, true); - Cout << result.GetPlan(); + Cerr << result.GetPlan() << Endl; } } -void ExecuteJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLookupJoin) { +void ExecuteJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLookupJoin, bool useColumnStore) { auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTable(session); + CreateSampleTable(session, useColumnStore); /* join with parameters */ { const TString query = GetStatic(queryPath); - + auto result = session.ExecuteDataQuery(query,TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); @@ -175,156 +214,286 @@ void ExecuteJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku Y_UNIT_TEST_SUITE(KqpJoinOrder) { Y_UNIT_TEST(Chain65Nodes) { - TChainConstructor chain(65); - chain.CreateTables(); - chain.JoinTables(); + TChainTester(65).Test(); + } + + TString ExecuteJoinOrderTestDataQueryWithStats(const TString& queryPath, const TString& statsPath, bool useStreamLookupJoin, bool useColumnStore) { + auto kikimr = 
GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath)); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + CreateSampleTable(session, useColumnStore); + + /* join with parameters */ + { + const TString query = GetStatic(queryPath); + + auto execRes = db.StreamExecuteScanQuery(query, TStreamExecScanQuerySettings().Explain(true)).ExtractValueSync(); + execRes.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL(execRes.GetStatus(), EStatus::SUCCESS); + auto plan = CollectStreamResult(execRes).PlanJson; + PrintPlan(plan.GetRef()); + return plan.GetRef(); + } + } + + void CheckJoinCardinality(const TString& queryPath, const TString& statsPath, const TString& joinKind, double card, bool useStreamLookupJoin, bool useColumnStore) { + auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath)); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + CreateSampleTable(session, useColumnStore); + + /* join with parameters */ + { + const TString query = GetStatic(queryPath); + + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + Cerr << result.GetPlan() << Endl; + NJson::TJsonValue plan; + NJson::ReadJsonTree(result.GetPlan(), &plan, true); + + if(!useStreamLookupJoin) { + auto joinNode = FindPlanNodeByKv(plan.GetMapSafe().at("SimplifiedPlan"), "Node Type", joinKind); + UNIT_ASSERT(joinNode.IsDefined()); + auto op = joinNode.GetMapSafe().at("Operators").GetArraySafe()[0]; + auto eRows = op.GetMapSafe().at("E-Rows").GetStringSafe(); + UNIT_ASSERT_EQUAL(std::stod(eRows), card); + } + } + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoin, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/five_way_join.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinStatsOverride, StreamLookupJoin, ColumnStore) { + 
ExecuteJoinOrderTestDataQueryWithStats( + "queries/five_way_join_stats_override.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoin, StreamLookupJoin) { - ExecuteJoinOrderTestDataQuery("queries/five_way_join.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FourWayJoinLeftFirst, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/four_way_join_left_first.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoinStatsOverride, StreamLookupJoin) { - ExecuteJoinOrderTestDataQuery("queries/five_way_join_stats_override.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithPreds, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/five_way_join_with_preds.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FourWayJoinLeftFirst, StreamLookupJoin) { - ExecuteJoinOrderTestDataQuery("queries/four_way_join_left_first.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithComplexPreds, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/five_way_join_with_complex_preds.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithPreds, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/five_way_join_with_preds.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithComplexPreds2, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/five_way_join_with_complex_preds2.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithComplexPreds, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/five_way_join_with_complex_preds.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithPredsAndEquiv, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + 
"queries/four_way_join_with_preds_and_equiv.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithComplexPreds2, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/five_way_join_with_complex_preds2.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FourWayJoinWithPredsAndEquivAndLeft, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats( + "queries/four_way_join_with_preds_and_equiv_and_left.sql", "stats/basic.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithPredsAndEquiv, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/four_way_join_with_preds_and_equiv.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinHint, StreamLookupJoin, ColumnStore) { + CheckJoinCardinality("queries/test_join_hint.sql", "stats/basic.json", "InnerJoin (Grace)", 10e6, StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(FourWayJoinWithPredsAndEquivAndLeft, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/four_way_join_with_preds_and_equiv_and_left.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinHint2, StreamLookupJoin, ColumnStore) { + CheckJoinCardinality("queries/test_join_hint2.sql", "stats/basic.json", "InnerJoin (MapJoin)", 1, StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithConstantFold, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/five_way_join_with_constant_fold.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithConstantFold, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/five_way_join_with_constant_fold.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(FiveWayJoinWithConstantFoldOpt, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/five_way_join_with_constant_fold_opt.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithConstantFoldOpt, StreamLookupJoin, ColumnStore) { + 
ExecuteJoinOrderTestDataQueryWithStats("queries/five_way_join_with_constant_fold_opt.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(DatetimeConstantFold, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/datetime_constant_fold.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(DatetimeConstantFold, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/datetime_constant_fold.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCH3, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpch3.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH3, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpch3.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCH5, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpch5.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH5, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpch5.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCH10, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpch10.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH10, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpch10.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCH11, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpch11.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH11, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpch11.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCH21, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpch21.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH21, StreamLookupJoin, ColumnStore) { + 
ExecuteJoinOrderTestDataQueryWithStats("queries/tpch21.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS16, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds16.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS16, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds16.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS61, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds61.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS34, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds34.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS88, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds88.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS61, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds61.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS90, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds90.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS87, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds87.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS88, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds88.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS90, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds90.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS92, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds92.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS92, StreamLookupJoin, ColumnStore) { + 
ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds92.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS94, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds94.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS94, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds94.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS95, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds95.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS95, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds95.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - Y_UNIT_TEST_TWIN(TPCDS96, StreamLookupJoin) { - ExplainJoinOrderTestDataQuery("queries/tpcds96.sql", StreamLookupJoin); + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCDS96, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds96.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore); } - void JoinOrderTestWithOverridenStats(const TString& queryPath, const TString& statsPath, const TString& correctJoinOrderPath, bool useStreamLookupJoin) { + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinOrderHintsSimple, StreamLookupJoin, ColumnStore) { + auto plan = ExecuteJoinOrderTestDataQueryWithStats("queries/join_order_hints_simple.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); + UNIT_ASSERT_VALUES_EQUAL(GetJoinOrder(plan).GetStringRobust(), R"(["T",["R","S"]])") ; + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinOrderHintsComplex, StreamLookupJoin, ColumnStore) { + auto plan = ExecuteJoinOrderTestDataQueryWithStats("queries/join_order_hints_complex.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); + auto joinOrder = GetJoinOrder(plan).GetStringRobust(); + UNIT_ASSERT_C(joinOrder.find(R"([["R","S"],["T","U"]])") != TString::npos, joinOrder); + } + + 
Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinOrderHintsManyHintTrees, StreamLookupJoin, ColumnStore) { + auto plan = ExecuteJoinOrderTestDataQueryWithStats("queries/join_order_hints_many_hint_trees.sql", "stats/basic.json", StreamLookupJoin, ColumnStore); + auto joinOrder = GetJoinOrder(plan).GetStringRobust(); + UNIT_ASSERT_C(joinOrder.find(R"(["R","S"])") != TString::npos, joinOrder); + UNIT_ASSERT_C(joinOrder.find(R"(["T","U"])") != TString::npos, joinOrder); + } + + void JoinOrderTestWithOverridenStats(const TString& queryPath, const TString& statsPath, TString correctJoinOrderPath, bool useStreamLookupJoin, bool useColumnStore + ) { auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath)); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTable(session); + CreateSampleTable(session, useColumnStore); /* join with parameters */ { const TString query = GetStatic(queryPath); - - TStreamExecScanQuerySettings settings; - settings.Explain(true); - - auto it = kikimr.GetTableClient().StreamExecuteScanQuery(query, settings).ExtractValueSync(); - auto res = CollectStreamResult(it); + + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + Cerr << result.GetPlan() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + if (useStreamLookupJoin) { + return; + } + + if (useColumnStore) { + correctJoinOrderPath = correctJoinOrderPath.substr(0, correctJoinOrderPath.find(".json")) + "_column_store.json"; + } + + auto currentJoinOrder = GetDetailedJoinOrder(result.GetPlan()); + Cerr << currentJoinOrder << Endl; + /* to canonize the tests use --test-param CANONIZE_JOIN_ORDER_TESTS=TRUE */ + TString canonize = GetTestParam("CANONIZE_JOIN_ORDER_TESTS"); canonize.to_lower(); + if (canonize.equal("true")) { + Cerr << "--------------------CANONIZING THE TESTS--------------------"; + TOFStream stream(SRC_("data/" + 
correctJoinOrderPath)); + NJsonWriter::TBuf writer; + writer.SetIndentSpaces(2); + writer.WriteJsonValue(¤tJoinOrder); + writer.FlushTo(&stream); + stream << Endl; + } TString ref = GetStatic(correctJoinOrderPath); + UNIT_ASSERT(JoinOrderAndAlgosMatch(result.GetPlan(), ref)); + } + } - /* correct canonized join order in cout, change corresponding join_order/.json file */ - Cout << CanonizeJoinOrder(*res.PlanJson) << Endl; + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(OverrideStatsTPCH2, StreamLookupJoin, ColumnStore) { + JoinOrderTestWithOverridenStats( + "queries/tpch2.sql", "stats/tpch1000s.json", "join_order/tpch2_1000s.json", StreamLookupJoin, ColumnStore + ); + } - UNIT_ASSERT(JoinOrderAndAlgosMatch(*res.PlanJson, ref)); - } + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(OverrideStatsTPCH9, StreamLookupJoin, ColumnStore) { + JoinOrderTestWithOverridenStats( + "queries/tpch9.sql", "stats/tpch1000s.json", "join_order/tpch9_1000s.json", StreamLookupJoin, ColumnStore + ); } - Y_UNIT_TEST_TWIN(OverrideStatsTPCH2, StreamLookupJoin) { + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(OverrideStatsTPCDS64, StreamLookupJoin, ColumnStore) { JoinOrderTestWithOverridenStats( - "queries/tpch2.sql", "stats/tpch1000s.json", "join_order/tpch2_1000s.json", StreamLookupJoin + "queries/tpcds64.sql", "stats/tpcds1000s.json", "join_order/tpcds64_1000s.json", StreamLookupJoin, ColumnStore ); } - Y_UNIT_TEST_TWIN(OverrideStatsTPCH9, StreamLookupJoin) { + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(OverrideStatsTPCDS64_small, StreamLookupJoin, ColumnStore) { JoinOrderTestWithOverridenStats( - "queries/tpch9.sql", "stats/tpch1000s.json", "join_order/tpch9_1000s.json", StreamLookupJoin + "queries/tpcds64_small.sql", "stats/tpcds1000s.json", "join_order/tpcds64_small_1000s.json", StreamLookupJoin, ColumnStore + ); + } + + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(OverrideStatsTPCDS78, StreamLookupJoin, ColumnStore) { + JoinOrderTestWithOverridenStats( + "queries/tpcds78.sql", "stats/tpcds1000s.json", "join_order/tpcds78_1000s.json", StreamLookupJoin, 
ColumnStore ); } - Y_UNIT_TEST_TWIN(OverrideStatsTPCDS64, StreamLookupJoin) { + Y_UNIT_TEST(TPCC) { JoinOrderTestWithOverridenStats( - "queries/tpcds64.sql", "stats/tpcds1000s.json", "join_order/tpcds64_1000s.json", StreamLookupJoin + "queries/tpcc.sql", "stats/tpcc.json", "join_order/tpcc.json", false, false ); } - Y_UNIT_TEST_TWIN(OverrideStatsTPCDS78, StreamLookupJoin) { + Y_UNIT_TEST(LookupBug) { JoinOrderTestWithOverridenStats( - "queries/tpcds78.sql", "stats/tpcds1000s.json", "join_order/tpcds78_1000s.json", StreamLookupJoin + "queries/lookupbug.sql", "stats/lookupbug.json", "join_order/lookupbug.json", false, false ); } + } } } diff --git a/ydb/core/kqp/ut/join/kqp_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_ut.cpp index fae91c918e04..49b8772f06bd 100644 --- a/ydb/core/kqp/ut/join/kqp_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_ut.cpp @@ -816,6 +816,72 @@ Y_UNIT_TEST_SUITE(KqpJoin) { } } + Y_UNIT_TEST(TwoJoinsWithQueryService) { + NKikimrConfig::TAppConfig appConfig; + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + + TKikimrRunner kikimr(serverSettings); + auto client = kikimr.GetTableClient(); + auto db = kikimr.GetQueryClient(); + auto settings = NYdb::NQuery::TExecuteQuerySettings(); + + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto query = Q_(R"( + CREATE TABLE ta( + a Int64, + b Int64, + c Int64, + PRIMARY KEY(a) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto query = Q_(R"( + CREATE TABLE tb( + b Int64, + bval Int64, + PRIMARY KEY(b) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto 
query = Q_(R"( + CREATE TABLE tc( + c Int64, + cval Int64, + PRIMARY KEY(c) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto result = db.ExecuteQuery(R"( + UPSERT INTO ta(a, b, c) VALUES (1, 1001, 2001), (2, 1002, 2002), (3, 1003, 2003); + UPSERT INTO tb(b, bval) VALUES (1001, 1001), (1002, 1002), (1003, 1003); + UPSERT INTO tc(c, cval) VALUES (2001, 2001), (2002, 2002), (2003, 2003); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto result = db.ExecuteQuery(R"( + SELECT ta.a, tb.bval, tc.cval FROM ta INNER JOIN tb ON ta.b = tb.b LEFT JOIN tc ON ta.c = tc.cval + ORDER BY ta.a, tb.bval, tc.cval; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[1];[1001];[2001]];[[2];[1002];[2002]];[[3];[1003];[2003]]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + // join on key prefix => index-lookup Y_UNIT_TEST(RightSemiJoin_KeyPrefix) { TKikimrRunner kikimr(SyntaxV1Settings()); @@ -1506,31 +1572,6 @@ Y_UNIT_TEST_SUITE(KqpJoin) { CompareYson(R"([[2]])", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(JoinPragmaHashJoinMode) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - CreateSampleTables(session); - - auto query = Q1_(R"( - PRAGMA ydb.HashJoinMode='grace'; - - SELECT t1.Value - FROM `/Root/Join1_1` AS t1 - INNER JOIN `/Root/Join1_2` AS t2 - ON t1.Value == t2.Value; - )"); - - auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), BuildPureTableParams(db)).GetValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), 
EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); - - auto explain = session.ExplainDataQuery(query).GetValueSync(); - UNIT_ASSERT(explain.GetAst().Contains("GraceJoinCore")); - } - Y_UNIT_TEST(FullOuterJoinNotNullJoinKey) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); @@ -1702,6 +1743,160 @@ Y_UNIT_TEST_SUITE(KqpJoin) { UNIT_ASSERT(result.GetQueryPlan().Contains("Lookup")); } } + + Y_UNIT_TEST(ComplexJoin) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { // init tables + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE Input1 + ( + k1 String, + v1 String, + u1 String, + PRIMARY KEY (k1, v1, u1) + ); + + CREATE TABLE Input2 + ( + k2 String, + v2 String, + u2 String, + PRIMARY KEY (k2, v2, u2) + ); + + CREATE TABLE Input3 + ( + k3 String, + v3 String, + u3 String, + PRIMARY KEY (k3, v3, u3) + ); + + CREATE TABLE Input4 + ( + k4 String, + v4 String, + u4 String, + PRIMARY KEY (k4, v4, u4) + ); + + CREATE TABLE Input5 + ( + k5 String, + v5 String, + u5 String, + PRIMARY KEY (k5, v5, u5) + ); + )").GetValueSync()); + + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + REPLACE INTO Input1 (k1, v1, u1) VALUES + ("01","1","01"), + ("01","2","02"), + ("02","1","03"), + ("02","2","05"), + ("02","2","06"), + ("03",NULL,"07"), + ("03","1","08"), + ("03","2","09"), + ("04","1","10"), + ("04","2","11"), + ("05","1","12"), + ("05","2","13"), + ("06","1","14"), + ("06","2","15"), + ("07","1","16"), + ("07","2","17"), + ("08","1","18"), + ("08","2","19"), + ("09","1","20"), + ("09","2","21"), + ("10","1","22"), + ("10","2","23"); + + REPLACE INTO Input2 (k2, v2, u2) VALUES + ("02","1","01"), + ("02","1","02"), + ("02","2","03"), + ("02","2","04"), + ("03","1","05"), + ("03","2","06"), + ("04","1","07"), + ("04","2","08"), + ("05","1","09"), + ("05","2","10"), + 
("06","1","11"), + ("06","2","12"), + ("07","1","13"), + ("07","2","14"), + ("09","1","15"), + ("09","2","16"); + + REPLACE INTO Input3 (k3, v3, u3) VALUES + (NULL,"1","01"), + (NULL,"2","02"), + ("04","1","03"), + ("04","2","04"), + ("05","1","05"), + ("05","2","06"); + + REPLACE INTO Input4 (k4, v4, u4) VALUES + ("03",NULL,"01"), + ("03","1","02"), + ("03","2","03"), + ("04","1","04"), + ("04","2","05"), + ("05","1","06"), + ("05","2","07"), + ("06","1","08"), + ("06","2","09"), + ("07","1","10"), + ("07","2","11"), + ("08","1","12"), + ("08","2","13"); + + REPLACE INTO Input5 (k5, v5, u5) VALUES + (NULL,"1","01"), + (NULL,"2","02"), + ("02","1","03"), + ("02","1","04"), + ("02","2","05"), + ("02","2","06"), + ("03","1","07"), + ("03","2","08"), + ("09","1","09"), + ("09","2","10"); + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + } + + { + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + pragma ydb.CostBasedOptimizationLevel = "4"; + pragma ydb.OptOverrideStatistics = '{"/Root/Input1": {"n_rows":10000}, "/Root/Input2" : {"n_rows":10000}, "/Root/Input3":{"n_rows":10000}, "/Root/Input4":{"n_rows":10000}, "/Root/Input5":{"n_rows":10000}}'; + + $rightSemi = select * from Input2 as b right semi join Input1 as a on a.v1 = b.v2 and a.k1 = b.k2; + $leftOnly = select * from $rightSemi as rs left only join Input3 as c on rs.k1 = c.k3 and rs.v1 = c.v3; + $right = select * from Input4 as d right join $leftOnly as lo on d.v4 = lo.v1 and lo.k1 = d.k4; + $inner = select * from $right as r join any Input5 as e on r.k1 = e.k5 and e.v5 = r.v1; + + select * from $inner order by u1,u5; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + Cout << FormatResultSetYson(result.GetResultSet(0)); + CompareYson(R"( + 
[[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["03"];#;["04"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["05"];#;["06"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["06"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]] + )", FormatResultSetYson(result.GetResultSet(0))); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp index 9a70ee376fed..28dcf8d19069 100644 --- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -163,7 +163,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { // Check plan #if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "Aggregate-TableFullScan"); + CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "TableFullScan"); // CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); #else CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); @@ -171,6 +171,49 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { } } + Y_UNIT_TEST_TWIN(DisableBlockEngineInAggregationWithSpilling, AllowSpilling) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + settings.AppConfig.MutableTableServiceConfig()->SetBlockChannelsMode(NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_FORCE); + if (AllowSpilling) { + settings.AppConfig.MutableTableServiceConfig()->SetEnableSpillingNodes("Aggregation"); + } else { + settings.AppConfig.MutableTableServiceConfig()->SetEnableSpillingNodes("None"); + } + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto client = kikimr.GetQueryClient(); + + { + 
WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + TString query = R"( + --!syntax_v1 + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + GROUP BY level + )"; + + auto res = StreamExplainQuery(query, client); + UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); + + auto plan = CollectStreamResult(res); + + bool hasWideCombiner = plan.QueryStats->Getquery_ast().Contains("WideCombiner"); + UNIT_ASSERT_C(hasWideCombiner == AllowSpilling, plan.QueryStats->Getquery_ast()); + } + } + Y_UNIT_TEST_TWIN(CountAllPushdown, UseLlvm) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -352,7 +395,8 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { .AddExpectedPlanOptions("KqpOlapFilter") #if SSA_RUNTIME_VERSION >= 2U .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) + // See https://github.com/ydb-platform/ydb/issues/7299 for explanation, why resultCount = 3 + .MutableLimitChecker().SetExpectedResultCount(3) #else .AddExpectedPlanOptions("CombineCore") #endif @@ -980,7 +1024,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { ORDER BY c, resource_id DESC LIMIT 3 )") .SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); testCase.FillExpectedAggregationGroupByPlanOptions(); TestAggregations({ testCase }); } diff --git a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp index 
560482ca4c44..ea97c44484f3 100644 --- a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp +++ b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp @@ -15,7 +15,6 @@ namespace NKikimr::NKqp { Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { - namespace { class TTransferStatus { private: @@ -60,7 +59,6 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { return TConclusionStatus::Success(); } virtual void DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) const override { - } virtual TString GetClassName() const override { @@ -68,10 +66,16 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } }; + TKikimrSettings GetKikimrSettings() { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableAlterShardingInColumnShard(true); + return TKikimrSettings().SetWithSampleTables(false).SetFeatureFlags(featureFlags); + } + class TSharingDataTestCase { private: const ui32 ShardsCount; - TKikimrRunner& Kikimr; + TKikimrRunner Kikimr; TTypedLocalHelper Helper; NYDBTest::TControllers::TGuard Controller; std::vector ShardIds; @@ -86,15 +90,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { Helper.FillPKOnly(kff, recordsCount); } - TSharingDataTestCase(const ui32 shardsCount, TKikimrRunner& kikimr) + TSharingDataTestCase(const ui32 shardsCount) : ShardsCount(shardsCount) - , Kikimr(kikimr) + , Kikimr(GetKikimrSettings()) , Helper("", Kikimr, "olapTable", "olapStore12") , Controller(NYDBTest::TControllers::RegisterCSControllerGuard()) { Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); Controller->SetExpectedShardsCount(ShardsCount); - Controller->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + Controller->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").Initialize(); @@ -111,6 +115,7 @@ 
Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } void WaitNormalization() { + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Force); const auto start = TInstant::Now(); while (!Controller->IsTrivialLinks() && TInstant::Now() - start < TDuration::Seconds(30)) { @@ -119,9 +124,11 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } AFL_VERIFY(Controller->IsTrivialLinks()); Controller->CheckInvariants(); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } void Execute(const ui64 destinationIdx, const std::vector& sourceIdxs, const bool move, const NOlap::TSnapshot& snapshot, const std::set& pathIdxs) { + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); AFL_VERIFY(destinationIdx < ShardIds.size()); const ui64 destination = ShardIds[destinationIdx]; std::vector sources; @@ -188,21 +195,19 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } CSTransferStatus->Reset(); AFL_VERIFY(!Controller->IsTrivialLinks()); + Controller->CheckInvariants(); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } }; Y_UNIT_TEST(BlobsSharingSplit1_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); Sleep(TDuration::Seconds(1)); @@ -214,9 +219,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } 
Y_UNIT_TEST(BlobsSharingSplit1_1_clean_with_restarts) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.SetRebootTablet(true); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); @@ -229,18 +232,14 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1, 2, 3 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -250,9 +249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit1_3_2_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -266,8 +263,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } class TReshardingTest { - private: - YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + public: + TReshardingTest() + : Kikimr(GetKikimrSettings()) + , CSController(NYDBTest::TControllers::RegisterCSControllerGuard()) + , TableClient(Kikimr.GetTableClient()) { + 
CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + CSController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + CSController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + } void WaitResharding(const TString& hint = "") { const TInstant start = TInstant::Now(); @@ -303,28 +307,22 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { CompareYson(result, "[[" + ::ToString(expectation) + "u;]]"); } + protected: TKikimrRunner Kikimr; - public: + NKikimr::NYDBTest::TControllers::TGuard CSController; + NYdb::NTable::TTableClient TableClient; + }; - TReshardingTest() - : Kikimr(TKikimrSettings().SetWithSampleTables(false)) { + class TShardingTypeTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); - } + public: + using TReshardingTest::TReshardingTest; void Execute() { - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - TLocalHelper(Kikimr).SetShardingMethod(ShardingType).CreateTestOlapTable("olapTable", "olapStore", 24, 4); - auto tableClient = Kikimr.GetTableClient(); - Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); - - std::vector uids; - std::vector resourceIds; - std::vector levels; + Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); @@ -334,82 +332,161 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 
2000000, 200000000, 70000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); - - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; - - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - } CheckCount(230000); for (ui32 i = 0; i < 2; ++i) { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); WaitResharding("SPLIT:" + ::ToString(i)); } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } - AFL_VERIFY(csController->GetShardingFiltersCount().Val() == 0); + AFL_VERIFY(CSController->GetShardingFiltersCount().Val() == 0); CheckCount(230000); - i64 count = csController->GetShardingFiltersCount().Val(); + i64 count = CSController->GetShardingFiltersCount().Val(); 
AFL_VERIFY(count >= 16)("count", count); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitIndexation(TDuration::Seconds(3)); + CSController->WaitCompactions(TDuration::Seconds(3)); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - csController->WaitIndexation(TDuration::Seconds(5)); - csController->WaitCompactions(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->WaitIndexation(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + count = CSController->GetShardingFiltersCount().Val(); + CheckCount(230000); - csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); CheckCount(230000); - AFL_VERIFY(count == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", CSController->GetShardingFiltersCount().Val()); const ui32 portionsCount = 16; for (ui32 i = 0; i < 4; ++i) { { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } 
WaitResharding("MERGE:" + ::ToString(i)); - // csController->WaitCleaning(TDuration::Seconds(5)); + // CSController->WaitCleaning(TDuration::Seconds(5)); CheckCount(230000); - AFL_VERIFY(count + portionsCount == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count + portionsCount == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", CSController->GetShardingFiltersCount().Val()); count += portionsCount; } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } + CSController->CheckInvariants(); } }; Y_UNIT_TEST(TableReshardingConsistency64) { - TReshardingTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); + TShardingTypeTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); } Y_UNIT_TEST(TableReshardingModuloN) { - TReshardingTest().SetShardingType("HASH_FUNCTION_MODULO_N").Execute(); + TShardingTypeTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); + } + + class TAsyncReshardingTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + + public: + TAsyncReshardingTest() { + TLocalHelper(Kikimr).CreateTestOlapTable("olapTable", "olapStore", 24, 4); + } + + void AddBatch(int numRows) { + WriteTestData(Kikimr, "/Root/olapStore/olapTable", LastPathId, LastTs, numRows); + LastPathId += numRows * 10; + LastTs += numRows * 10; + NumRows += numRows; + } + + void StartResharding(TString modification) { + auto alterQuery = + TStringBuilder() << R"(ALTER 
OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=)" + << modification << ");"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void CheckCount() { + TReshardingTest::CheckCount(NumRows); + } + + void ChangeSchema() { + auto alterQuery = + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=level, " + "`SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, " + "`COMPRESSION.TYPE`=`zstd`);"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void DisableCompaction() { + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + } + + private: + ui64 LastPathId = 1000000; + ui64 LastTs = 300000000; + ui64 NumRows = 0; + }; + + Y_UNIT_TEST(UpsertWhileSplitTest) { + TAsyncReshardingTest tester; + + tester.AddBatch(10000); + + tester.CheckCount(); + + for (int i = 0; i < 4; i++) { + tester.StartResharding("SPLIT"); + + tester.CheckCount(); + tester.AddBatch(10000); + tester.CheckCount(); + tester.WaitResharding(); + } + tester.AddBatch(10000); + tester.CheckCount(); + } + + Y_UNIT_TEST(ChangeSchemaAndSplit) { + TAsyncReshardingTest tester; + tester.DisableCompaction(); + + tester.AddBatch(10000); + tester.ChangeSchema(); + tester.AddBatch(10000); + + tester.StartResharding("SPLIT"); + tester.WaitResharding(); + + tester.CheckCount(); } } } diff --git a/ydb/core/kqp/ut/olap/clickbench_ut.cpp b/ydb/core/kqp/ut/olap/clickbench_ut.cpp index 1cbcd7be436d..dfdb63033187 100644 --- a/ydb/core/kqp/ut/olap/clickbench_ut.cpp +++ 
b/ydb/core/kqp/ut/olap/clickbench_ut.cpp @@ -155,7 +155,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q7.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q9; @@ -186,7 +186,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q12.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q14; @@ -202,7 +202,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q14.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q22; diff --git a/ydb/core/kqp/ut/olap/datatime64_ut.cpp b/ydb/core/kqp/ut/olap/datatime64_ut.cpp index b4ea097eafe8..97e2a2f2fd28 100644 --- a/ydb/core/kqp/ut/olap/datatime64_ut.cpp +++ b/ydb/core/kqp/ut/olap/datatime64_ut.cpp @@ -170,6 +170,7 @@ Y_UNIT_TEST_SUITE(KqpDatetime64ColumnShard) { runnerSettings.WithSampleTables = false; TTestHelper testHelper(runnerSettings); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int64).SetNullable(false), diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp 
b/ydb/core/kqp/ut/olap/decimal_ut.cpp new file mode 100644 index 000000000000..151226bd01d6 --- /dev/null +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NTable; + +Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { + class TDecimalTestCase { + public: + TDecimalTestCase() + : TestHelper(TKikimrSettings().SetWithSampleTables(false)) { + } + + TTestHelper::TUpdatesBuilder Inserter() { + return TTestHelper::TUpdatesBuilder(TestTable.GetArrowSchema(Schema)); + } + + void Upsert(TTestHelper::TUpdatesBuilder& inserter) { + TestHelper.BulkUpsert(TestTable, inserter); + } + + void CheckQuery(const TString& query, const TString& expected) { + TestHelper.ReadData(query, expected); + } + + void PrepareTable1() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("int").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table1").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(4).Add(TDecimalValue("3.14")); + inserter.AddRow().Add(2).Add(3).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(4).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("8.492")); + + Upsert(inserter); + } + } + + void PrepareTable2() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + 
TTestHelper::TColumnSchema().SetName("table1_id").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table2").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(2).Add(1).Add(TDecimalValue("8.16")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(4).Add(2).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + } + + private: + TTestHelper TestHelper; + + TVector Schema; + TTestHelper::TColumnTable TestTable; + }; + + Y_UNIT_TEST(TestSimpleQueries) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE id=1", "[[[\"3.14\"];1;[4]]]"); + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by id", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterEqual) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec == cast(\"3.14\" as decimal(22,9))", "[[[\"3.14\"];1;[4]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec != cast(\"3.14\" as decimal(22,9)) order by id", + "[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterNulls) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(5).AddNull(); + inserter.AddRow().Add(6).Add(6).AddNull(); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is NULL order by id", "[[#;5;[5]];[#;6;[6]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is not NULL order by id", + 
"[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterCompare) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec < cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]]]"); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` WHERE dec > cast(\"8.16\" as decimal(22,9)) order by id", "[[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec <= cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec >= cast(\"8.492\" as decimal(22,9)) order by id", + "[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestOrderByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by dec", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestGroupByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(12).Add(TDecimalValue("8.492")); + inserter.AddRow().Add(6).Add(30).Add(TDecimalValue("12.46")); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT dec, count(*) FROM `/Root/Table1` group by dec order by dec", + "[[[\"3.14\"];1u];[[\"8.16\"];1u];[[\"8.492\"];2u];[[\"12.46\"];2u]]"); + } + + Y_UNIT_TEST(TestAggregation) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.CheckQuery("SELECT min(dec) FROM `/Root/Table1`", "[[[\"3.14\"]]]"); + tester.CheckQuery("SELECT max(dec) FROM `/Root/Table1`", "[[[\"12.46\"]]]"); + tester.CheckQuery("SELECT sum(dec) FROM `/Root/Table1`", "[[[\"32.252\"]]]"); + } + + Y_UNIT_TEST(TestJoinById) { + 
TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t1.dec, t2.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.id = t2.table1_id order by t1.id, t1.dec, " + "t2.dec", + R"([[1;["3.14"];["8.16"]];[1;["3.14"];["12.46"]];[2;["8.16"];["8.16"]];[2;["8.16"];["12.46"]]])"); + } + + Y_UNIT_TEST(TestJoinByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t2.id, t1.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.dec = t2.dec order by t1.id, t2.id, t1.dec", + R"([[2;2;["8.16"]];[2;4;["8.16"]];[4;1;["12.46"]];[4;3;["12.46"]]])"); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp index 7bed6f4a4aa4..1c03f6be9ffa 100644 --- a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp @@ -13,6 +13,7 @@ void TestAggregationsBase(const std::vector& cases) { TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").Initialize(); { WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); @@ -49,10 +50,11 @@ void TestAggregationsInternal(const std::vector& cases) { Tests::TServer::TPtr server = new Tests::TServer(settings); auto runtime = server->GetRuntime(); + Tests::NCommon::TLoggerInit(runtime).Initialize(); + Tests::NCommon::TLoggerInit(runtime).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); auto sender = runtime->AllocateEdgeActor(); InitRoot(server, sender); - Tests::NCommon::TLoggerInit(runtime).Initialize(); ui32 numShards = 1; ui32 numIterations = 10; diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.h 
b/ydb/core/kqp/ut/olap/helpers/aggregation.h index e83dcb32fd87..de9ab275fa79 100644 --- a/ydb/core/kqp/ut/olap/helpers/aggregation.h +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.h @@ -81,12 +81,7 @@ class TAggregationTestCase { bool UseLlvm = true; public: void FillExpectedAggregationGroupByPlanOptions() { -#if SSA_RUNTIME_VERSION >= 2U - // AddExpectedPlanOptions("TKqpOlapAgg"); AddExpectedPlanOptions("WideCombiner"); -#else - AddExpectedPlanOptions("CombineCore"); -#endif } TString GetFixedQuery() const { TStringBuilder queryFixed; @@ -177,7 +172,8 @@ void CheckPlanForAggregatePushdown( const TString& query, TClient& client, const std::vector& expectedPlanNodes, - const std::string& readNodeType) { + const std::string& readNodeType) +{ auto res = StreamExplainQuery(query, client); UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); @@ -242,4 +238,4 @@ void WriteTestDataForTableWithNulls(TKikimrRunner& kikimr, TString testTable); void TestTableWithNulls(const std::vector& cases, const bool genericQuery = false); -} \ No newline at end of file +} diff --git a/ydb/core/kqp/ut/olap/helpers/get_value.cpp b/ydb/core/kqp/ut/olap/helpers/get_value.cpp index 208e5e40e02e..67fe905d8e44 100644 --- a/ydb/core/kqp/ut/olap/helpers/get_value.cpp +++ b/ydb/core/kqp/ut/olap/helpers/get_value.cpp @@ -36,6 +36,11 @@ void PrintValue(IOutputStream& out, const NYdb::TValue& v) { out << value.GetInt64(); break; } + case NYdb::EPrimitiveType::Uint8: + { + out << value.GetUint8(); + break; + } case NYdb::EPrimitiveType::Utf8: { out << value.GetUtf8(); diff --git a/ydb/core/kqp/ut/olap/helpers/local.h b/ydb/core/kqp/ut/olap/helpers/local.h index dc957f98220e..9511ad1828ef 100644 --- a/ydb/core/kqp/ut/olap/helpers/local.h +++ b/ydb/core/kqp/ut/olap/helpers/local.h @@ -28,8 +28,14 @@ class TLocalHelper: public Tests::NCS::THelper { void CreateTestOlapTable(TString tableName = "olapTable", TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - 
CreateOlapTableWithStore(tableName, storeName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({tableName}, storeName, storeShardsCount, tableShardsCount); } + + void CreateTestOlapTables(TVector tableNames = {"olapTable0", "olapTable1"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { + CreateOlapTablesWithStore(tableNames, storeName, storeShardsCount, tableShardsCount); + } + using TBase::TBase; TLocalHelper(TKikimrRunner& runner) diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp index e592ed398d2b..32c08c2c8925 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp @@ -18,6 +18,31 @@ TString TTypedLocalHelper::GetTestTableSchema() const { return result; } +TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const { + TString result; + result += R"( + Columns { Name: "pk_int" Type: "Int64" NotNull: true } + Columns { Name: "ts" Type: "Timestamp" } + )"; + for (ui32 i = 0; i < reps; i++) { + TString strNum = ToString(i); + result += "Columns {Name: \"field_utf" + strNum + "\" Type: \"Utf8\"}\n"; + result += "Columns {Name: \"field_int" + strNum + "\" Type: \"Int64\"}\n"; + result += "Columns {Name: \"field_uint" + strNum + "\" Type: \"Uint8\"}\n"; + result += "Columns {Name: \"field_float" + strNum + "\" Type: \"Float\"}\n"; + result += "Columns {Name: \"field_double" + strNum + "\" Type: \"Double\"}\n"; + } + result += R"( + KeyColumnNames: "pk_int" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + )"; + return result; +} + +void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) { + CreateSchemaOlapTablesWithStore(GetMultiColumnTestTableSchema(reps), {TableName}, "olapStore", storeShardsCount, tableShardsCount); +} + void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= 
EStatus::SUCCESS*/) const { auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); @@ -79,7 +104,7 @@ NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistributi } void TTypedLocalHelper::GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector columnNames /*= {}*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity = true"; + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity == 1"; if (columnNames.size()) { selectQuery += " AND EntityName IN ('" + JoinSeq("','", columnNames) + "')"; } @@ -143,16 +168,16 @@ void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRow TBase::SendDataViaActorSystem(TablePath, batch); } -void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity = true"; +void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity == 1"; auto tableClient = KikimrRunner.GetTableClient(); auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); for (auto&& r : rows) { for (auto&& c : r) { if (c.first == "Stats") { - NKikimrColumnShardStatisticsProto::TPortionStorage store; - AFL_VERIFY(google::protobuf::TextFormat::ParseFromString(GetUtf8(c.second), &store)); - stats.emplace_back(store); + NJson::TJsonValue jsonStore; + AFL_VERIFY(NJson::ReadJsonFastTree(GetUtf8(c.second), &jsonStore)); + stats.emplace_back(jsonStore); } } } diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index 1afef6b7a19b..df81c5cd6a86 100644 --- 
a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -1,10 +1,15 @@ #pragma once #include #include +#include +#include +#include + #include -#include -#include -#include + +#include + +#include namespace NKikimr::NKqp { @@ -19,14 +24,14 @@ class TTypedLocalHelper: public Tests::NCS::THelper { protected: virtual TString GetTestTableSchema() const override; virtual std::vector GetShardingColumns() const override { - return {"pk_int"}; + return { "pk_int" }; } public: TTypedLocalHelper(const TString& typeName, TKikimrRunner& kikimrRunner, const TString& tableName = "olapTable", const TString& storeName = "olapStore") : TBase(kikimrRunner.GetTestServer()) , TypeName(typeName) , KikimrRunner(kikimrRunner) - , TablePath("/Root/" + storeName + "/" + tableName) + , TablePath(storeName.empty() ? "/Root/" + tableName : "/Root/" + storeName + "/" + tableName) , TableName(tableName) , StoreName(storeName) { SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64"); @@ -66,12 +71,12 @@ class TTypedLocalHelper: public Tests::NCS::THelper { void GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose = false, const std::vector columnNames = {}); - void GetStats(std::vector& stats, const bool verbose = false); + void GetStats(std::vector& stats, const bool verbose = false); void GetCount(ui64& count); template - void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { + void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const { std::vector builders; builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); builders.emplace_back(std::make_shared>("field", fillPolicy)); @@ -80,11 +85,38 @@ class TTypedLocalHelper: public Tests::NCS::THelper { TBase::SendDataViaActorSystem(TablePath, batch); } + void FillMultiColumnTable(ui32 repCount, const double pkKff = 0, const ui32 numRows = 800000) const { + 
const double frq = 0.9; + NArrow::NConstruction::TPoolFiller int64Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller uint8Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller floatPool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller doublePool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller utfPool(1000, 52, "abcde", frq); + + std::vector builders; + builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); + for (ui32 i = 0; i < repCount; i++) { + TString repStr = ToString(i); + builders.emplace_back(std::make_shared>>("field_utf" + repStr, utfPool, i)); + builders.emplace_back(std::make_shared>>("field_int" + repStr, int64Pool, i)); + builders.emplace_back(std::make_shared>>("field_uint" + repStr, uint8Pool, i)); + builders.emplace_back(std::make_shared>>("field_float" + repStr, floatPool, i)); + builders.emplace_back(std::make_shared>>("field_double" + repStr, doublePool, i)); + } + NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); + std::shared_ptr batch = batchBuilder.BuildBatch(numRows); + TBase::SendDataViaActorSystem(TablePath, batch); + } + + void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const; void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({TableName}, StoreName, storeShardsCount, tableShardsCount); } + + TString GetMultiColumnTestTableSchema(ui32 reps) const; + void CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); }; -} \ No newline at end of file +} diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 6a80bc2bf133..13e98b57e9b6 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -6,6 +6,8 @@ #include #include +#include + #include namespace NKikimr::NKqp { 
@@ -17,8 +19,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); TLocalHelper(kikimr).CreateTestOlapTable(); @@ -59,7 +61,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -68,7 +70,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -105,13 +107,136 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } } + Y_UNIT_TEST(CountMinSketchIndex) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + 
csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=cms_ts, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['timestamp']}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=cms_res_id, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['resource_id']}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=cms_uid, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['uid']}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=cms_level, 
TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['level']}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=cms_message, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['message']}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + csController->WaitActualization(TDuration::Seconds(10)); + { + auto runtime = kikimr.GetTestServer().GetRuntime(); + auto sender = runtime->AllocateEdgeActor(); + + TAutoPtr handle; + + size_t shard = 0; + std::set pathids; + for (auto&& i : csController->GetShardActualIds()) { + Cerr << ">>> shard actual id: " << i << Endl; + for (auto&& j : csController->GetPathIds(i)) { + Cerr << ">>> path id: " << j << Endl; + pathids.insert(j); + } + if (++shard == 3) + break; + } + + UNIT_ASSERT(pathids.size() == 1); + ui64 pathId = *pathids.begin(); + + shard = 0; + for (auto&& i : 
csController->GetShardActualIds()) { + auto request = std::make_unique(); + request->Record.MutableTable()->MutablePathId()->SetLocalId(pathId); + + runtime->Send(MakePipePerNodeCacheID(false), sender, new TEvPipeCache::TEvForward( + request.release(), i, false)); + if (++shard == 3) + break; + } + + auto sketch = std::unique_ptr(TCountMinSketch::Create()); + for (size_t shard = 0; shard < 3; ++shard) { + auto event = runtime->GrabEdgeEvent(handle); + UNIT_ASSERT(event); + + auto& response = event->Record; + // Cerr << response << Endl; + UNIT_ASSERT_VALUES_EQUAL(response.GetStatus(), NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS); + UNIT_ASSERT(response.ColumnsSize() == 5); + TString someData = response.GetColumns(0).GetStatistics(0).GetData(); + *sketch += *std::unique_ptr(TCountMinSketch::FromString(someData.data(), someData.size())); + Cerr << ">>> sketch.GetElementCount() = " << sketch->GetElementCount() << Endl; + UNIT_ASSERT(sketch->GetElementCount() > 0); + } + } + } + Y_UNIT_TEST(SchemeActualizationOnceOnStart) { auto settings = TKikimrSettings() .SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); @@ -171,78 +296,87 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { (ui64)csController->GetActualizationRefreshSchemeCount().Val())("updates", updatesCount)("count", csController->GetActualizationRefreshSchemeCount().Val()); } - Y_UNIT_TEST(Indexes) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); + class TTestIndexesScenario { + private: + TKikimrSettings Settings; + std::unique_ptr Kikimr; + YDB_ACCESSOR(TString, StorageId, "__DEFAULT"); + public: + TTestIndexesScenario& Initialize() { + 
Settings = TKikimrSettings().SetWithSampleTables(false); + Kikimr = std::make_unique(Settings); + return *this; + } - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); + void Execute() const { + TLocalHelper(*Kikimr).CreateTestOlapTable(); + auto tableClient = Kikimr->GetTableClient(); -// Tests::NCommon::TLoggerInit(kikimr).Initialize(); + // Tests::NCommon::TLoggerInit(kikimr).Initialize(); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; - auto session = 
tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - - std::vector uids; - std::vector resourceIds; - std::vector levels; - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data()); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data() + ); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), 
NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + filler(1200000, 300200000, 10000); + filler(1300000, 300300000, 10000); + filler(1400000, 300400000, 10000); + filler(2000000, 200000000, 70000); + filler(3000000, 100000000, 110000); - filler(1000000, 300000000, 10000); 
- filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - - } + } - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -250,27 +384,27 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { FROM `/Root/olapStore/olapTable` )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[230000u;]])"); - } + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[230000u;]])"); + } - AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); - TInstant start = Now(); - ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); - while (Now() - start < TDuration::Seconds(10)) { - if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { - compactionsStart = csController->GetCompactionStartedCounter().Val(); - start = Now(); + AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); + TInstant start = Now(); + ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); + while (Now() - start < TDuration::Seconds(10)) { + if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { + compactionsStart = csController->GetCompactionStartedCounter().Val(); + start = Now(); + } + Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; + Sleep(TDuration::Seconds(1)); } - Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; - Sleep(TDuration::Seconds(1)); - 
} - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -279,40 +413,50 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; - CompareYson(result, R"([[0u;]])"); - AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); - } - ui32 requestsCount = 100; - for (ui32 i = 0; i < requestsCount; ++i) { - const ui32 idx = RandomNumber(uids.size()); - const auto query = [](const TString& res, const TString& uid, const ui32 level) { - TStringBuilder sb; - sb << "SELECT" << Endl; - sb << "COUNT(*)" << Endl; - sb << "FROM `/Root/olapStore/olapTable`" << Endl; - sb << "WHERE(" << Endl; - sb << "resource_id = '" << res << "' AND" << Endl; - sb << "uid= '" << uid << "' AND" << Endl; - sb << "level= " << level << Endl; - sb << ")"; - return sb; - }; - auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); + CompareYson(result, R"([[0u;]])"); + if (StorageId == "__LOCAL_METADATA") { + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val()); + } else { + 
AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0)("val", csController->GetIndexesSkippedNoData().Val()); + } + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()); + } + ui32 requestsCount = 100; + for (ui32 i = 0; i < requestsCount; ++i) { + const ui32 idx = RandomNumber(uids.size()); + const auto query = [](const TString& res, const TString& uid, const ui32 level) { + TStringBuilder sb; + sb << "SELECT COUNT(*) FROM `/Root/olapStore/olapTable`" << Endl; + sb << "WHERE(" << Endl; + sb << "resource_id = '" << res << "' AND" << Endl; + sb << "uid= '" << uid << "' AND" << Endl; + sb << "level= " << level << Endl; + sb << ")"; + return sb; + }; + auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; + CompareYson(result, R"([[1u;]])"); + } - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; - CompareYson(result, R"([[1u;]])"); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) + ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } + }; - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.20 * csController->GetIndexesSkippingOnSelect().Val()) - ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); + 
Y_UNIT_TEST(IndexesInBS) { + TTestIndexesScenario().SetStorageId("__DEFAULT").Initialize().Execute(); + } + Y_UNIT_TEST(IndexesInLocalMetadata) { + TTestIndexesScenario().SetStorageId("__LOCAL_METADATA").Initialize().Execute(); } Y_UNIT_TEST(IndexesModificationError) { @@ -328,7 +472,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -338,7 +482,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid", "resource_id"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -348,7 +492,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.005}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -358,7 +502,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET 
(ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.01}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); diff --git a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp index f9959ede78e3..c1fcab4be0fd 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp @@ -12,7 +12,7 @@ using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpOlapStats) { constexpr size_t inserted_rows = 1000; constexpr size_t tables_in_store = 1000; - constexpr size_t size_single_table = 13352; + constexpr size_t size_single_table = 13152; const TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), @@ -21,10 +21,10 @@ Y_UNIT_TEST_SUITE(KqpOlapStats) { class TOlapStatsController : public NYDBTest::NColumnShard::TController { public: - TDuration GetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { + TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } - TDuration GetStatsReportInterval(const TDuration /*defaultValue*/) const override { + TDuration DoGetStatsReportInterval(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } }; diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 5e5156178415..390d28b931ca 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -51,9 +51,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(timestamp) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d + PARTITION_COUNT = %d ) - )", storeName.data(), tableName.data(), shardsCount); + )", + storeName.data(), tableName.data(), shardsCount); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); if (result.GetStatus() != 
EStatus::SUCCESS) { Cerr << result.GetIssues().ToOneLineString() << Endl; @@ -939,7 +940,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`resource_id` = "10001")", R"(`resource_id` != "10001")", R"("XXX" == "YYY" OR `resource_id` != "10001")", - R"(`resource_id` != "10001" XOR "XXX" == "YYY")", R"(`level` = 1)", R"(`level` = Int8("1"))", R"(`level` = Int16("1"))", @@ -983,8 +983,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`message` IS NOT NULL)", R"((`level`, `uid`) > (Int32("1"), NULL))", R"((`level`, `uid`) != (Int32("1"), NULL))", - R"(`level` >= CAST("2" As Int32))", - R"(CAST("2" As Int32) >= `level`)", + //R"(`level` >= CAST("2" As Int32))", + //R"(CAST("2" As Int32) >= `level`)", #if SSA_RUNTIME_VERSION >= 2U R"(`uid` LIKE "%30000%")", R"(`uid` LIKE "uid%")", @@ -1021,6 +1021,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"((`timestamp`, `level`) >= (Timestamp("1970-01-01T00:00:03.000001Z"), 3))", #endif #if SSA_RUNTIME_VERSION >= 5U + R"(`resource_id` != "10001" XOR "XXX" == "YYY")", R"(IF(`level` > 3, -`level`, +`level`) < 2)", R"(StartsWith(`message` ?? `resource_id`, "10000"))", R"(NOT EndsWith(`message` ?? 
`resource_id`, "xxx"))", @@ -1085,7 +1086,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`level` != NULL)", R"(`level` > NULL)", R"(`timestamp` >= CAST(3000001U AS Timestamp))", - R"(`level` >= CAST("2" As Uint32))", R"(`level` = NULL)", R"(`level` > NULL)", R"(Re2::Match('uid.*')(`uid`))", @@ -1208,12 +1208,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); std::vector< std::pair > secondLvlFilters = { - { R"(`uid` LIKE "%30000%")", "TableFullScan" }, - { R"(`uid` NOT LIKE "%30000%")", "TableFullScan" }, - { R"(`uid` LIKE "uid%")", "TableFullScan" }, - { R"(`uid` LIKE "%001")", "TableFullScan" }, + { R"(`uid` LIKE "%30000%")", "Filter-TableFullScan" }, + { R"(`uid` NOT LIKE "%30000%")", "Filter-TableFullScan" }, + { R"(`uid` LIKE "uid%")", "Filter-TableFullScan" }, + { R"(`uid` LIKE "%001")", "Filter-TableFullScan" }, #if SSA_RUNTIME_VERSION >= 4U - { R"(`uid` LIKE "uid%001")", "TableFullScan" }, + { R"(`uid` LIKE "uid%001")", "Filter-TableFullScan" }, #else { R"(`uid` LIKE "uid%001")", "Filter-TableFullScan" }, // We have filter (Size >= 6) #endif @@ -1844,8 +1844,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(WatchID) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT =)" << numShards - << ")"; + PARTITION_COUNT =)" << numShards + << ")"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -1932,10 +1932,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); + )"); lHelper.StartDataRequest( R"( @@ -1987,10 +1986,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); + )"); lHelper.StartDataRequest( R"( @@ -1999,7 +1997,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ); } -/* + /* 
Y_UNIT_TEST(OlapDeletePlanned) { TPortManager pm; @@ -2039,7 +2037,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 8 + PARTITION_COUNT = 8 ); )" ); @@ -2055,6 +2053,36 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } */ + // Unit test for https://github.com/ydb-platform/ydb/issues/7967 + Y_UNIT_TEST(PredicatePushdownNulls) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + + TKikimrRunner kikimr(settings); + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 10); + + auto tableClient = kikimr.GetTableClient(); + + TString query = R"( + SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE + (case when level > 0 + then level + else null + end) > 0; + )"; + + auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); + // Check for successful execution + auto streamPart = it.ReadNext().GetValueSync(); + + UNIT_ASSERT(streamPart.IsSuccess()); + } + Y_UNIT_TEST(PredicatePushdownCastErrors) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -2180,7 +2208,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto result = CollectStreamResult(it); auto ast = result.QueryStats->Getquery_ast(); - + pushdown = ast.find("KqpOlapFilter") != std::string::npos; } else { // Error means that predicate not pushed down @@ -2462,6 +2490,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); appConfig.MutableTableServiceConfig()->SetBlockChannelsMode(blockChannelsMode); + appConfig.MutableTableServiceConfig()->SetEnableSpillingNodes("None"); auto settings = TKikimrSettings() .SetAppConfig(appConfig) .SetWithSampleTables(true); @@ -2479,14 +2508,14 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PRIMARY KEY (a) ) PARTITION BY HASH(a) - WITH (STORE = COLUMN, 
AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 4); + WITH (STORE = COLUMN, PARTITION_COUNT = 4); )"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (a, b, c) VALUES (1u, 1, 5), @@ -2516,15 +2545,18 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_AUTO: - UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (ExpandMap (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (WideFromBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(!plan.QueryStats->Getquery_ast().Contains("WideToBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_EQUAL_C(plan.QueryStats->Getquery_ast().find("WideFromBlocks"), plan.QueryStats->Getquery_ast().rfind("WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_FORCE: - UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (WideMap"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (WideSortBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; } } - { + { auto it = client.StreamExecuteQuery(R"( SELECT b, COUNT(*), SUM(a) @@ -2537,6 +2569,68 @@ Y_UNIT_TEST_SUITE(KqpOlap) { TString output = StreamResultToYson(it); CompareYson(output, R"([[[1];1u;1u];[[2];2u;6u]])"); } + + { + NYdb::NQuery::TExecuteQuerySettings scanSettings; + 
scanSettings.ExecMode(NYdb::NQuery::EExecMode::Explain); + auto it = client.StreamExecuteQuery(R"( + PRAGMA ydb.CostBasedOptimizationLevel='0'; + + $select1 = ( + SELECT b AS a1, COUNT(*) AS b1, SUM(a) AS c1 + FROM `/Root/ColumnShard` + WHERE c = 5 + GROUP BY b + ); + + $select2 = ( + SELECT (b1 + 1ul) AS a2, COUNT(*) AS b2, SUM(a1) AS c2 + FROM $select1 + WHERE c1 = 5 + GROUP BY b1 + ); + + $select3 = ( + SELECT b1 AS a3, COUNT(*) AS b3, MAX(a1) AS c3 + FROM $select1 + WHERE b1 = 6 + GROUP BY b1 + ); + + SELECT a2, b2 + FROM $select2 AS table2 + JOIN $select3 AS table3 + ON table2.a2 = table3.a3 + ORDER BY b2 + LIMIT 10 + ; + + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), scanSettings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + auto plan = CollectStreamResult(it); + + // auto CountSubstr = [](const TString& str, const TString& sub) -> ui64 { + // ui64 count = 0; + // for (auto pos = str.find(sub); pos != TString::npos; pos = str.find(sub, pos + sub.size())) { + // ++count; + // } + // return count; + // }; + + switch (blockChannelsMode) { + case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_SCALAR: + // TODO: implement checks? + break; + case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_AUTO: + // TODO: test fails because of some stages don't get wide channels. + // UNIT_ASSERT_EQUAL_C(CountSubstr(plan.QueryStats->Getquery_ast(), "WideFromBlocks"), 2, plan.QueryStats->Getquery_ast()); + // UNIT_ASSERT_C(!plan.QueryStats->Getquery_ast().Contains("WideToBlocks"), plan.QueryStats->Getquery_ast()); + break; + case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_FORCE: + // TODO: implement checks? 
+ break; + } + } } Y_UNIT_TEST(BlockChannelScalar) { @@ -2624,6 +2718,193 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } + Y_UNIT_TEST(NormalizeAbsentColumn) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper testHelper(kikimr); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + + testHelper.CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + { + auto alterQuery = TStringBuilder() << "ALTER TABLESTORE `/Root/olapStore` ADD COLUMN new_column1 Uint64;"; + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 1000); + + { + auto alterQuery = TStringBuilder() << "ALTER TABLESTORE `/Root/olapStore` ADD COLUMN new_column2 Uint64;"; + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + + } + + Y_UNIT_TEST(MultiInsertWithSinks) { + 
NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + auto db = kikimr.GetQueryClient(); + + auto result = db.ExecuteQuery(R"( + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:00Z'), 'a', '0'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:01Z'), 'b', 't'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:02Z'), 'c', 'test'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + { + auto it = db.StreamExecuteQuery(R"( + --!syntax_v1 + + SELECT + * + FROM `/Root/olapStore/olapTable` ORDER BY uid + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + CompareYson(result, R"([[#;#;["0"];0u;"a"];[#;#;["t"];1000000u;"b"];[#;#;["test"];2000000u;"c"]])"); + } + } + + Y_UNIT_TEST(CountWhereColumnIsNull) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), 
it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL AND uid IS NOT NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + GROUP BY level + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + } + + Y_UNIT_TEST(SimpleCount) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE StartsWith(uid, "uid_") + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[200u]]", result); + } + } + + Y_UNIT_TEST(TableSinkWithOlapStore) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + 
Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + TLocalHelper(kikimr).CreateTestOlapTables(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable0", 0, 1000000, 3, true); + + auto client = kikimr.GetQueryClient(); + { + auto result = client.ExecuteQuery(R"( + SELECT * FROM `/Root/olapStore/olapTable0` ORDER BY timestamp; + INSERT INTO `/Root/olapStore/olapTable1` SELECT * FROM `/Root/olapStore/olapTable0`; + REPLACE INTO `/Root/olapStore/olapTable0` SELECT * FROM `/Root/olapStore/olapTable1`; + SELECT * FROM `/Root/olapStore/olapTable1` ORDER BY timestamp; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + } } } diff --git a/ydb/core/kqp/ut/olap/sparsed_ut.cpp b/ydb/core/kqp/ut/olap/sparsed_ut.cpp new file mode 100644 index 000000000000..73b75f2cc53f --- /dev/null +++ b/ydb/core/kqp/ut/olap/sparsed_ut.cpp @@ -0,0 +1,307 @@ +#include "helpers/local.h" +#include "helpers/writer.h" +#include "helpers/typed_local.h" +#include "helpers/query_executor.h" +#include "helpers/get_value.h" + +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapSparsed) { + + class TSparsedDataTest { + private: + const TKikimrSettings Settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner Kikimr; + NKikimr::NYDBTest::TControllers::TGuard CSController; + const TString StoreName; + ui32 MultiColumnRepCount = 100; + static const ui32 SKIP_GROUPS = 7; + const TVector FIELD_NAMES{"utf", "int", "uint", "float", "double"}; + public: + TSparsedDataTest(const TString& storeName) + : Kikimr(Settings) + , CSController(NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard()) + , StoreName(storeName) + { + + } + + ui32 GetCount() const { + auto selectQuery = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? 
"" : StoreName + "/") + "olapTable`"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery); + return GetUint64(rows[0].at("count")); + } + + ui32 GetDefaultsCount(const TString& fieldName, const TString& defValueStr) const { + auto selectQueryTmpl = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? "" : StoreName + "/") + R"(olapTable` + WHERE %s == %s + )"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, Sprintf(selectQueryTmpl.c_str(), fieldName.c_str(), defValueStr.c_str())); + return GetUint64(rows[0].at("count")); + } + + void GetAllDefaultsCount(ui64* counts, ui32 skipCount) { + TString query = "SELECT"; + ui32 groupsCount = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + query += Sprintf("%s field_utf%u == 'abcde' AS def_utf%u, field_uint%u == 0 AS def_uint%u, field_int%u == 0 AS def_int%u, field_float%u == 0 AS def_float%u, field_double%u == 0 AS def_double%u", i == 0 ? 
"" : ",", i, i, i, i, i, i, i, i, i, i); + groupsCount++; + } + query += " FROM `/Root/olapStore/olapTable`"; + auto tableClient = Kikimr.GetTableClient(); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + auto rows = ExecuteScanQuery(tableClient, query, false); + + printTime("Executing query"); + + Fill(&counts[0], &counts[FIELD_NAMES.size() * groupsCount], 0); + + for (auto& row: rows) { + auto incCounts = [&](ui32 i, const TString& column) { + if (*NYdb::TValueParser(row.at(column)).GetOptionalBool()) { + counts[i]++; + } + }; + ui32 ind = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + TString grStr = ToString(i); + incCounts(ind++, "def_utf" + grStr); + incCounts(ind++, "def_uint" + grStr); + incCounts(ind++, "def_int" + grStr); + incCounts(ind++, "def_float" + grStr); + incCounts(ind++, "def_double" + grStr); + } + } + } + + void CheckAllFieldsTable(bool firstCall, ui32 countExpectation, ui32* defCountStart) { + ui32 grCount = (MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui64 defCounts[FIELD_NAMES.size() * grCount]; + const ui32 count = GetCount(); + GetAllDefaultsCount(defCounts, SKIP_GROUPS); + for (ui32 i = 0; i < FIELD_NAMES.size() * grCount; i++) { + if (firstCall) { + defCountStart[i] = defCounts[i]; + } else { + AFL_VERIFY(defCountStart[i] == defCounts[i]); + } + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count < 0.95)("def", defCounts[i])("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count > 0.85)("def", defCounts[i])("count", count); + } + } + + void CheckTable(const TString& fieldName, const TString& defValueStr, bool firstCall, ui32 countExpectation, ui32& defCountStart) { + const ui32 defCount = GetDefaultsCount(fieldName, defValueStr); + if (firstCall) { 
+ defCountStart = defCount; + } else { + AFL_VERIFY(defCountStart == defCount); + } + const ui32 count = GetCount(); + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCount / count < 0.95)("def", defCount)("count", count); + AFL_VERIFY(1.0 * defCount / count > 0.85)("def", defCount)("count", count); + } + + template + void FillCircleImpl(TFillTable&& fillTable, TCheckTable&& checkTable) { + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + fillTable(); + printTime("fillTable"); + checkTable(true); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->WaitIndexation(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + printTime("wait"); + } + + void FillCircle(const double shiftKff, const ui32 countExpectation) { + ui32 defCountStart = (ui32)-1; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + const double frq = 0.9; + NArrow::NConstruction::TStringPoolFiller sPool(1000, 52, "abcde", frq); + helper.FillTable(sPool, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckTable("field", "'abcde'", firstCall, countExpectation, defCountStart); + }); + } + + void FillMultiColumnCircle(const double shiftKff, const ui32 countExpectation) { + ui32 grCount = 
(MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui32 defCountStart[FIELD_NAMES.size() * grCount]; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr); + helper.FillMultiColumnTable(MultiColumnRepCount, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckAllFieldsTable(firstCall, countExpectation, defCountStart); + }); + } + + void Execute() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + if (!StoreName.empty()) { + helper.CreateTestOlapTable(); + } else { + auto tableClient = Kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + auto query = TStringBuilder() << R"( + --!syntax_v1 + CREATE TABLE `/Root/olapTable` + ( + pk_int int64 NOT NULL, + field )" << "Utf8" << R"(, + ts TimeStamp, + PRIMARY KEY (pk_int) + ) + PARTITION BY HASH(pk_int) + WITH ( + STORE = COLUMN + ))"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + TString type = StoreName.empty() ? "TABLE" : "TABLESTORE"; + TString name = StoreName.empty() ? 
"olapTable" : "olapStore"; + + FillCircle(0, 10000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, `DEFAULT_VALUE`=`abcde`);"); + FillCircle(0.1, 11000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.2, 12000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + FillCircle(0.3, 13000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.4, 14000); + } + + void ExecuteMultiColumn() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr); + helper.CreateMultiColumnOlapTableWithStore(MultiColumnRepCount); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + FillMultiColumnCircle(0, 10000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, 
`DEFAULT_VALUE`=" + (f == 0 ? "`abcde`" : "`0`") + ");"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.1, 11000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.2, 12000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.3, 13000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.4, 14000); + printTime("Fill"); + } + }; + + Y_UNIT_TEST(Switching) { + TSparsedDataTest test("olapStore"); + test.Execute(); + } + + Y_UNIT_TEST(SwitchingMultiColumn) { + TSparsedDataTest test("olapStore"); + test.ExecuteMultiColumn(); + } + + Y_UNIT_TEST(SwitchingStandalone) { + TSparsedDataTest test(""); + test.Execute(); + } +} + +} // namespace diff --git a/ydb/core/kqp/ut/olap/statistics_ut.cpp b/ydb/core/kqp/ut/olap/statistics_ut.cpp index d79a07f9bc3b..ece5e454bacb 100644 --- a/ydb/core/kqp/ut/olap/statistics_ut.cpp +++ 
b/ydb/core/kqp/ut/olap/statistics_ut.cpp @@ -14,25 +14,25 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`))"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_field, FEATURES=`{\"column_name\": \"field\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"field\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); 
UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_pk_int);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_pk_int);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -50,7 +50,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, TYPE=MAX, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -62,7 +62,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_ts);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_ts);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); 
UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 4242f7676298..27820452542e 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -5,6 +5,7 @@ #include "helpers/get_value.h" #include +#include #include #include @@ -229,7 +230,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { helper.CreateTestOlapTable(); NArrow::NConstruction::TStringPoolFiller sPool(3, 52); helper.FillTable(sPool, 0, 800000); - csController->WaitCompactions(TDuration::Seconds(10)); + csController->WaitCompactions(TDuration::Seconds(5)); + helper.FillTable(sPool, 0.5, 800000); + csController->WaitCompactions(TDuration::Seconds(5)); helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"}); AFL_VERIFY(rawBytes1 == 0); @@ -241,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { csController->WaitActualization(TDuration::Seconds(10)); ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"}); - AFL_VERIFY(rawBytes2 == 6500041)("real", rawBytes2); - AFL_VERIFY(bytes2 == 45360)("b", bytes2); + helper.GetVolumes(rawBytes2, bytes2, false, { "new_column_ui64", NOlap::IIndexInfo::SPEC_COL_DELETE_FLAG }); + AFL_VERIFY(rawBytes2 == 0)("real", rawBytes2); + AFL_VERIFY(bytes2 == 0)("b", bytes2); } } @@ -302,63 +305,61 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { auto tableClient = kikimr.GetTableClient(); { helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=field_var, TYPE=variability, FEATURES=`{\"column_name\" : \"field\"}`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, 
FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { + csController->WaitActualization(TDuration::Seconds(40)); + { ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + helper.GetVolumes(rawBytes2, bytes2, false, { "field" }); AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); AFL_VERIFY(bytes2 < bytes1 * 0.5)("f1", bytes1)("f2", bytes2); - std::vector stats; + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } - } - return true; + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 4); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - ); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=pk_int_max);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=pk_int_max);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(30)); + { + std::vector 
stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 1) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 0)("json", i); } - return true; - }); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(40)); + { + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 5)("json", i); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - return true; - } - ); + } } } @@ -452,7 +453,7 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Activity = true + PathId == UInt64("3") AND Activity == 1 GROUP BY TabletId, PathId, Kind ORDER BY TabletId, Kind )"); diff --git a/ydb/core/kqp/ut/olap/tiering_ut.cpp 
b/ydb/core/kqp/ut/olap/tiering_ut.cpp new file mode 100644 index 000000000000..b9cceba93738 --- /dev/null +++ b/ydb/core/kqp/ut/olap/tiering_ut.cpp @@ -0,0 +1,161 @@ +#include "helpers/get_value.h" +#include "helpers/local.h" +#include "helpers/query_executor.h" +#include "helpers/typed_local.h" +#include "helpers/writer.h" + +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapTiering) { + Y_UNIT_TEST(Eviction) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + const TString tieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + + for (ui64 i = 0; i < 100; ++i) { + WriteTestData(testHelper.GetKikimr(), "/Root/olapStore/olapTable", 0, i * 10000, 1000); + } + + csController->WaitActualization(TDuration::Seconds(5)); + + ui64 columnRawBytes = 0; + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + + columnRawBytes = GetUint64(rows[0].at("RawBytes")); + UNIT_ASSERT_GT(columnRawBytes, 0); + } + + testHelper.SetTiering("/Root/olapStore/olapTable", tieringRule); + csController->WaitActualization(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM 
`/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "tier1"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after eviction: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + + testHelper.ResetTiering("/Root/olapStore/olapTable"); + csController->WaitCompactions(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after resetting tiering: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + } + + Y_UNIT_TEST(TieringRuleValidation) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_tiering_rule (TYPE TIERING_RULE) + WITH (defaultColumn = timestamp, description = `{"rules": []}`))"; + auto result = 
testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_default_column (TYPE TIERING_RULE) + WITH (defaultColumn = ``, description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS no_default_column (TYPE TIERING_RULE) + WITH (description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + const TString correctTieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET defaultColumn ``)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) RESET defaultColumn)"; + auto result = 
testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + } +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/write_ut.cpp b/ydb/core/kqp/ut/olap/write_ut.cpp index b631b245d7c9..8d9751f28193 100644 --- a/ydb/core/kqp/ut/olap/write_ut.cpp +++ b/ydb/core/kqp/ut/olap/write_ut.cpp @@ -15,7 +15,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { Y_UNIT_TEST(TierDraftsGC) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); Singleton()->ResetWriteCounters(); auto settings = TKikimrSettings() @@ -47,10 +47,31 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { AFL_VERIFY(!Singleton()->GetSize()); } + Y_UNIT_TEST(TestRemoveTableBeforeIndexation) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetIndexWriteControllerEnabled(false); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + TTypedLocalHelper("Utf8", kikimr).ExecuteSchemeQuery("DROP TABLE `/Root/olapStore/olapTable`;"); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + 
csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + } + Y_UNIT_TEST(TierDraftsGCWithRestart) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); @@ -131,6 +152,63 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("count")), 800000); } + Y_UNIT_TEST(WriteDeleteCleanGC) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + Singleton()->ResetWriteCounters(); + + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + + auto settings = TKikimrSettings().SetAppConfig(appConfig).SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_BLOBS }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + auto client = kikimr.GetQueryClient(); + + { + auto it = client.ExecuteQuery(R"( + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:00Z'), 'a', '0'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:01Z'), 'a', 'test'); + INSERT INTO 
`/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:02Z'), 'a', 't'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + } + + while (csController->GetInsertStartedCounter().Val() == 0) { + Cerr << "Wait indexation..." << Endl; + Sleep(TDuration::Seconds(2)); + } + { + const TInstant start = TInstant::Now(); + while (!Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + Cerr << "Wait size in memory... " << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + AFL_VERIFY(Singleton()->GetSize()); + } + { + auto it = client.ExecuteQuery(R"( + DELETE FROM `/Root/olapStore/olapTable` ON SELECT CAST(0u AS Timestamp) AS timestamp, Unwrap(CAST('a' AS Utf8)) AS uid; + DELETE FROM `/Root/olapStore/olapTable`; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + } + csController->SetOverrideReadTimeoutClean(TDuration::Zero()); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + { + const TInstant start = TInstant::Now(); + while (Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + Cerr << "Wait empty... 
" << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + AFL_VERIFY(!Singleton()->GetSize()); + } + } + } } // namespace diff --git a/ydb/core/kqp/ut/olap/ya.make b/ydb/core/kqp/ut/olap/ya.make index d9b7a06ef77b..e324116597eb 100644 --- a/ydb/core/kqp/ut/olap/ya.make +++ b/ydb/core/kqp/ut/olap/ya.make @@ -1,7 +1,7 @@ UNITTEST_FOR(ydb/core/kqp) FORK_SUBTESTS() -SPLIT_FACTOR(100) +SPLIT_FACTOR(200) IF (WITH_VALGRIND) TIMEOUT(3600) @@ -23,6 +23,9 @@ SRCS( clickbench_ut.cpp aggregations_ut.cpp write_ut.cpp + sparsed_ut.cpp + tiering_ut.cpp + decimal_ut.cpp ) PEERDIR( diff --git a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp index 2dcce5cd1e2a..718a45473d00 100644 --- a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp @@ -11,6 +11,110 @@ using namespace NYdb; using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpNewEngine) { + Y_UNIT_TEST(StreamLookupWithView) { + TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + appConfig.MutableFeatureFlags()->SetEnableViews(true); + settings.SetDomainRoot(KikimrDefaultUtDomainRoot); + settings.SetAppConfig(appConfig); + + auto kikimr = TKikimrRunner{settings}; + kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableViews(true); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE `object_table` + ( + object_id utf8, + role utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `object_table` ADD INDEX `object_id_index` GLOBAL ON (object_id); + ALTER TABLE `object_table` ADD INDEX `role_index` GLOBAL ON (role); + + CREATE TABLE `role_table` + ( + granted_by_role utf8, + granted_role utf8, + role_type utf8, 
+ role utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `role_table` ADD INDEX `granted_by_role_index` GLOBAL ON (granted_by_role); + ALTER TABLE `role_table` ADD INDEX `granted_role_index` GLOBAL ON (granted_role); + ALTER TABLE `role_table` ADD INDEX `role_index` GLOBAL ON (role); + + CREATE TABLE `access_table` + ( + endpoints utf8, + name utf8, + class utf8, + type utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `access_table` ADD INDEX `endpoints_index` GLOBAL ON (endpoints); + ALTER TABLE `access_table` ADD INDEX `class_index` GLOBAL ON (class); + )").GetValueSync()); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + CREATE VIEW granted_privilege WITH (security_invoker = TRUE) AS + SELECT DISTINCT + object_table.object_id AS object_id, + role_table.granted_role AS granted_role, + access_table.id AS id, + role_table.role AS role, + access_table.`type` AS object_type, + FROM `/Root/access_table` AS access_table + INNER JOIN `/Root/object_table` AS object_table ON access_table.id = object_table.object_id + INNER JOIN `/Root/role_table` AS role_table ON object_table.role = role_table.granted_role + )").GetValueSync()); + + auto result = session.ExecuteDataQuery(R"( + UPSERT INTO `access_table` (id, type) VALUES + ("10", "OPERATION_PRIVILEGE"); + UPSERT INTO `role_table` (id, granted_role, role_type) VALUES + ("10", "admin", "USER_ROLE"); + UPSERT INTO `object_table` (id, object_id, role) VALUES + ("10", "10", "admin"); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + AssertSuccessResult(result); + + auto testQueryParams = [&] (TString query, TParams params) { + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params).GetValueSync(); + AssertSuccessResult(result); + + Cerr << FormatResultSetYson(result.GetResultSet(0)) << Endl; + }; + + auto params = kikimr.GetTableClient().GetParamsBuilder() + 
.AddParam("$jp1").Utf8("admin").Build() + .AddParam("$jp2").Utf8("10").Build() + .AddParam("$jp3").Uint64(2).Build() + .Build(); + + testQueryParams(R"( + --!syntax_v1 + DECLARE $jp1 AS Text; + DECLARE $jp2 AS Text; + DECLARE $jp3 AS Uint64; + select g1_0.id from granted_privilege g1_0 where ( + g1_0.role = 'admin' + ) and g1_0.role=$jp1 and g1_0.object_type=$jp2 limit $jp3 + )", params); + } + Y_UNIT_TEST(Select1) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -2991,7 +3095,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); AssertTableStats(result, "/Root/Test", { - .ExpectedReads = 2, + .ExpectedReads = 1, .ExpectedDeletes = 2, }); @@ -3955,6 +4059,165 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertTableReads(result, "/Root/SecondaryKeys/Index/indexImplTable", 1); } + Y_UNIT_TEST(AutoChooseIndexOrderByLimit) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + { + auto session = db.CreateSession().GetValueSync().GetSession(); + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + CREATE TABLE `/Root/ComplexKey` ( + Key1 Int32, + Key2 Int32, + Key3 Int32, + Value Int32, + PRIMARY KEY (Key1, Key2, Key3), + INDEX Index GLOBAL ON (Key2) + ); + )").GetValueSync()); + + auto result2 = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/ComplexKey` (Key1, Key2, Key3, Value) VALUES + (1, 1, 101, 1), + (2, 2, 102, 1), + (2, 2, 103, 3), + (3, 3, 103, 2); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result2.IsSuccess(), result2.GetIssues().ToString()); + } + + NYdb::NTable::TExecDataQuerySettings 
querySettings; + querySettings.CollectQueryStats(ECollectQueryStatsMode::Profile); + + { + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + SELECT Key1, Key2, Key3 FROM `/Root/ComplexKey` + WHERE Key1 = 2 and Key2 = 2; + )", TTxControl::BeginTx(TTxSettings::SerializableRW()), querySettings).GetValueSync(); + AssertSuccessResult(result); + AssertTableReads(result, "/Root/ComplexKey/Index/indexImplTable", 2); + } + + { + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + SELECT Key1, Key2, Key3 FROM `/Root/ComplexKey` + WHERE Key1 = 2 and Key2 = 2 + ORDER BY Key1 DESC + LIMIT 1; + )", TTxControl::BeginTx(TTxSettings::SerializableRW()), querySettings).GetValueSync(); + AssertSuccessResult(result); + AssertTableReads(result, "/Root/ComplexKey/Index/indexImplTable", 0); + } + } + + Y_UNIT_TEST(AutoChooseIndexOrderByLambda) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + { + auto session = db.CreateSession().GetValueSync().GetSession(); + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + CREATE TABLE `/Root/ComplexKey` ( + Key Int32, + Fk Int32, + Value String, + PRIMARY KEY (Key, Fk), + INDEX Index GLOBAL ON (Value) + ); + )").GetValueSync()); + + auto result2 = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/ComplexKey` (Key, Fk, Value) VALUES + (null, null, "NullValue"), + (1, 101, "Value1"), + (2, 102, "Value1"), + (2, 103, "Value3"), + (3, 103, "Value2"), + (4, 104, "Value2"), + (5, 105, "Value3"); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result2.IsSuccess(), result2.GetIssues().ToString()); + } + + NYdb::NTable::TExecDataQuerySettings querySettings; + 
querySettings.CollectQueryStats(ECollectQueryStatsMode::Profile); + + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + SELECT Key, Fk, Value FROM `/Root/ComplexKey` + WHERE Key = 2 + ORDER BY Value DESC + LIMIT 1; + )", TTxControl::BeginTx(TTxSettings::SerializableRW()), querySettings).GetValueSync(); + AssertSuccessResult(result); + AssertTableReads(result, "/Root/ComplexKey", 2); + } + + Y_UNIT_TEST(MultipleBroadcastJoin) { + TKikimrSettings kisettings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + kisettings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(kisettings); + + auto db = kikimr.GetTableClient(); + auto client = kikimr.GetQueryClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto session = db.CreateSession().GetValueSync().GetSession(); + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + create table demo_ba(id text, some text, ref1 text, ref2 text, primary key(id)); + create table demo_ref1(id text, code text, some text, primary key(id), index ix_code global on (code)); + create table demo_ref2(id text, code text, some text, primary key(id), index ix_code global on (code)); + )").GetValueSync()); + } + + auto query = R"( + select ba_0.id, ba_0.some, + r_1.id, r_1.some, r_1.code, + r_2.id, r_2.some, r_2.code + from demo_ba ba_0 + left join demo_ref1 r_1 on r_1.id=ba_0.ref1 + left join demo_ref2 r_2 on r_2.code=ba_0.ref2 + where ba_0.id in ("ba#10"u,"ba#20"u,"ba#30"u,"ba#40"u,"ba#50"u,"ba#60"u,"ba#70"u,"ba#80"u,"ba#90"u,"ba#100"u); + )"; + + auto settings = NYdb::NQuery::TExecuteQuerySettings() + .Syntax(NYdb::NQuery::ESyntax::YqlV1) + .ConcurrentResultSets(false); + { + auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); + //CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + //CompareYson(R"([[["111"];[1]]])", FormatResultSetYson(result.GetResultSet(1))); + } + { + auto it = client.StreamExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + Cerr << StreamResultToYson(it); + } + + } Y_UNIT_TEST_TWIN(ComplexLookupLimit, NewPredicateExtract) { TKikimrSettings settings; diff --git a/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp b/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp index 7035b4d6927b..b23d9d38ace9 100644 --- a/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp @@ -11,6 +11,113 @@ using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpReturning) { +Y_UNIT_TEST(ReturningTwice) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableSequences(true); + appConfig.MutableTableServiceConfig()->SetEnableColumnsWithDefault(true); + auto serverSettings = TKikimrSettings().SetAppConfig(appConfig); + TKikimrRunner kikimr(serverSettings); + + auto client = kikimr.GetTableClient(); + auto session = client.CreateSession().GetValueSync().GetSession(); + + const auto queryCreate = Q_(R"( + CREATE TABLE IF NOT EXISTS tasks ( + hashed_key Uint32, + queue_name String, + task_id String, + worker_id Int32, + running Bool, + eta Timestamp, + lock_timeout Timestamp, + num_fails Int32, + num_reschedules Int32, + body String, + first_fail Timestamp, + idempotency_run_id String, + PRIMARY KEY (hashed_key, queue_name, task_id) + ); + + CREATE TABLE IF NOT EXISTS tasks_eta_002 ( + eta Timestamp, + hashed_key Uint32, + queue_name String, + task_id String, + PRIMARY KEY (eta, hashed_key, queue_name, task_id) + ) WITH ( + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 1 + ); + + CREATE TABLE IF NOT EXISTS 
tasks_processing_002 ( + expiration_ts Timestamp, + hashed_key Uint32, + queue_name String, + task_id String, + PRIMARY KEY (expiration_ts, hashed_key, queue_name, task_id) + ) WITH ( + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 1 + ); + )"); + + auto resultCreate = session.ExecuteSchemeQuery(queryCreate).GetValueSync(); + UNIT_ASSERT_C(resultCreate.IsSuccess(), resultCreate.GetIssues().ToString()); + + { + const auto query = Q_(R"( + --!syntax_v1 + DECLARE $eta AS Timestamp; + DECLARE $expiration_ts AS Timestamp; + DECLARE $limit AS Int32; + + $to_move = ( + SELECT $expiration_ts AS expiration_ts, eta, hashed_key, queue_name, task_id + FROM tasks_eta_002 + WHERE eta <= $eta + ORDER BY eta, hashed_key, queue_name, task_id + LIMIT $limit + ); + + UPSERT INTO tasks_processing_002 (expiration_ts, hashed_key, queue_name, task_id) + SELECT expiration_ts, hashed_key, queue_name, task_id FROM $to_move + RETURNING expiration_ts, hashed_key, queue_name, task_id; + + UPSERT INTO tasks (hashed_key, queue_name, task_id, running, lock_timeout) + SELECT hashed_key, queue_name, task_id, True as running, $expiration_ts AS lock_timeout FROM $to_move; + + DELETE FROM tasks_eta_002 ON + SELECT eta, hashed_key, queue_name, task_id FROM $to_move; + )"); + + auto params = TParamsBuilder() + .AddParam("$eta").Timestamp(TInstant::Zero()).Build() + .AddParam("$expiration_ts").Timestamp(TInstant::Zero()).Build() + .AddParam("$limit").Int32(1).Build() + .Build(); + + NYdb::NTable::TExecDataQuerySettings execSettings; + execSettings.CollectQueryStats(ECollectQueryStatsMode::Full); + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), params, execSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + size_t eta_table_access = 0; + auto stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); + + for (auto phase : stats.query_phases()) { + for (auto table : phase.table_access()) { + if (table.name() == 
"/Root/tasks_eta_002") { + eta_table_access++; + } + } + } + Cerr << "access count " << eta_table_access << Endl; + UNIT_ASSERT_EQUAL(eta_table_access, 1); + //Cerr << stats.Utf8DebugString() << Endl; + } +} + Y_UNIT_TEST(ReturningSerial) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableSequences(true); @@ -134,6 +241,55 @@ Y_UNIT_TEST(ReturningSerial) { } } +Y_UNIT_TEST(ReturningColumnsOrder) { + auto kikimr = DefaultKikimrRunner(); + + auto client = kikimr.GetTableClient(); + auto session = client.CreateSession().GetValueSync().GetSession(); + auto db = kikimr.GetQueryClient(); + + const auto queryCreate = Q_(R"( + CREATE TABLE test1 (id Int32, v Text, PRIMARY KEY(id)); + )"); + + auto resultCreate = session.ExecuteSchemeQuery(queryCreate).GetValueSync(); + UNIT_ASSERT_C(resultCreate.IsSuccess(), resultCreate.GetIssues().ToString()); + + { + const auto query = Q_(R"( + UPSERT INTO test1 (id, v) VALUES (1, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (1, '111') RETURNING v, id; + )"); + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[["111"];[1]]])", FormatResultSetYson(result.GetResultSet(1))); + } + + auto settings = NYdb::NQuery::TExecuteQuerySettings() + .Syntax(NYdb::NQuery::ESyntax::YqlV1) + .ConcurrentResultSets(false); + { + auto result = db.ExecuteQuery(R"( + UPSERT INTO test1 (id, v) VALUES (1, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (1, '111') RETURNING v, id; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[["111"];[1]]])", 
FormatResultSetYson(result.GetResultSet(1))); + } + { + auto it = db.StreamExecuteQuery(R"( + UPSERT INTO test1 (id, v) VALUES (2, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (2, '111') RETURNING v, id; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + Cerr << StreamResultToYson(it); + } + +} + Y_UNIT_TEST(ReturningTypes) { auto kikimr = DefaultKikimrRunner(); diff --git a/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp b/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp index 2854ea9d30fe..72c95312fab6 100644 --- a/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp +++ b/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp @@ -4685,7 +4685,8 @@ Y_UNIT_TEST_SUITE(KqpPg) { ui16 mbusport = tp.GetPort(2134); auto settings = Tests::TServerSettings(mbusport) .SetDomainName("Root") - .SetUseRealThreads(false); + .SetUseRealThreads(false) + .SetEnablePgSyntax(true); Tests::TServer::TPtr server = new Tests::TServer(settings); diff --git a/ydb/core/kqp/ut/query/kqp_analyze_ut.cpp b/ydb/core/kqp/ut/query/kqp_analyze_ut.cpp new file mode 100644 index 000000000000..9342f75de40e --- /dev/null +++ b/ydb/core/kqp/ut/query/kqp_analyze_ut.cpp @@ -0,0 +1,111 @@ +#include +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NTable; +using namespace NYdb::NTable; + +Y_UNIT_TEST_SUITE(KqpAnalyze) { + +using namespace NStat; + +Y_UNIT_TEST_TWIN(AnalyzeTable, ColumnStore) { + TTestEnv env(1, 1, true); + + CreateDatabase(env, "Database"); + + TTableClient client(env.GetDriver()); + auto session = client.CreateSession().GetValueSync().GetSession(); + + TString createTable = Sprintf(R"( + CREATE TABLE `%s` ( + Key Uint64 NOT NULL, + Value String, + PRIMARY KEY (Key) + ) + )", "Root/Database/Table"); + if (ColumnStore) { + createTable += + R"( + PARTITION BY HASH(Key) + WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 
16 + ) + )"; + } + + auto result = session.ExecuteSchemeQuery(createTable).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + if (ColumnStore) { + result = session.ExecuteSchemeQuery( + Sprintf(R"( + ALTER OBJECT `%s` (TYPE TABLE) + SET ( + ACTION=UPSERT_INDEX, + NAME=cms_value, + TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['Value']}` + ); + )", "Root/Database/Table" + ) + ).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + + TValueBuilder rows; + rows.BeginList(); + for (size_t i = 0; i < 1500; ++i) { + auto key = TValueBuilder().Uint64(i).Build(); + auto value = TValueBuilder().OptionalString("Hello,world!").Build(); + + rows.AddListItem(); + rows.BeginStruct(); + rows.AddMember("Key", key); + rows.AddMember("Value", value); + rows.EndStruct(); + } + rows.EndList(); + + result = client.BulkUpsert("Root/Database/Table", rows.Build()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session.ExecuteSchemeQuery( + Sprintf(R"(ANALYZE `Root/%s/%s`)", "Database", "Table") + ).GetValueSync(); + + if (ColumnStore) { + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } else { + UNIT_ASSERT(!result.IsSuccess()); + auto issues = result.GetIssues().ToString(); + UNIT_ASSERT_C(issues.find("analyze is not supported for oltp tables.") != TString::npos, issues); + return; + } + + auto& runtime = *env.GetServer().GetRuntime(); + ui64 saTabletId; + auto pathId = ResolvePathId(runtime, "/Root/Database/Table", nullptr, &saTabletId); + + auto countMin = ExtractCountMin(runtime, pathId, 2); + TString value = "Hello,world!"; + auto stat = countMin->Probe(value.Data(), value.Size()); + UNIT_ASSERT_C(stat >= 1500, ToString(stat)); +} + + +} // suite + + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp index 9d5a82df2ef3..079397b3f590 100644 --- 
a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp @@ -85,10 +85,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::ReadJsonTree(*res.PlanJson, &plan, true); UNIT_ASSERT(ValidatePlanNodeIds(plan)); - auto join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter"); - if (!join.IsDefined()) { - join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan"); - } + auto join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan"); UNIT_ASSERT(join.IsDefined()); auto left = FindPlanNodeByKv(join, "Table", "EightShard"); UNIT_ASSERT(left.IsDefined()); @@ -115,10 +112,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::ReadJsonTree(*res.PlanJson, &plan, true); UNIT_ASSERT(ValidatePlanNodeIds(plan)); - auto join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter"); - if (!join.IsDefined()) { - join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan"); - } + auto join = FindPlanNodeByKv(plan, "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan"); UNIT_ASSERT(join.IsDefined()); auto left = FindPlanNodeByKv(join, "Table", "EightShard"); UNIT_ASSERT(left.IsDefined()); @@ -208,15 +202,9 @@ Y_UNIT_TEST_SUITE(KqpExplain) { auto join = FindPlanNodeByKv( plan, "Node Type", - "Aggregate-InnerJoin (MapJoin)-Filter" + "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan" ); - if (!join.IsDefined()) { - join = FindPlanNodeByKv( - plan, - "Node Type", - "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan" - ); - } + UNIT_ASSERT(join.IsDefined()); auto left = FindPlanNodeByKv(join, "Table", "EightShard"); UNIT_ASSERT(left.IsDefined()); @@ -290,7 +278,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { auto& rangeRead = operators[lookupMember].GetMapSafe(); UNIT_ASSERT(rangeRead.at("Name") == "TableRangeScan"); - UNIT_ASSERT_VALUES_EQUAL(rangeRead.at("ReadRange").GetArraySafe()[0], "App («new_app_1»)"); + 
UNIT_ASSERT_VALUES_EQUAL(rangeRead.at("ReadRange").GetArraySafe()[0], "App (new_app_1)"); } Y_UNIT_TEST(SortStage) { @@ -850,7 +838,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::ReadJsonTree(*streamRes.PlanJson, &plan, true); UNIT_ASSERT(ValidatePlanNodeIds(plan)); - auto readNode = FindPlanNodeByKv(plan, "Node Type", "TableFullScan"); + auto readNode = FindPlanNodeByKv(plan, "Node Type", "Filter-TableFullScan"); UNIT_ASSERT(readNode.IsDefined()); auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe(); diff --git a/ydb/core/kqp/ut/query/kqp_limits_ut.cpp b/ydb/core/kqp/ut/query/kqp_limits_ut.cpp index a5c993eff08d..d8fd16d5d754 100644 --- a/ydb/core/kqp/ut/query/kqp_limits_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_limits_ut.cpp @@ -1,8 +1,12 @@ #include +#include #include +#include #include +#include +#include #include namespace NKikimr { @@ -11,6 +15,36 @@ namespace NKqp { using namespace NYdb; using namespace NYdb::NTable; +NKikimrResourceBroker::TResourceBrokerConfig MakeResourceBrokerTestConfig(ui32 multiplier = 1) { + NKikimrResourceBroker::TResourceBrokerConfig config; + + auto queue = config.AddQueues(); + queue->SetName("queue_default"); + queue->SetWeight(5); + queue->MutableLimit()->AddResource(4); + + queue = config.AddQueues(); + queue->SetName("queue_kqp_resource_manager"); + queue->SetWeight(20); + queue->MutableLimit()->AddResource(4); + queue->MutableLimit()->AddResource(33554453 * multiplier); + + auto task = config.AddTasks(); + task->SetName("unknown"); + task->SetQueueName("queue_default"); + task->SetDefaultDuration(TDuration::Seconds(5).GetValue()); + + task = config.AddTasks(); + task->SetName(NLocalDb::KqpResourceManagerTaskName); + task->SetQueueName("queue_kqp_resource_manager"); + task->SetDefaultDuration(TDuration::Seconds(5).GetValue()); + + config.MutableResourceLimit()->AddResource(10); + config.MutableResourceLimit()->AddResource(100'000); + + return config; +} + namespace { bool IsRetryable(const EStatus& status) { return 
status == EStatus::OVERLOADED; @@ -127,6 +161,59 @@ Y_UNIT_TEST_SUITE(KqpLimits) { result.GetIssues().PrintTo(Cerr); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::BAD_REQUEST); } + + Y_UNIT_TEST(ComputeActorMemoryAllocationFailure) { + auto app = NKikimrConfig::TAppConfig(); + app.MutableTableServiceConfig()->MutableResourceManager()->SetMkqlLightProgramMemoryLimit(10); + app.MutableTableServiceConfig()->MutableResourceManager()->SetQueryMemoryLimit(2000); + + app.MutableResourceBrokerConfig()->CopyFrom(MakeResourceBrokerTestConfig()); + + TKikimrRunner kikimr(app); + CreateLargeTable(kikimr, 0, 0, 0); + + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_SLOW_LOG, NActors::NLog::PRI_ERROR); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteDataQuery(Q1_(R"( + SELECT * FROM `/Root/LargeTable`; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::OVERLOADED); + } + + Y_UNIT_TEST(ComputeActorMemoryAllocationFailureQueryService) { + auto app = NKikimrConfig::TAppConfig(); + app.MutableTableServiceConfig()->MutableResourceManager()->SetMkqlLightProgramMemoryLimit(10); + app.MutableTableServiceConfig()->MutableResourceManager()->SetQueryMemoryLimit(2000); + + app.MutableResourceBrokerConfig()->CopyFrom(MakeResourceBrokerTestConfig(4)); + + TKikimrRunner kikimr(app); + CreateLargeTable(kikimr, 0, 0, 0); + + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_SLOW_LOG, NActors::NLog::PRI_ERROR); + + auto db = kikimr.GetQueryClient(); + NYdb::NQuery::TExecuteQuerySettings querySettings; + querySettings.StatsMode(NYdb::NQuery::EStatsMode::Full); + + auto result = db.ExecuteQuery(Q1_(R"( + SELECT * FROM `/Root/LargeTable`; + )"), NQuery::TTxControl::BeginTx().CommitTx(), querySettings).ExtractValueSync(); + 
result.GetIssues().PrintTo(Cerr); + + auto stats = result.GetStats(); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::OVERLOADED); + UNIT_ASSERT_C(result.GetIssues().ToString().Contains("Mkql memory limit exceeded"), result.GetIssues().ToString()); + UNIT_ASSERT(stats.Defined()); + + Cerr << stats->ToString(true) << Endl; + } Y_UNIT_TEST(DatashardProgramSize) { auto app = NKikimrConfig::TAppConfig(); @@ -744,6 +831,120 @@ Y_UNIT_TEST_SUITE(KqpLimits) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::TIMEOUT); } + /* Scenario: + - prepare and run query + - observe first EvState event from CA to Executer and replace it with EvAbortExecution + - count all EvState events from all CAs + - wait for final event EvTxResponse from Executer + - expect it to happen strictly after all EvState events + */ + Y_UNIT_TEST(WaitCAsStateOnAbort) { + TKikimrRunner kikimr(TKikimrSettings().SetUseRealThreads(false)); + auto db = kikimr.RunCall([&] { return kikimr.GetTableClient(); } ); + auto session = kikimr.RunCall([&] { return db.CreateSession().GetValueSync().GetSession(); } ); + + auto prepareResult = kikimr.RunCall([&] { return session.PrepareDataQuery(Q_(R"( + SELECT COUNT(*) FROM `/Root/TwoShard`; + )")).GetValueSync(); + }); + UNIT_ASSERT_VALUES_EQUAL_C(prepareResult.GetStatus(), EStatus::SUCCESS, prepareResult.GetIssues().ToString()); + auto dataQuery = prepareResult.GetQuery(); + + bool firstEvState = false; + ui32 totalEvState = 0; + TActorId executerId; + ui32 actorCount = 3; // TODO: get number of actors properly. 
+ + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + runtime.SetObserverFunc([&](TAutoPtr& ev) { + if (ev->GetTypeRewrite() == NYql::NDq::TEvDqCompute::TEvState::EventType) { + ++totalEvState; + if (!firstEvState) { + executerId = ev->Recipient; + ev = new IEventHandle(ev->Recipient, ev->Sender, + new NKikimr::NKqp::TEvKqp::TEvAbortExecution(NYql::NDqProto::StatusIds::UNSPECIFIED, NYql::TIssues())); + firstEvState = true; + } + } else if (ev->GetTypeRewrite() == NKikimr::NKqp::TEvKqpExecuter::TEvTxResponse::EventType && ev->Sender == executerId) { + UNIT_ASSERT_C(totalEvState == actorCount*2, "Executer sent response before waiting for CAs"); + } + + return TTestActorRuntime::EEventAction::PROCESS; + }); + + auto settings = TExecDataQuerySettings().OperationTimeout(TDuration::MilliSeconds(500)); + kikimr.RunInThreadPool([&] { return dataQuery.Execute(TTxControl::BeginTx().CommitTx(), settings).GetValueSync(); }); + + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&](IEventHandle& ev) { + return ev.GetTypeRewrite() == NKikimr::NKqp::TEvKqpExecuter::TEvTxResponse::EventType + && ev.Sender == executerId && totalEvState == actorCount*2; + }); + + UNIT_ASSERT(runtime.DispatchEvents(opts)); + } + + /* Scenario: + - prepare and run query + - observe first EvState event from CA to Executer and replace it with EvAbortExecution + - count all EvState events from all CAs + - drop final EvState event from last CA + - wait for final event EvTxResponse from Executer after timeout poison + - expect it to happen strictly after all EvState events + */ + Y_UNIT_TEST(WaitCAsTimeout) { + TKikimrRunner kikimr(TKikimrSettings().SetUseRealThreads(false)); + auto db = kikimr.RunCall([&] { return kikimr.GetTableClient(); } ); + auto session = kikimr.RunCall([&] { return db.CreateSession().GetValueSync().GetSession(); } ); + + auto prepareResult = kikimr.RunCall([&] { return session.PrepareDataQuery(Q_(R"( + SELECT COUNT(*) FROM `/Root/TwoShard`; + )")).GetValueSync(); + }); + 
UNIT_ASSERT_VALUES_EQUAL_C(prepareResult.GetStatus(), EStatus::SUCCESS, prepareResult.GetIssues().ToString()); + auto dataQuery = prepareResult.GetQuery(); + + bool firstEvState = false; + bool timeoutPoison = false; + ui32 totalEvState = 0; + TActorId executerId; + ui32 actorCount = 3; // TODO: get number of actors properly. + + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + runtime.SetObserverFunc([&](TAutoPtr& ev) { + if (ev->GetTypeRewrite() == NYql::NDq::TEvDqCompute::TEvState::EventType) { + ++totalEvState; + if (!firstEvState) { + executerId = ev->Recipient; + ev = new IEventHandle(ev->Recipient, ev->Sender, + new NKikimr::NKqp::TEvKqp::TEvAbortExecution(NYql::NDqProto::StatusIds::UNSPECIFIED, NYql::TIssues())); + firstEvState = true; + } else { + return TTestActorRuntime::EEventAction::DROP; + } + } else if (ev->GetTypeRewrite() == TEvents::TEvPoison::EventType && totalEvState == actorCount*2 && + ev->Sender == executerId && ev->Recipient == executerId) + { + timeoutPoison = true; + } else if (ev->GetTypeRewrite() == NKikimr::NKqp::TEvKqpExecuter::TEvTxResponse::EventType && ev->Sender == executerId) { + UNIT_ASSERT_C(timeoutPoison, "Executer sent response before waiting for CAs"); + } + + return TTestActorRuntime::EEventAction::PROCESS; + }); + + auto settings = TExecDataQuerySettings().OperationTimeout(TDuration::MilliSeconds(500)); + kikimr.RunInThreadPool([&] { return dataQuery.Execute(TTxControl::BeginTx().CommitTx(), settings).GetValueSync(); }); + + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&](IEventHandle& ev) { + return ev.GetTypeRewrite() == NKikimr::NKqp::TEvKqpExecuter::TEvTxResponse::EventType + && ev.Sender == executerId && totalEvState == actorCount*2 && timeoutPoison; + }); + + UNIT_ASSERT(runtime.DispatchEvents(opts)); + } + Y_UNIT_TEST(ReplySizeExceeded) { auto kikimr = DefaultKikimrRunner(); auto db = kikimr.GetTableClient(); @@ -986,6 +1187,7 @@ Y_UNIT_TEST_SUITE(KqpLimits) { 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT(result.GetStats()); + Cerr << result.GetStats()->ToString(true) << Endl; UNIT_ASSERT(result.GetStats()->GetPlan()); NJson::TJsonValue plan; @@ -993,7 +1195,7 @@ Y_UNIT_TEST_SUITE(KqpLimits) { UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Node Type"].GetStringSafe(), "Query"); UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Node Type"].GetStringSafe(), "ResultSet"); - UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Node Type"].GetStringSafe(), "Collect"); + UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Node Type"].GetStringSafe(), "Stage"); UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Node Type"].GetStringSafe(), "Merge"); UNIT_ASSERT_VALUES_EQUAL(plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["SortColumns"].GetArraySafe()[0], "Key (Asc)"); diff --git a/ydb/core/kqp/ut/query/kqp_params_ut.cpp b/ydb/core/kqp/ut/query/kqp_params_ut.cpp index 86094d3a61e6..d265a84b62fd 100644 --- a/ydb/core/kqp/ut/query/kqp_params_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_params_ut.cpp @@ -103,8 +103,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitParameterTypes) { - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -127,8 +131,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForPreparedQuery) { // All params are declared in the text - TKikimrRunner kikimr; - 
kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -164,8 +172,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForUnpreparedQuery) { // Some params are declared in text, some by user - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -284,8 +296,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForExecuteAndPreparedQueries) { // All params are declared in the text - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -493,8 +509,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitSameParameterTypesQueryCacheCheck) { - TKikimrRunner kikimr; - 
kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -519,8 +539,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitDifferentParameterTypesQueryCacheCheck) { - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); diff --git a/ydb/core/kqp/ut/query/kqp_query_ut.cpp b/ydb/core/kqp/ut/query/kqp_query_ut.cpp index 6c11b9e2e7bb..15e018470b43 100644 --- a/ydb/core/kqp/ut/query/kqp_query_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_query_ut.cpp @@ -1576,7 +1576,7 @@ Y_UNIT_TEST_SUITE(KqpQuery) { )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT(!prepareResult.IsSuccess()); UNIT_ASSERT_C( - prepareResult.GetIssues().ToString().Contains("Query can be executed only in per-statement mode (NoTx)"), + prepareResult.GetIssues().ToString().Contains("CTAS statement can be executed only in NoTx mode."), prepareResult.GetIssues().ToString()); } @@ -1782,6 +1782,96 @@ Y_UNIT_TEST_SUITE(KqpQuery) { CompareYson(output, R"([[1u;[1];["test1"]];[100u;[100];["test2"]]])"); } } + + Y_UNIT_TEST_TWIN(TableSink_ReplaceDataShardDataQuery, UseSink) { + NKikimrConfig::TAppConfig 
appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(UseSink); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(UseSink); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + const TString query = R"( + CREATE TABLE `/Root/DataShard` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 16, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 16, + UNIFORM_PARTITIONS = 16); + + CREATE TABLE `/Root/DataShard2` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 17, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 17, + UNIFORM_PARTITIONS = 17); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES + (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[4u]])", FormatResultSetYson(it.GetResultSet(0))); + } + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard2` 
SELECT * FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard2`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[4u]])", FormatResultSetYson(it.GetResultSet(0))); + } + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard2` (Col1, Col2, Col3) VALUES + (11u, "test1", 10), (21u, "test2", 11), (2147483646u, "test3", 12), (2147483641u, NULL, 13); + SELECT COUNT(*) FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard2`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[8u]])", FormatResultSetYson(it.GetResultSet(0))); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/query/kqp_stats_ut.cpp b/ydb/core/kqp/ut/query/kqp_stats_ut.cpp index 0f1221423669..1192c399f334 100644 --- a/ydb/core/kqp/ut/query/kqp_stats_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_stats_ut.cpp @@ -451,7 +451,7 @@ Y_UNIT_TEST_TWIN(StreamLookupStats, StreamLookupJoin) { }); } -Y_UNIT_TEST(SysViewTimeout) { +Y_UNIT_TEST(SysViewClientLost) { TKikimrRunner kikimr; CreateLargeTable(kikimr, 500000, 10, 100, 5000, 1); @@ -490,12 +490,13 @@ Y_UNIT_TEST(SysViewTimeout) { auto settings = TStreamExecScanQuerySettings(); settings.ClientTimeout(TDuration::MilliSeconds(50)); - TStringStream request; - request << R"( + TStringStream timeoutedRequestStream; + timeoutedRequestStream << 
R"( SELECT COUNT(*) FROM `/Root/LargeTable` WHERE SUBSTRING(DataText, 50, 5) = "22222"; )"; + TString timeoutedRequest = timeoutedRequestStream.Str(); - auto result = db.StreamExecuteScanQuery(request.Str(), settings).GetValueSync(); + auto result = db.StreamExecuteScanQuery(timeoutedRequest, settings).GetValueSync(); if (result.IsSuccess()) { try { @@ -510,7 +511,13 @@ Y_UNIT_TEST(SysViewTimeout) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NYdb::EStatus::CLIENT_DEADLINE_EXCEEDED); } + ui32 timeoutedCount = 0; + ui32 iterations = 10; + while (timeoutedCount == 0 && iterations > 0) { + iterations--; + Sleep(TDuration::Seconds(1)); + TStringStream request; request << "SELECT * FROM `/Root/.sys/top_queries_by_read_bytes_one_hour` ORDER BY Duration"; @@ -518,7 +525,6 @@ Y_UNIT_TEST(SysViewTimeout) { UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); ui64 queryCount = 0; - ui64 rowsCount = 0; for (;;) { auto streamPart = it.ReadNext().GetValueSync(); if (!streamPart.IsSuccess()) { @@ -533,17 +539,16 @@ Y_UNIT_TEST(SysViewTimeout) { while (parser.TryNextRow()) { auto value = parser.ColumnParser("QueryText").GetOptionalUtf8(); UNIT_ASSERT(value); - if (*value == request.Str()) { + if (*value == timeoutedRequest) { queryCount++; } - rowsCount++; } } } - - UNIT_ASSERT(queryCount == 1); - UNIT_ASSERT(rowsCount == 2); + timeoutedCount = queryCount; } + + UNIT_ASSERT(timeoutedCount == 1); } Y_UNIT_TEST(SysViewCancelled) { @@ -582,9 +587,9 @@ Y_UNIT_TEST(SysViewCancelled) { UNIT_ASSERT(rowsCount == 1); } - auto prepareResult = session.PrepareDataQuery(Q_(R"( - SELECT COUNT(*) FROM `/Root/LargeTable` WHERE SUBSTRING(DataText, 50, 5) = "33333"; - )")).GetValueSync(); + TStringStream cancelledRequest; + cancelledRequest << "SELECT COUNT(*) FROM `/Root/LargeTable` WHERE SUBSTRING(DataText, 50, 5) = \"33333\""; + auto prepareResult = session.PrepareDataQuery(cancelledRequest.Str()).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(prepareResult.GetStatus(), 
NYdb::EStatus::SUCCESS, prepareResult.GetIssues().ToString()); auto dataQuery = prepareResult.GetQuery(); @@ -619,7 +624,7 @@ Y_UNIT_TEST(SysViewCancelled) { while (parser.TryNextRow()) { auto value = parser.ColumnParser("QueryText").GetOptionalUtf8(); UNIT_ASSERT(value); - if (*value == request.Str()) { + if (*value == cancelledRequest.Str()) { queryCount++; } rowsCount++; @@ -628,7 +633,7 @@ Y_UNIT_TEST(SysViewCancelled) { } UNIT_ASSERT(queryCount == 1); - UNIT_ASSERT(rowsCount == 2); + UNIT_ASSERT(rowsCount == 3); } } diff --git a/ydb/core/kqp/ut/query/ya.make b/ydb/core/kqp/ut/query/ya.make index 2d9745cba212..91c714510791 100644 --- a/ydb/core/kqp/ut/query/ya.make +++ b/ydb/core/kqp/ut/query/ya.make @@ -17,6 +17,7 @@ ELSE() ENDIF() SRCS( + kqp_analyze_ut.cpp kqp_explain_ut.cpp kqp_limits_ut.cpp kqp_params_ut.cpp @@ -26,6 +27,7 @@ SRCS( ) PEERDIR( + ydb/core/statistics/ut_common ydb/public/sdk/cpp/client/ydb_proto ydb/core/kqp ydb/core/kqp/ut/common diff --git a/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp b/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp index e8bb6f3034b5..8fa62dbdb704 100644 --- a/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp +++ b/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp @@ -237,15 +237,34 @@ Y_UNIT_TEST_SUITE(KqpScan) { .BeginTuple().AddElement().BeginOptional().Decimal(TDecimalValue("1.5", 22, 9)).EndOptional().EndTuple() .Build()); - auto ret = session.CreateTable("/Root/DecimalTest", + auto ret = session.CreateTable("/Root/DecimalTest", TTableBuilder() .AddNullableColumn("Key", TDecimalType(22, 9)) .AddNullableColumn("Value", TDecimalType(22, 9)) .SetPrimaryKeyColumn("Key") - // .SetPartitionAtKeys(partitions) // Error at split boundary 0: Unsupported typeId 4865 at index 0 + .SetPartitionAtKeys(partitions) .Build()).GetValueSync(); UNIT_ASSERT_C(ret.IsSuccess(), ret.GetIssues().ToString()); + { + auto describeResult = session.DescribeTable("/Root/DecimalTest" , TDescribeTableSettings().WithKeyShardBoundary(true)).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL(describeResult.GetStatus(), NYdb::EStatus::SUCCESS); + const NYdb::NTable::TTableDescription& tableDescription = describeResult.GetTableDescription(); + const TVector& keyRanges = tableDescription.GetKeyRanges(); + const TVector& columns = tableDescription.GetTableColumns(); + UNIT_ASSERT_VALUES_EQUAL(columns.size(), 2); + UNIT_ASSERT_STRINGS_EQUAL(columns[0].Type.ToString(), "Decimal(22,9)?"); + UNIT_ASSERT_STRINGS_EQUAL(columns[1].Type.ToString(), "Decimal(22,9)?"); + auto extractValue = [](const TValue& val) { + auto parser = TValueParser(val); + parser.OpenTuple(); + UNIT_ASSERT(parser.TryNextElement()); + return parser.GetOptionalDecimal()->ToString(); + }; + UNIT_ASSERT_VALUES_EQUAL(keyRanges.size(), 2); + UNIT_ASSERT_STRINGS_EQUAL(extractValue(keyRanges[0].To()->GetValue()), "1.5"); + } + auto params = TParamsBuilder().AddParam("$in").BeginList() .AddListItem().BeginStruct() .AddMember("Key").Decimal(TDecimalValue("1.0")) diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 4db4ac5dd9fb..55d1c6b16a05 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -1,6 +1,10 @@ +#include #include #include +#include +#include #include +#include #include #include #include @@ -1947,8 +1951,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } Y_UNIT_TEST(CreateTableWithPartitionAtKeysUuid) { - TKikimrSettings kikimrSettings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true); + TKikimrSettings kikimrSettings; TKikimrRunner kikimr(kikimrSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -1992,8 +1995,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } Y_UNIT_TEST(CreateTableWithUniformPartitionsUuid) { - TKikimrSettings kikimrSettings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true); + TKikimrSettings kikimrSettings; TKikimrRunner kikimr(kikimrSettings); auto db = kikimr.GetTableClient(); auto session = 
db.CreateSession().GetValueSync().GetSession(); @@ -3905,6 +3907,22 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Hours(1)); } + { // alter + auto query = R"( + --!syntax_v1 + ALTER TOPIC `/Root/table/feed_2` SET ( + RETENTION_PERIOD = Interval("PT2H") + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto desc = pq.DescribeTopic("/Root/table/feed_2").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Hours(2)); + } + { // non-positive (invalid) auto query = R"( --!syntax_v1 @@ -4075,6 +4093,70 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } } + Y_UNIT_TEST(ChangefeedOnIndexTable) { + TKikimrRunner kikimr(TKikimrSettings() + .SetPQConfig(DefaultPQConfig()) + .SetEnableChangefeedsOnIndexTables(true)); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key), + INDEX SyncIndex GLOBAL SYNC ON (`Value`), + INDEX AsyncIndex GLOBAL ASYNC ON (`Value`) + ); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + const auto changefeed = TChangefeedDescription("feed", EChangefeedMode::KeysOnly, EChangefeedFormat::Json); + { + auto result = session.AlterTable("/Root/table/AsyncIndex", TAlterTableSettings() + .AppendAddChangefeeds(changefeed) + ).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::PRECONDITION_FAILED, result.GetIssues().ToString()); + } + { + auto result = session.AlterTable("/Root/table/SyncIndex", TAlterTableSettings() + 
.AppendAddChangefeeds(changefeed) + ).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + } + + Y_UNIT_TEST(DescribeIndexTable) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key), + INDEX SyncIndex GLOBAL SYNC ON (`Value`) + ); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto desc = session.DescribeTable("/Root/table/SyncIndex").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetEntry().Name, "SyncIndex"); + } + } + Y_UNIT_TEST(CreatedAt) { TKikimrRunner kikimr(TKikimrSettings().SetPQConfig(DefaultPQConfig())); auto scheme = NYdb::NScheme::TSchemeClient(kikimr.GetDriver(), TCommonClientSettings().Database("/Root")); @@ -4785,6 +4867,66 @@ Y_UNIT_TEST_SUITE(KqpScheme) { session.Close().GetValueSync(); } + Y_UNIT_TEST(DisableExternalDataSourcesOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableExternalDataSourcesOnServerless(false) + .Create(); + + auto checkDisabled = [](const auto& result, NYdb::EStatus status) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + }; + + auto checkNotFound = [](const auto& result, NYdb::EStatus status) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Path does not exist"); + }; + + const auto& createSourceSql = R"( + CREATE EXTERNAL DATA SOURCE MyExternalDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + );)"; + + const auto& createTableSql = R"( + CREATE EXTERNAL TABLE MyExternalTable ( + Key Uint64, + Value String + ) WITH ( + DATA_SOURCE="MyExternalDataSource", + LOCATION="/" + );)"; + + const auto& dropSourceSql = "DROP EXTERNAL DATA SOURCE MyExternalDataSource;"; + + const auto& dropTableSql = "DROP EXTERNAL TABLE MyExternalTable;"; + + auto settings = NWorkload::TQueryRunnerSettings().PoolId(""); + + // Dedicated, enabled + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSourceSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSourceSql, settings)); + + // Shared, enabled + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSourceSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSourceSql, settings)); + + // Serverless, disabled + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2); + checkDisabled(ydb->ExecuteQuery(createSourceSql, settings), NYdb::EStatus::GENERIC_ERROR); + 
checkDisabled(ydb->ExecuteQuery(createTableSql, settings), NYdb::EStatus::PRECONDITION_FAILED); + checkNotFound(ydb->ExecuteQuery(dropTableSql, settings), NYdb::EStatus::SCHEME_ERROR); + checkNotFound(ydb->ExecuteQuery(dropSourceSql, settings), NYdb::EStatus::GENERIC_ERROR); + } + Y_UNIT_TEST(CreateExternalDataSource) { NKikimrConfig::TAppConfig appCfg; appCfg.MutableQueryServiceConfig()->AddHostnamePatterns("my-bucket"); @@ -5146,7 +5288,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { month Int64 NOT NULL ) WITH ( DATA_SOURCE=")" << externalDataSourceName << R"(", - LOCATION="/folder1/*", + LOCATION="/folder1/", FORMAT="json_as_string", `projection.enabled`="true", `projection.year.type`="integer", @@ -5171,7 +5313,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT(externalTable.ExternalTableInfo); UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.ColumnsSize(), 4); UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetDataSourcePath(), externalDataSourceName); - UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/"); } Y_UNIT_TEST(CreateExternalTableWithUpperCaseSettings) { @@ -5194,7 +5336,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { Month Int64 NOT NULL ) WITH ( DATA_SOURCE=")" << externalDataSourceName << R"(", - LOCATION="/folder1/*", + LOCATION="/folder1/", FORMAT="json_as_string", `projection.enabled`="true", `projection.Year.type`="integer", @@ -5219,7 +5361,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT(externalTable.ExternalTableInfo); UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.ColumnsSize(), 4); UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetDataSourcePath(), externalDataSourceName); - UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); + 
UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/"); } Y_UNIT_TEST(DoubleCreateExternalTable) { @@ -6055,16 +6197,75 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } } + void AsyncReplicationConnectionParams(TKikimrRunner& kikimr, const TString& connectionParam, bool ssl = false) { + using namespace NReplication; + + auto repl = TReplicationClient(kikimr.GetDriver(), TCommonClientSettings().Database("/Root")); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` (Key Uint64, Value String, PRIMARY KEY (Key)); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto query = Sprintf(R"( + --!syntax_v1 + CREATE ASYNC REPLICATION `/Root/replication` FOR + `/Root/table` AS `/Root/replica` + WITH ( + %s, TOKEN = "root@builtin" + ); + )", connectionParam.c_str()); + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + const auto result = repl.DescribeReplication("/Root/replication").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + const auto& params = result.GetReplicationDescription().GetConnectionParams(); + UNIT_ASSERT_VALUES_EQUAL(params.GetDiscoveryEndpoint(), kikimr.GetEndpoint()); + UNIT_ASSERT_VALUES_EQUAL(params.GetDatabase(), "/Root"); + UNIT_ASSERT_VALUES_EQUAL(params.GetEnableSsl(), ssl); + } + } + + Y_UNIT_TEST(AsyncReplicationConnectionString) { + TKikimrRunner kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(CONNECTION_STRING = "grpc://%s/?database=/Root")", kikimr.GetEndpoint().c_str())); + } + + Y_UNIT_TEST(AsyncReplicationConnectionStringWithSsl) { + TKikimrRunner 
kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(CONNECTION_STRING = "grpcs://%s/?database=/Root")", kikimr.GetEndpoint().c_str()), true); + } + + Y_UNIT_TEST(AsyncReplicationEndpointAndDatabase) { + TKikimrRunner kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(ENDPOINT = "%s", DATABASE = "/Root")", kikimr.GetEndpoint().c_str())); + } + Y_UNIT_TEST(DisableResourcePools) { TKikimrRunner kikimr(TKikimrSettings().SetEnableResourcePools(false)); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - auto checkDisabled = [&session](const TString& query) { + auto checkQuery = [&session](const TString& query, EStatus status, const TString& error) { Cerr << "Check query:\n" << query << "\n"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::UNSUPPORTED); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pools are disabled. Please contact your system administrator to enable it"); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), status); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), error); + }; + + auto checkDisabled = [checkQuery](const TString& query) { + checkQuery(query, EStatus::UNSUPPORTED, "Resource pools are disabled. 
Please contact your system administrator to enable it"); }; // CREATE RESOURCE POOL @@ -6077,13 +6278,65 @@ Y_UNIT_TEST_SUITE(KqpScheme) { // ALTER RESOURCE POOL checkDisabled(R"( ALTER RESOURCE POOL MyResourcePool - SET (CONCURRENT_QUERY_LIMIT = 30), - SET QUEUE_SIZE 100, + SET (CONCURRENT_QUERY_LIMIT = 30, QUEUE_SIZE = 100), RESET (QUERY_MEMORY_LIMIT_PERCENT_PER_NODE); )"); // DROP RESOURCE POOL - checkDisabled("DROP RESOURCE POOL MyResourcePool;"); + checkQuery("DROP RESOURCE POOL MyResourcePool;", + EStatus::SCHEME_ERROR, + "Path does not exist"); + } + + Y_UNIT_TEST(DisableResourcePoolsOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(false) + .Create(); + + auto checkDisabled = [](const auto& result) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pools are disabled for serverless domains. 
Please contact your system administrator to enable it"); + }; + + auto checkNotFound = [](const auto& result) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Path does not exist"); + }; + + const auto& createSql = R"( + CREATE RESOURCE POOL MyResourcePool WITH ( + CONCURRENT_QUERY_LIMIT=20, + QUEUE_SIZE=1000 + );)"; + + const auto& alterSql = R"( + ALTER RESOURCE POOL MyResourcePool + SET (CONCURRENT_QUERY_LIMIT = 30, QUEUE_SIZE = 100), + RESET (QUERY_MEMORY_LIMIT_PERCENT_PER_NODE); + )"; + + const auto& dropSql = "DROP RESOURCE POOL MyResourcePool;"; + + auto settings = NWorkload::TQueryRunnerSettings().PoolId(""); + + // Dedicated, enabled + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Shared, enabled + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Serverless, disabled + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2); + checkDisabled(ydb->ExecuteQuery(createSql, settings)); + checkNotFound(ydb->ExecuteQuery(alterSql, settings)); + checkNotFound(ydb->ExecuteQuery(dropSql, settings)); } Y_UNIT_TEST(ResourcePoolsValidation) { @@ -6113,7 +6366,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { result = session.ExecuteSchemeQuery(R"( ALTER RESOURCE POOL MyResourcePool - SET ANOTHER_LIMIT 5, + SET (ANOTHER_LIMIT = 5), RESET (SOME_LIMIT); )").GetValueSync(); 
UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); @@ -6125,6 +6378,20 @@ Y_UNIT_TEST_SUITE(KqpScheme) { );)").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Failed to parse property concurrent_query_limit:"); + + result = session.ExecuteSchemeQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL MyResourcePool WITH ( + CONCURRENT_QUERY_LIMIT=)" << NResourcePool::POOL_MAX_CONCURRENT_QUERY_LIMIT + 1 << R"( + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SCHEME_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Invalid resource pool configuration, concurrent_query_limit is " << NResourcePool::POOL_MAX_CONCURRENT_QUERY_LIMIT + 1 << ", that exceeds limit in " << NResourcePool::POOL_MAX_CONCURRENT_QUERY_LIMIT); + + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL MyResourcePool WITH ( + QUEUE_SIZE=1 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SCHEME_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Invalid resource pool configuration, queue_size unsupported without concurrent_query_limit or database_load_cpu_threshold"); } Y_UNIT_TEST(CreateResourcePool) { @@ -6148,7 +6415,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); auto& runtime = *kikimr.GetTestServer().GetRuntime(); - auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.resource_pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.metadata/workload_manager/pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); const auto& resourcePool = resourcePoolDesc->ResultSet.at(0); UNIT_ASSERT_VALUES_EQUAL(resourcePool.Kind, 
NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool); UNIT_ASSERT(resourcePool.ResourcePoolInfo); @@ -6180,7 +6447,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); auto& runtime = *kikimr.GetTestServer().GetRuntime(); - auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.resource_pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.metadata/workload_manager/pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); UNIT_ASSERT_VALUES_EQUAL(resourcePoolDesc->ResultSet.at(0).Kind, NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool); } @@ -6191,7 +6458,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { );)"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Check failed: path: '/Root/.resource_pools/MyResourcePool', error: path exist"); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Check failed: path: '/Root/.metadata/workload_manager/pools/MyResourcePool', error: path exist"); } } @@ -6216,7 +6483,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); auto& runtime = *kikimr.GetTestServer().GetRuntime(); - auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.resource_pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.metadata/workload_manager/pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); const auto& properties = resourcePoolDesc->ResultSet.at(0).ResourcePoolInfo->Description.GetProperties().GetProperties(); 
UNIT_ASSERT_VALUES_EQUAL(properties.size(), 2); UNIT_ASSERT_VALUES_EQUAL(properties.at("concurrent_query_limit"), "20"); @@ -6226,15 +6493,14 @@ Y_UNIT_TEST_SUITE(KqpScheme) { { auto query = R"( ALTER RESOURCE POOL MyResourcePool - SET (CONCURRENT_QUERY_LIMIT = 30), - SET QUEUE_SIZE 100, + SET (CONCURRENT_QUERY_LIMIT = 30, QUEUE_SIZE = 100), RESET (QUERY_MEMORY_LIMIT_PERCENT_PER_NODE); )"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); auto& runtime = *kikimr.GetTestServer().GetRuntime(); - auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.resource_pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.metadata/workload_manager/pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); const auto& properties = resourcePoolDesc->ResultSet.at(0).ResourcePoolInfo->Description.GetProperties().GetProperties(); UNIT_ASSERT_VALUES_EQUAL(properties.size(), 3); UNIT_ASSERT_VALUES_EQUAL(properties.at("concurrent_query_limit"), "30"); @@ -6256,8 +6522,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query = R"( ALTER RESOURCE POOL MyResourcePool - SET (CONCURRENT_QUERY_LIMIT = 30), - SET QUEUE_SIZE 100, + SET (CONCURRENT_QUERY_LIMIT = 30, QUEUE_SIZE = 100), RESET (QUERY_MEMORY_LIMIT_PERCENT_PER_NODE); )"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); @@ -6291,7 +6556,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } auto& runtime = *kikimr.GetTestServer().GetRuntime(); - auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.resource_pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + auto resourcePoolDesc = Navigate(runtime, runtime.AllocateEdgeActor(), "Root/.metadata/workload_manager/pools/MyResourcePool", NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); const auto& 
resourcePool = resourcePoolDesc->ResultSet.at(0); UNIT_ASSERT_VALUES_EQUAL(resourcePoolDesc->ErrorCount, 1); UNIT_ASSERT_VALUES_EQUAL(resourcePool.Kind, NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown); @@ -6312,6 +6577,498 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SCHEME_ERROR, result.GetIssues().ToString()); } + + Y_UNIT_TEST(DisableResourcePoolClassifiers) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(false); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(false)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto checkQuery = [&session](const TString& query, EStatus status, const TString& error = "") { + Cerr << "Check query:\n" << query << "\n"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), status); + if (status != EStatus::SUCCESS) { + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), error); + } + }; + + auto checkDisabled = [checkQuery](const TString& query) { + checkQuery(query, EStatus::GENERIC_ERROR, "Resource pool classifiers are disabled. 
Please contact your system administrator to enable it"); + }; + + // CREATE RESOURCE POOL CLASSIFIER + checkDisabled(R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL="test_pool" + );)"); + + // ALTER RESOURCE POOL CLASSIFIER + checkDisabled(R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (RANK = 1, RESOURCE_POOL = "test"), + RESET (MEMBER_NAME); + )"); + + // DROP RESOURCE POOL CLASSIFIER + checkQuery("DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;", + EStatus::GENERIC_ERROR, + "Classifier with name MyResourcePoolClassifier not found in database with id /Root"); + } + + Y_UNIT_TEST(DisableResourcePoolClassifiersOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(false) + .Create(); + + auto checkDisabled = [](const auto& result) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pool classifiers are disabled for serverless domains. 
Please contact your system administrator to enable it"); + }; + + auto checkNotFound = [](const auto& result) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Classifier with name MyResourcePoolClassifier not found in database"); + }; + + const auto& createSql = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL="test_pool" + );)"; + + const auto& alterSql = R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (RANK = 1, RESOURCE_POOL = "test"), + RESET (MEMBER_NAME); + )"; + + const auto& dropSql = "DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;"; + + auto settings = NWorkload::TQueryRunnerSettings().PoolId(""); + + // Dedicated, enabled + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Shared, enabled + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Serverless, disabled + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2); + checkDisabled(ydb->ExecuteQuery(createSql, settings)); + checkDisabled(ydb->ExecuteQuery(alterSql, settings)); + checkNotFound(ydb->ExecuteQuery(dropSql, settings)); + } + + Y_UNIT_TEST(ResourcePoolClassifiersValidation) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + 
.SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test", + ANOTHER_PROPERTY=20 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Unknown property: another_property"); + + result = session.ExecuteSchemeQuery(R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (ANOTHER_PROPERTY = 5), + RESET (SOME_PROPERTY); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Unknown property: another_property, some_property"); + + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test", + RANK="StringValue" + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Failed to parse property rank:"); + + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK="0" + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Missing required property resource_pool"); + + result = session.ExecuteSchemeQuery(R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + RESET (RESOURCE_POOL); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Cannot reset required property resource_pool"); + + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER `MyResource/PoolClassifier` WITH ( + 
RESOURCE_POOL="test" + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Symbol '/' is not allowed in the resource pool classifier name 'MyResource/PoolClassifier'"); + + result = session.ExecuteSchemeQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test", + MEMBER_NAME=")" << BUILTIN_ACL_METADATA << R"(" + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Invalid resource pool classifier configuration, cannot create classifier for system user " << BUILTIN_ACL_METADATA); + } + + Y_UNIT_TEST(ResourcePoolClassifiersRankValidation) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + // Create with sample rank + auto result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER ClassifierRank42 WITH ( + RESOURCE_POOL="test_pool", + RANK=42 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToOneLineString()); + + // Try to create with same rank + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER AnotherClassifierRank42 WITH ( + RESOURCE_POOL="test_pool", + RANK=42 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pool classifier rank check failed, status: ALREADY_EXISTS, reason: {
: Error: Classifier with rank 42 already exists, its name ClassifierRank42 }"); + + // Create with high rank + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER `ClassifierRank2^63` WITH ( + RESOURCE_POOL="test_pool", + RANK=9223372036854775807 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToOneLineString()); + + // Try to create with auto rank + result = session.ExecuteSchemeQuery(R"( + CREATE RESOURCE POOL CLASSIFIER ClassifierRankAuto WITH ( + RESOURCE_POOL="test_pool", + MEMBER_NAME="test@user" + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "The rank could not be set automatically, the maximum rank of the resource pool classifier is too high: 9223372036854775807"); + + // Try to alter to exist rank + result = session.ExecuteSchemeQuery(R"( + ALTER RESOURCE POOL CLASSIFIER `ClassifierRank2^63` SET ( + RANK=42 + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pool classifier rank check failed, status: ALREADY_EXISTS, reason: {
: Error: Classifier with rank 42 already exists, its name ClassifierRank42 }"); + + // Try to reset classifier rank + result = session.ExecuteSchemeQuery(R"( + ALTER RESOURCE POOL CLASSIFIER ClassifierRank42 RESET ( + RANK + );)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "The rank could not be set automatically, the maximum rank of the resource pool classifier is too high: 9223372036854775807"); + } + + TString FetchResourcePoolClassifiers(TTestActorRuntime& runtime, ui32 nodeIndex) { + const TActorId edgeActor = runtime.AllocateEdgeActor(nodeIndex); + runtime.Send(NMetadata::NProvider::MakeServiceId(runtime.GetNodeId(nodeIndex)), edgeActor, new NMetadata::NProvider::TEvAskSnapshot(std::make_shared()), nodeIndex); + + const auto response = runtime.GrabEdgeEvent(edgeActor); + UNIT_ASSERT(response); + return response->Get()->GetSnapshotAs()->SerializeToString(); + } + + TString FetchResourcePoolClassifiers(TKikimrRunner& kikimr) { + return FetchResourcePoolClassifiers(*kikimr.GetTestServer().GetRuntime(), 0); + } + + Y_UNIT_TEST(CreateResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + // Explicit rank + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL="test_pool", + MEMBER_NAME="test@user" + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), 
"{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"member_name\":\"test@user\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + + // Auto rank + query = R"( + CREATE RESOURCE POOL CLASSIFIER AnotherResourcePoolClassifier WITH ( + RESOURCE_POOL="test_pool", + MEMBER_NAME="another@user" + );)"; + result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"member_name\":\"test@user\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"},{\"rank\":1020,\"name\":\"AnotherResourcePoolClassifier\",\"config\":{\"member_name\":\"another@user\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + + Y_UNIT_TEST(CreateResourcePoolClassifierOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(true) + .Create(); + + const auto& serverlessTenant = ydb->GetSettings().GetServerlessTenantName(); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL="test_pool" + );)", + NWorkload::TQueryRunnerSettings() + .PoolId("") + .Database(serverlessTenant) + .NodeIndex(1) + )); + + const auto pathId = ydb->FetchDatabase(serverlessTenant)->Get()->PathId; + UNIT_ASSERT_VALUES_EQUAL( + FetchResourcePoolClassifiers(*ydb->GetRuntime(), 1), + TStringBuilder() << "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"resource_pool\":\"test_pool\"},\"database\":\"" << pathId.OwnerId << ":" << pathId.LocalPathId << ":\\/Root\\/test-serverless\"}]}" + ); + } + + Y_UNIT_TEST(DoubleCreateResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + 
config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test_pool", + RANK=20 + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test_pool", + RANK=1 + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Conflict with existing key"); + } + } + + Y_UNIT_TEST(AlterResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + // Create sample resource pool classifier (rank 20, no member name yet) + { + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL="test_pool" + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + + // Test update one property + { + auto query = R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (MEMBER_NAME = 
"test@user") + )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"member_name\":\"test@user\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + + // Create another resource pool classifier (rank 42) alongside the first one + { + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER AnotherResourcePoolClassifier WITH ( + RESOURCE_POOL="test_pool", + RANK=42 + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"member_name\":\"test@user\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"},{\"rank\":42,\"name\":\"AnotherResourcePoolClassifier\",\"config\":{\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + + // Test reset: member_name becomes empty; rank is expected to be 1042 - NOTE(review): presumably auto-assigned as highest existing rank (42) + 1000, confirm against the classifier implementation + { + auto query = R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + RESET (RANK, MEMBER_NAME); + )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":1042,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"member_name\":\"\",\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"},{\"rank\":42,\"name\":\"AnotherResourcePoolClassifier\",\"config\":{\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + } + + Y_UNIT_TEST(AlterNonExistingResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); 
+ + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto query = R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (RESOURCE_POOL = "test", RANK = 100), + RESET (MEMBER_NAME); + )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Classifier with name MyResourcePoolClassifier not found in database with id /Root"); + } + + Y_UNIT_TEST(DropResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL="test_pool", + RANK=20 + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[{\"rank\":20,\"name\":\"MyResourcePoolClassifier\",\"config\":{\"resource_pool\":\"test_pool\"},\"database\":\"\\/Root\"}]}"); + } + + { + auto query = "DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(FetchResourcePoolClassifiers(kikimr), "{\"resource_pool_classifiers\":[]}"); + } + } + + Y_UNIT_TEST(DropNonExistingResourcePoolClassifier) { + 
NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + TKikimrRunner kikimr(NKqp::TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto query = "DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Classifier with name MyResourcePoolClassifier not found in database with id /Root"); + } + + Y_UNIT_TEST(DisableMetadataObjectsOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableMetadataObjectsOnServerless(false) + .Create(); + + auto checkDisabled = [](const auto& result) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Objects SECRET are disabled for serverless domains. 
Please contact your system administrator to enable it"); + }; + + const auto& createSql = "CREATE OBJECT MySecretObject (TYPE SECRET) WITH (value=\"qwerty\");"; + const auto& alterSql = "ALTER OBJECT MySecretObject (TYPE SECRET) SET value = \"abcde\";"; + const auto& upsertSql = "UPSERT OBJECT MySecretObject (TYPE SECRET) WITH value = \"edcba\";"; + const auto& dropSql = "DROP OBJECT MySecretObject (TYPE SECRET);"; + + auto settings = NWorkload::TQueryRunnerSettings().PoolId(""); + + // Dedicated, enabled + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(upsertSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Shared, enabled + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(alterSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(upsertSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + + // Serverless, disabled + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2); + checkDisabled(ydb->ExecuteQuery(createSql, settings)); + checkDisabled(ydb->ExecuteQuery(alterSql, settings)); + checkDisabled(ydb->ExecuteQuery(upsertSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSql, settings)); + } } Y_UNIT_TEST_SUITE(KqpOlapScheme) { @@ -6343,6 +7100,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { } testHelper.DropTable("/Root/ColumnTableTest"); for (auto tablet: tabletIds) { + testHelper.WaitTabletDeletionInHive(tablet, TDuration::Seconds(5)); 
UNIT_ASSERT_C(!testHelper.GetKikimr().GetTestClient().TabletExistsInHive(&testHelper.GetRuntime(), tablet), ToString(tablet) + " is alive"); } } @@ -7051,6 +7809,87 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.ReadData("SELECT * FROM `/Root/ColumnTableTest` WHERE id=1", "[[1;#;[\"test_res_1\"]]]"); } + void TestDropThenAddColumn(bool enableIndexation, bool enableCompaction) { + if (enableCompaction) { + Y_ABORT_UNLESS(enableIndexation); + } + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("value").SetType(NScheme::NTypeIds::Utf8), + }; + + TTestHelper::TColumnTable testTable; + testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(schema); + testHelper.CreateTable(testTable); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(1).Add("test_res_1"); + tableInserter.AddRow().Add(2).Add("test_res_2"); + testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableCompaction) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + } + + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" 
<< testTable.GetName() << "` DROP COLUMN value;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` ADD COLUMN value Uint64;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + schema.back().SetType(NScheme::NTypeIds::Uint64); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(3).Add(42); + tableInserter.AddRow().Add(4).Add(43); + testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableIndexation) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + } + if (enableCompaction) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitCompactions(TDuration::Seconds(5)); + } + + testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest`", "[[#];[#];[[42u]];[[43u]]]"); + } + + Y_UNIT_TEST(DropThenAddColumn) { + TestDropThenAddColumn(/* enableIndexation = */ false, /* enableCompaction = */ false); + } + + Y_UNIT_TEST(DropThenAddColumnIndexation) { + TestDropThenAddColumn(/* enableIndexation = */ true, /* enableCompaction = */ false); + } + + Y_UNIT_TEST(DropThenAddColumnCompaction) { + TestDropThenAddColumn(/* enableIndexation = */ true, /* enableCompaction = */ true); + } + Y_UNIT_TEST(DropTtlColumn) { TKikimrSettings runnerSettings; runnerSettings.WithSampleTables = false; diff --git a/ydb/core/kqp/ut/scheme/ya.make b/ydb/core/kqp/ut/scheme/ya.make index 4cd03e32b9d9..f1ab58f089eb 100644 --- a/ydb/core/kqp/ut/scheme/ya.make +++ b/ydb/core/kqp/ut/scheme/ya.make @@ -22,6 +22,7 @@ PEERDIR( library/cpp/threading/local_executor ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/kqp/workload_service/ut/common 
ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg ydb/library/yql/parser/pg_wrapper diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 1469403863ae..bf366f11b928 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -266,6 +268,47 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } + Y_UNIT_TEST(ExecuteQueryWithResourcePoolClassifier) { + NKikimrConfig::TAppConfig config; + config.MutableFeatureFlags()->SetEnableResourcePools(true); + + auto kikimr = TKikimrRunner(TKikimrSettings() + .SetAppConfig(config) + .SetEnableResourcePools(true)); + auto db = kikimr.GetQueryClient(); + + const TString userSID = TStringBuilder() << "test@" << BUILTIN_ACL_DOMAIN; + const TString schemeSql = TStringBuilder() << R"( + CREATE RESOURCE POOL MyPool WITH ( + CONCURRENT_QUERY_LIMIT=0 + ); + CREATE RESOURCE POOL CLASSIFIER MyPoolClassifier WITH ( + RESOURCE_POOL="MyPool", + MEMBER_NAME=")" << userSID << R"(" + ); + GRANT ALL ON `/Root` TO `)" << userSID << R"(`; + )"; + auto schemeResult = db.ExecuteQuery(schemeSql, TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(schemeResult.GetStatus(), EStatus::SUCCESS, schemeResult.GetIssues().ToString()); + + auto testUserClient = kikimr.GetQueryClient(TClientSettings().AuthToken(userSID)); + const TDuration timeout = TDuration::Seconds(5); + const TInstant start = TInstant::Now(); + while (TInstant::Now() - start <= timeout) { + const TString query = "SELECT 42;"; + auto result = testUserClient.ExecuteQuery(query, TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + if (!result.IsSuccess()) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::PRECONDITION_FAILED, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pool MyPool was disabled due to zero 
concurrent query limit"); + return; + } + + Cerr << "Wait resource pool classifier " << TInstant::Now() - start << ": status = " << result.GetStatus() << ", issues = " << result.GetIssues().ToOneLineString() << "\n"; + Sleep(TDuration::Seconds(1)); + } + UNIT_ASSERT_C(false, "Waiting resource pool classifier timeout. Spent time " << TInstant::Now() - start << " exceeds limit " << timeout); + } + std::pair CalcRowsAndBatches(TExecuteQueryIterator& it) { ui32 totalRows = 0; ui32 totalBatches = 0; @@ -2313,6 +2356,10 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { ALTER TABLE TestDdlDml2 DROP COLUMN Value2; UPSERT INTO TestDdlDml2 (Key, Value1) VALUES (2, "2"); SELECT * FROM TestDdlDml2; + CREATE TABLE TestDdlDml33 ( + Key Uint64, + PRIMARY KEY (Key) + ); )", TTxControl::NoTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); @@ -2327,6 +2374,13 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); CompareYson(R"([[[1u];["1"]];[[2u];["2"]]])", FormatResultSetYson(result.GetResultSet(0))); + result = db.ExecuteQuery(R"( + SELECT * FROM TestDdlDml33; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + result = db.ExecuteQuery(R"( CREATE TABLE TestDdlDml4 ( Key Uint64, @@ -2566,6 +2620,313 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } + Y_UNIT_TEST(CTASWithoutPerStatement) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + 
appConfig.MutableTableServiceConfig()->SetEnableAstCache(false); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(false); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}) + .SetWithSampleTables(false) + .SetEnableTempTables(true); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS Value1, "1" AS Value2; + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("Several CTAS statement can't be used without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + SELECT * FROM Table1 ORDER BY Key; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("CTAS statement can't be used with other statements without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("CTAS statement can't be used with other statements without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS 
Value1, "1" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + CompareYson(R"([[[1u];["1"];["1"]]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + + Y_UNIT_TEST(SeveralCTAS) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableAstCache(true); + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}) + .SetWithSampleTables(false) + .SetEnableTempTables(true); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS Value1, "1" AS Value2; + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + CREATE TABLE Table3 ( + PRIMARY KEY (Key) + ) AS SELECT * FROM Table2 UNION ALL SELECT * FROM Table1; + SELECT * FROM Table1 ORDER BY Key; + SELECT * FROM Table2 ORDER BY Key; + SELECT * FROM Table3 ORDER BY Key; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + 
UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 3); + // Results are empty. Snapshot was taken before tables were created, so we don't see changes after snapshot. + // This will be fixed in future, for example, by implicit commit before/after each ddl statement. + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(1))); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(2))); + + result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + SELECT * FROM Table2 ORDER BY Key; + SELECT * FROM Table3 ORDER BY Key; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 3); + CompareYson(R"([[[1u];["1"];["1"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[2u];["2"];["2"]]])", FormatResultSetYson(result.GetResultSet(1))); + // Also empty now( + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(2))); + } + } + + Y_UNIT_TEST(CheckIsolationLevelFroPerStatementMode) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableAstCache(true); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + auto tableClient = kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + { + // 1 ddl statement + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test1 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 0); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test1").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 2 ddl statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test2 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + CREATE TABLE Test3 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 0); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + + NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test2").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test3").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml statement + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test1; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + } + + { + // 2 dml statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test2; + SELECT * FROM Test3; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + 
UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + } + + { + // 1 ddl 1 dml statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test4 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test4; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test4").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml 1 ddl statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test4; + CREATE TABLE Test5 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test5").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 ddl 1 dml 1 ddl 1 dml statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test6 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test6; + CREATE TABLE Test7 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test7; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + 
NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test6").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test7").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml 1 ddl 1 dml 1 ddl statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test7; + CREATE TABLE Test8 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test8; + CREATE TABLE Test9 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test8").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test9").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + } + Y_UNIT_TEST(TableSink_ReplaceFromSelectOlap) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); @@ -2735,7 +3096,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2748,20 +3109,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) 
{ auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), - insertResult.GetIssues().ToString()); - } - - { - // column & row read - const TString sql = R"( - SELECT * FROM `/Root/DataShard`; - SELECT * FROM `/Root/ColumnShard`; - )"; - auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT(!insertResult.IsSuccess()); - UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2776,7 +3124,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2790,7 +3138,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2804,7 +3152,7 @@ 
Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } } @@ -2892,7 +3240,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { // Shuffled auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col3, Col4, Col2, Col1) VALUES ("test100", "100", 1000, 100u); @@ -2916,7 +3264,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson(output, R"([[1u;"test1";[10];["1"]];[2u;"test2";#;["2"]];[3u;"test3";[12];#];[4u;"test4";#;#];[100u;"test100";[1000];["100"]]])"); } - Y_UNIT_TEST(TableSink_OltpReplace) { + Y_UNIT_TEST_TWIN(TableSink_OltpReplace, HasSecondaryIndex) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); auto settings = TKikimrSettings() @@ -2928,20 +3276,21 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); - const TString query = R"( + const TString query = Sprintf(R"( CREATE TABLE `/Root/DataShard` ( Col1 Uint64 NOT NULL, Col2 Int32, Col3 String, + %s PRIMARY KEY (Col1) ); - )"; + )", (HasSecondaryIndex ? 
"INDEX idx_2 GLOBAL ON (Col2)," : "")); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - + { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2) VALUES (0u, 0); @@ -2959,7 +3308,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]]])"); } - { + { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col3) VALUES (0u, 'null'); REPLACE INTO `/Root/DataShard` (Col1) VALUES (1u); @@ -2997,6 +3346,11 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = Kikimr->GetTableClient().CreateSession().GetValueSync().GetSession(); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + const TString query = Sprintf(R"( CREATE TABLE `/Root/DataShard` ( Col1 Uint64 NOT NULL, @@ -3012,6 +3366,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); DoExecute(); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); } }; @@ -3046,6 +3402,13 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } + { + auto it = client.ExecuteQuery(R"( + UPSERT INTO `/Root/DataShard` (Col3) VALUES ('null'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT(!it.IsSuccess()); + } + { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; @@ -3308,7 +3671,7 @@ 
Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (100u, 1000), (100u, 1000); @@ -3316,7 +3679,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (100u, 1000), (100u, 1000); @@ -3359,7 +3722,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (1u, 1) )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); @@ -3479,9 +3842,101 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } - Y_UNIT_TEST(ReplaceIntoWithDefaultValue) { + Y_UNIT_TEST(ReadDatashardAndColumnshard) { NKikimrConfig::TAppConfig appConfig; - appConfig.MutableTableServiceConfig()->SetEnableOlapSink(false); + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto client = kikimr.GetQueryClient(); + + { + auto createTable = client.ExecuteQuery(R"sql( + CREATE TABLE `/Root/DataShard` ( + Col1 Uint64 NOT NULL, + Col2 Int32, + Col3 String, + PRIMARY KEY (Col1) + ) WITH ( + STORE = ROW, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + CREATE TABLE `/Root/ColumnShard` ( + Col1 Uint64 NOT NULL, + Col2 Int32, + Col3 String, + PRIMARY KEY (Col1) + ) WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); 
+ )sql", NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_C(createTable.IsSuccess(), createTable.GetIssues().ToString()); + } + + { + auto replaceValues = client.ExecuteQuery(R"sql( + REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES + (1u, 1, "row"); + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); + } + + { + auto replaceValues = client.ExecuteQuery(R"sql( + REPLACE INTO `/Root/ColumnShard` (Col1, Col2, Col3) VALUES + (2u, 2, "column"); + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[2u;[2];["column"]]])"); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT * FROM `/Root/DataShard` + UNION ALL + SELECT * FROM `/Root/ColumnShard` + ORDER BY Col1; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[1u;[1];["row"]];[2u;[2];["column"]]])"); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT r.Col3 AS a, c.Col3 AS b FROM `/Root/DataShard` AS r + JOIN `/Root/ColumnShard` AS c ON r.Col1 + 1 = c.Col1 + ORDER BY a; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + 
R"([[["row"];["column"]]])"); + } + } + + Y_UNIT_TEST(ReplaceIntoWithDefaultValue) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(false); appConfig.MutableTableServiceConfig()->SetEnableOltpSink(false); auto settings = TKikimrSettings() .SetAppConfig(appConfig) @@ -3513,6 +3968,355 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); } } + + void RunQuery (const TString& query, auto& session, bool expectOk = true) { + auto qResult = session.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); + if (!qResult.IsSuccess()) { + Cerr << "Query failed, status: " << qResult.GetStatus() << ": " << qResult.GetIssues().ToString() << Endl; + } + UNIT_ASSERT(qResult.IsSuccess() == expectOk); + }; + + struct TEntryCheck { + NYdb::NScheme::ESchemeEntryType Type; + TString Name; + bool IsExpected; + bool WasFound = false; + }; + + TEntryCheck ExpectedTopic(const TString& name) { + return TEntryCheck{NYdb::NScheme::ESchemeEntryType::Topic, name, true}; + } + TEntryCheck UnexpectedTopic(const TString& name) { + return TEntryCheck{NYdb::NScheme::ESchemeEntryType::Topic, name, false}; + } + + void CheckDirEntry(TKikimrRunner& kikimr, TVector& entriesToCheck) { + auto res = kikimr.GetSchemeClient().ListDirectory("/Root").GetValueSync(); + for (const auto& entry : res.GetChildren()) { + Cerr << "Scheme entry: " << entry << Endl; + for (auto& checkEntry : entriesToCheck) { + if (checkEntry.Name != entry.Name) + continue; + if (checkEntry.IsExpected) { + UNIT_ASSERT_C(entry.Type == checkEntry.Type, checkEntry.Name); + checkEntry.WasFound = true; + } else { + UNIT_ASSERT_C(entry.Type != checkEntry.Type, checkEntry.Name); + } + } + } + for (auto& checkEntry : entriesToCheck) { + if (checkEntry.IsExpected) { + UNIT_ASSERT_C(checkEntry.WasFound, checkEntry.Name); + } + } + } + + Y_UNIT_TEST(CreateAndDropTopic) { + NKikimrConfig::TAppConfig appConfig; + 
appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + serverSettings.PQConfig.SetRequireCredentialsInNewProtocol(false); + TKikimrRunner kikimr( + serverSettings.SetWithSampleTables(false).SetEnableTempTables(true)); + auto client = kikimr.GetQueryClient(); + auto session = client.GetSession().GetValueSync().GetSession(); + auto pq = NYdb::NTopic::TTopicClient(kikimr.GetDriver(), + NYdb::NTopic::TTopicClientSettings().Database("/Root").AuthToken("root@builtin")); + + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session); + Cerr << "Topic created\n"; + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC IF NOT EXISTS `/Root/TempTopic` (CONSUMER cons1, CONSUMER cons2); + )"); + RunQuery(queryCreateTopic, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1, CONSUMER cons2, CONSUMER cons3); + )"); + RunQuery(queryCreateTopic, session, false); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + + TVector entriesToCheck = {ExpectedTopic("TempTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC `/Root/TempTopic`; + )"); + RunQuery(query, session); + Cerr << "Topic dropped\n"; + TVector entriesToCheck = {UnexpectedTopic("TempTopic")}; + CheckDirEntry(kikimr, entriesToCheck); 
+ } + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC IF EXISTS `/Root/TempTopic`; + )"); + RunQuery(query, session); + } + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC `/Root/TempTopic`; + )"); + RunQuery(query, session, false); + } + } + + Y_UNIT_TEST(CreateAndAlterTopic) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + TKikimrRunner kikimr{serverSettings}; + auto client = kikimr.GetQueryClient(NYdb::NQuery::TClientSettings{}.AuthToken("root@builtin")); + auto session = client.GetSession().GetValueSync().GetSession(); + auto pq = NYdb::NTopic::TTopicClient(kikimr.GetDriver(), + NYdb::NTopic::TTopicClientSettings().Database("/Root").AuthToken("root@builtin")); + + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session); + + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 1); + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC `/Root/TempTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 10); + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC IF EXISTS `/Root/TempTopic` SET (min_active_partitions = 15); + )"); + RunQuery(query, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 15); + } + + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC 
`/Root/NoSuchTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session, false); + + TVector entriesToCheck = {UnexpectedTopic("NoSuchTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC IF EXISTS `/Root/NoSuchTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session); + TVector entriesToCheck = {UnexpectedTopic("NoSuchTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + } + } + + Y_UNIT_TEST(CreateOrDropTopicOverTable) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + TKikimrRunner kikimr{serverSettings}; + auto tableClient = kikimr.GetTableClient(); + + { + auto tcSession = tableClient.CreateSession().GetValueSync().GetSession(); + UNIT_ASSERT(tcSession.ExecuteSchemeQuery(R"( + CREATE TABLE `/Root/TmpTable` ( + Key Uint64, + Value String, + PRIMARY KEY (Key) + ); + )").GetValueSync().IsSuccess()); + tcSession.Close(); + } + + auto client = kikimr.GetQueryClient(NYdb::NQuery::TClientSettings{}.AuthToken("root@builtin")); + auto session = client.GetSession().GetValueSync().GetSession(); + + TVector entriesToCheck = {TEntryCheck{.Type = NYdb::NScheme::ESchemeEntryType::Table, + .Name = "TmpTable", .IsExpected = true}}; + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TmpTable` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC IF NOT EXISTS `/Root/TmpTable` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + } + { + const auto queryDropTopic = Q_(R"( + --!syntax_v1 + DROP TOPIC `/Root/TmpTable`; + )"); + RunQuery(queryDropTopic, session, 
false); + } + { + const auto queryDropTopic = Q_(R"( + --!syntax_v1 + DROP TOPIC IF EXISTS `/Root/TmpTable`; + )"); + RunQuery(queryDropTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + } + { + auto tcSession = tableClient.CreateSession().GetValueSync().GetSession(); + auto type = TTypeBuilder().BeginOptional().Primitive(EPrimitiveType::Uint64).EndOptional().Build(); + auto alter = NYdb::NTable::TAlterTableSettings().AppendAddColumns(TColumn("NewColumn", type)); + + auto alterResult = tcSession.AlterTable("/Root/TmpTable", alter + ).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(alterResult.GetStatus(), EStatus::SUCCESS); + } + } + + Y_UNIT_TEST(AlterCdcTopic) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + TKikimrRunner kikimr{serverSettings}; + auto tableClient = kikimr.GetTableClient(); + + { + auto tcSession = tableClient.CreateSession().GetValueSync().GetSession(); + UNIT_ASSERT(tcSession.ExecuteSchemeQuery(R"( + CREATE TABLE `/Root/TmpTable` ( + Key Uint64, + Value String, + PRIMARY KEY (Key) + ); + )").GetValueSync().IsSuccess()); + + UNIT_ASSERT(tcSession.ExecuteSchemeQuery(R"( + ALTER TABLE `/Root/TmpTable` ADD CHANGEFEED `feed` WITH ( + MODE = 'KEYS_ONLY', FORMAT = 'JSON' + ); + )").GetValueSync().IsSuccess()); + tcSession.Close(); + } + + auto pq = NYdb::NTopic::TTopicClient(kikimr.GetDriver(), + NYdb::NTopic::TTopicClientSettings().Database("/Root").AuthToken("root@builtin")); + + auto client = kikimr.GetQueryClient(NYdb::NQuery::TClientSettings{}.AuthToken("root@builtin")); + auto session = client.GetSession().GetValueSync().GetSession(); + { + + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC `/Root/TmpTable/feed` ADD CONSUMER consumer21; + )"); + + RunQuery(query, session); + auto desc = 
pq.DescribeTopic("/Root/TmpTable/feed").ExtractValueSync(); + const auto& consumers = desc.GetTopicDescription().GetConsumers(); + UNIT_ASSERT_VALUES_EQUAL(consumers.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(consumers[0].GetConsumerName(), "consumer21"); + + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC `/Root/TmpTable/feed` SET (min_active_partitions = 10); + )"); + RunQuery(query, session, false); + auto desc = pq.DescribeTopic("/Root/TmpTable/feed").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 1); + } + + } + + Y_UNIT_TEST(TableSink_OlapRWQueries) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + const TString query = R"( + CREATE TABLE `/Root/ColumnShard` ( + Col1 Uint64 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + PARTITION BY HASH(Col1) + WITH (STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 3); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + auto client = kikimr.GetQueryClient(); + { + auto result = client.ExecuteQuery(R"( + REPLACE INTO `/Root/ColumnShard` (Col1, Col2, Col3) VALUES + (1u, "test1", 10), (2u, "test2", 11), (3u, "test3", 12), (4u, NULL, 13); + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + INSERT INTO `/Root/ColumnShard` SELECT Col1 + 100 AS Col1, Col2, Col3 FROM `/Root/ColumnShard`; + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), 
result.GetIssues().ToString()); + CompareYson(R"([[1u;["test1"];10];[2u;["test2"];11];[3u;["test3"];12];[4u;#;13]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[1u;["test1"];10];[2u;["test2"];11];[3u;["test3"];12];[4u;#;13];[101u;["test1"];10];[102u;["test2"];11];[103u;["test3"];12];[104u;#;13]])", FormatResultSetYson(result.GetResultSet(1))); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/service/ya.make b/ydb/core/kqp/ut/service/ya.make index 646e1b16edb4..f8920e41b757 100644 --- a/ydb/core/kqp/ut/service/ya.make +++ b/ydb/core/kqp/ut/service/ya.make @@ -24,6 +24,7 @@ PEERDIR( library/cpp/threading/local_executor ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg ydb/library/yql/parser/pg_wrapper ydb/public/lib/ut_helpers diff --git a/ydb/core/kqp/ut/spilling/kqp_scan_spilling_ut.cpp b/ydb/core/kqp/ut/spilling/kqp_scan_spilling_ut.cpp index 1ed26d6de137..2f66b9f37bf9 100644 --- a/ydb/core/kqp/ut/spilling/kqp_scan_spilling_ut.cpp +++ b/ydb/core/kqp/ut/spilling/kqp_scan_spilling_ut.cpp @@ -32,10 +32,119 @@ NKikimrConfig::TAppConfig AppCfg() { return appCfg; } +NKikimrConfig::TAppConfig AppCfgLowComputeLimits(double reasonableTreshold, bool enableSpilling=true) { + NKikimrConfig::TAppConfig appCfg; + + auto* rm = appCfg.MutableTableServiceConfig()->MutableResourceManager(); + rm->SetMkqlLightProgramMemoryLimit(100); + rm->SetMkqlHeavyProgramMemoryLimit(300); + rm->SetSpillingPercent(reasonableTreshold); + appCfg.MutableTableServiceConfig()->SetEnableQueryServiceSpilling(true); + + auto* spilling = appCfg.MutableTableServiceConfig()->MutableSpillingServiceConfig()->MutableLocalFileConfig(); + + spilling->SetEnable(enableSpilling); + spilling->SetRoot("./spilling/"); + + return appCfg; +} + +void FillTableWithData(NQuery::TQueryClient& db, ui64 numRows=300) { + for (ui32 i = 0; i < numRows; ++i) { + auto result = db.ExecuteQuery(Sprintf(R"( + --!syntax_v1 + REPLACE INTO `/Root/KeyValue` 
(Key, Value) VALUES (%d, "%s") + )", i, TString(200000 + i, 'a' + (i % 26)).c_str()), NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } +} + +constexpr auto SimpleGraceJoinWithSpillingQuery = R"( + --!syntax_v1 + PRAGMA ydb.EnableSpillingNodes="GraceJoin"; + PRAGMA ydb.CostBasedOptimizationLevel='0'; + PRAGMA ydb.HashJoinMode='graceandself'; + select t1.Key, t1.Value, t2.Key, t2.Value + from `/Root/KeyValue` as t1 full join `/Root/KeyValue` as t2 on t1.Value = t2.Value + order by t1.Value + )"; + + } // anonymous namespace Y_UNIT_TEST_SUITE(KqpScanSpilling) { +Y_UNIT_TEST(SpillingPragmaParseError) { + Cerr << "cwd: " << NFs::CurrentWorkingDirectory() << Endl; + TKikimrRunner kikimr(AppCfg()); + + auto db = kikimr.GetQueryClient(); + auto query = R"( + --!syntax_v1 + PRAGMA ydb.EnableSpillingNodes="GraceJoin1"; + select t1.Key, t1.Value, t2.Key, t2.Value + from `/Root/KeyValue` as t1 full join `/Root/KeyValue` as t2 on t1.Value = t2.Value + order by t1.Value + )"; + + auto explainMode = NYdb::NQuery::TExecuteQuerySettings().ExecMode(NYdb::NQuery::EExecMode::Explain); + auto planres = db.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx(), explainMode).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(planres.GetStatus(), EStatus::GENERIC_ERROR, planres.GetIssues().ToString()); +} + +Y_UNIT_TEST_TWIN(SpillingInRuntimeNodes, EnabledSpilling) { + // tmp fix while runtime version is lower 50 + if (EnabledSpilling) return; + double reasonableTreshold = EnabledSpilling ? 
0.01 : 100; + Cerr << "cwd: " << NFs::CurrentWorkingDirectory() << Endl; + TKikimrRunner kikimr(AppCfgLowComputeLimits(reasonableTreshold)); + + auto db = kikimr.GetQueryClient(); + + FillTableWithData(db); + + auto explainMode = NYdb::NQuery::TExecuteQuerySettings().ExecMode(NYdb::NQuery::EExecMode::Explain); + auto planres = db.ExecuteQuery(SimpleGraceJoinWithSpillingQuery, NYdb::NQuery::TTxControl::NoTx(), explainMode).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(planres.GetStatus(), EStatus::SUCCESS, planres.GetIssues().ToString()); + + Cerr << planres.GetStats()->GetAst() << Endl; + + auto result = db.ExecuteQuery(SimpleGraceJoinWithSpillingQuery, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), NYdb::NQuery::TExecuteQuerySettings()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + TKqpCounters counters(kikimr.GetTestServer().GetRuntime()->GetAppData().Counters); + if (EnabledSpilling) { + UNIT_ASSERT(counters.SpillingWriteBlobs->Val() > 0); + UNIT_ASSERT(counters.SpillingReadBlobs->Val() > 0); + } else { + UNIT_ASSERT(counters.SpillingWriteBlobs->Val() == 0); + UNIT_ASSERT(counters.SpillingReadBlobs->Val() == 0); + } +} + +Y_UNIT_TEST(HandleErrorsCorrectly) { + Cerr << "cwd: " << NFs::CurrentWorkingDirectory() << Endl; + TKikimrRunner kikimr(AppCfgLowComputeLimits(0.01, false)); + + auto db = kikimr.GetQueryClient(); + + FillTableWithData(db); + + auto explainMode = NYdb::NQuery::TExecuteQuerySettings().ExecMode(NYdb::NQuery::EExecMode::Explain); + auto planres = db.ExecuteQuery(SimpleGraceJoinWithSpillingQuery, NYdb::NQuery::TTxControl::NoTx(), explainMode).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(planres.GetStatus(), EStatus::SUCCESS, planres.GetIssues().ToString()); + + Cerr << planres.GetStats()->GetAst() << Endl; + + auto result = db.ExecuteQuery(SimpleGraceJoinWithSpillingQuery, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), 
NYdb::NQuery::TExecuteQuerySettings()).ExtractValueSync(); + const auto errorMsg = result.GetIssues().ToString(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::INTERNAL_ERROR, errorMsg); + + const auto spillingPrefix = "[Compute spilling]"; + const auto pos = errorMsg.find(spillingPrefix); + UNIT_ASSERT_VALUES_UNEQUAL_C(pos, std::string::npos, "Spilling prefix not found in error message"); +} + Y_UNIT_TEST(SelfJoinQueryService) { Cerr << "cwd: " << NFs::CurrentWorkingDirectory() << Endl; @@ -60,6 +169,7 @@ Y_UNIT_TEST(SelfJoinQueryService) { auto query = R"( --!syntax_v1 + PRAGMA ydb.CostBasedOptimizationLevel='0'; select t1.Key, t1.Value, t2.Key, t2.Value from `/Root/KeyValue` as t1 join `/Root/KeyValue` as t2 on t1.Value = t2.Value order by t1.Key @@ -120,6 +230,7 @@ Y_UNIT_TEST(SelfJoin) { auto query = R"( --!syntax_v1 + PRAGMA ydb.CostBasedOptimizationLevel='0'; select t1.Key, t1.Value, t2.Key, t2.Value from `/Root/KeyValue` as t1 join `/Root/KeyValue` as t2 on t1.Key = t2.Key order by t1.Key diff --git a/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp b/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp index 780fdf977c3c..9ac7d3fabee7 100644 --- a/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp @@ -29,9 +29,13 @@ using NYql::TExprNode; Y_UNIT_TEST_SUITE(KqpLocksTricky) { - Y_UNIT_TEST(TestNoLocksIssue) { + Y_UNIT_TEST_TWIN(TestNoLocksIssue, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + auto setting = NKikimrKqp::TKqpSetting(); TKikimrSettings settings; + settings.SetAppConfig(appConfig); settings.SetUseRealThreads(false); TKikimrRunner kikimr(settings); auto db = kikimr.GetTableClient(); @@ -123,9 +127,13 @@ Y_UNIT_TEST_SUITE(KqpLocksTricky) { } } - Y_UNIT_TEST(TestNoLocksIssueInteractiveTx) { + Y_UNIT_TEST_TWIN(TestNoLocksIssueInteractiveTx, withSink) { + NKikimrConfig::TAppConfig appConfig; + 
appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + auto setting = NKikimrKqp::TKqpSetting(); TKikimrSettings settings; + settings.SetAppConfig(appConfig); settings.SetUseRealThreads(false); TKikimrRunner kikimr(settings); auto db = kikimr.GetTableClient(); diff --git a/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp b/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp index b07beb62a0a2..139e135522c1 100644 --- a/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp @@ -204,6 +204,42 @@ Y_UNIT_TEST_SUITE(KqpLocks) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([[[2u];#;[11u];["Session2"]]])", FormatResultSetYson(result.GetResultSet(0))); } + + Y_UNIT_TEST(TwoPhaseTx) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + + auto session1 = db.CreateSession().GetValueSync().GetSession(); + auto session2 = db.CreateSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteDataQuery(Q_(R"( + REPLACE INTO `/Root/Test` (Group, Name, Comment) VALUES (1U, "Paul", "Changed"); + SELECT * FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteDataQuery(Q_(R"( + REPLACE INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteDataQuery(Q_(R"( + SELECT * FROM `KeyValue`; + )"), TTxControl::Tx(*tx1)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto commitResult = tx1->Commit().GetValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + commitResult.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), commitResult.GetIssues().ToString()); + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp b/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp index a39c839e6ee2..f4eb9ee7a9ce 100644 --- a/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp @@ -9,9 +9,12 @@ using namespace NYdb; using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpSnapshotRead) { - Y_UNIT_TEST(TestSnapshotExpiration) { + Y_UNIT_TEST_TWIN(TestSnapshotExpiration, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); auto settings = TKikimrSettings() - .SetKeepSnapshotTimeout(TDuration::Seconds(1)); + .SetKeepSnapshotTimeout(TDuration::Seconds(1)) + .SetAppConfig(appConfig); TKikimrRunner kikimr(settings); @@ -63,8 +66,9 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(caught, "Failed to wait for snapshot expiration."); } - Y_UNIT_TEST(ReadOnlyTxCommitsOnConcurrentWrite) { + Y_UNIT_TEST_TWIN(ReadOnlyTxCommitsOnConcurrentWrite, withSink) { NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); TKikimrRunner kikimr(TKikimrSettings() .SetAppConfig(appConfig) @@ -125,8 +129,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { ])", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(ReadOnlyTxWithIndexCommitsOnConcurrentWrite) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadOnlyTxWithIndexCommitsOnConcurrentWrite, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner 
kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -186,8 +195,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { ])", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite1) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite1, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -223,8 +237,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); } - Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite2) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite2, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -266,8 +285,9 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); } - Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite3) { + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite3, withSink) { 
NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); TKikimrRunner kikimr( TKikimrSettings() diff --git a/ydb/core/kqp/ut/tx/kqp_sink_common.h b/ydb/core/kqp/ut/tx/kqp_sink_common.h new file mode 100644 index 000000000000..9dfcad60ed43 --- /dev/null +++ b/ydb/core/kqp/ut/tx/kqp_sink_common.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include +#include + + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +class TTableDataModificationTester { +protected: + NKikimrConfig::TAppConfig AppConfig; + std::unique_ptr Kikimr; + YDB_ACCESSOR(bool, IsOlap, false); + YDB_ACCESSOR(bool, FastSnapshotExpiration, false); + + virtual void DoExecute() = 0; +public: + void Execute() { + AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + AppConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + AppConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + auto settings = TKikimrSettings().SetAppConfig(AppConfig).SetWithSampleTables(false); + if (FastSnapshotExpiration) { + settings.SetKeepSnapshotTimeout(TDuration::Seconds(1)); + } + + Kikimr = std::make_unique(settings); + Tests::NCommon::TLoggerInit(*Kikimr).Initialize(); + + auto client = Kikimr->GetQueryClient(); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + + { + auto type = IsOlap ? 
"COLUMN" : "ROW"; + auto result = client.ExecuteQuery(Sprintf(R"( + CREATE TABLE `/Root/Test` ( + Group Uint32 not null, + Name String not null, + Amount Uint64, + Comment String, + PRIMARY KEY (Group, Name) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + + CREATE TABLE `/Root/KV` ( + Key Uint32 not null, + Value String, + PRIMARY KEY (Key) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_BY_LOAD = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 100, + UNIFORM_PARTITIONS = 100 + ); + + CREATE TABLE `/Root/KV2` ( + Key Uint32 not null, + Value String, + PRIMARY KEY (Key) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_BY_LOAD = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 100, + UNIFORM_PARTITIONS = 100 + ); + )", type, type, type), TTxControl::NoTx()).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteQuery(R"( + REPLACE INTO `Test` (Group, Name, Amount, Comment) VALUES + (1u, "Anna", 3500ul, "None"), + (1u, "Paul", 300ul, "None"), + (2u, "Tony", 7200ul, "None"); + REPLACE INTO `KV` (Key, Value) VALUES + (1u, "One"), + (2u, "Two"), + (3u, "Three"), + (4000000001u, "BigOne"), + (4000000002u, "BigTwo"), + (4000000003u, "BigThree"); + )", TTxControl::NoTx()).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + DoExecute(); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + } + +}; + +} +} diff --git a/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp new file mode 100644 index 000000000000..45428d86c8a7 --- /dev/null +++ b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp @@ -0,0 +1,337 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + 
+namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkLocks) { + class TInvalidate : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group + 10U AS Group, Name, Amount, Comment ?? "" || "Updated" AS Comment + FROM `/Root/Test` + WHERE Group == 1U AND Name == "Paul"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (11U, "Sergey", "BadRow"); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(TInvalidate) { + TInvalidate tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(TInvalidateOlap) { + TInvalidate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInvalidateOnCommit : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group + 10U AS Group, Name, Amount, Comment ?? "" || "Updated" AS Comment + FROM `/Root/Test` + WHERE Group == 1U AND Name == "Paul"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto commitResult = tx1->Commit().GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); + commitResult.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), commitResult.GetIssues().ToString()); + } + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM 
`/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(InvalidateOnCommit) { + TInvalidateOnCommit tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(InvalidateOlapOnCommit) { + TInvalidateOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TDifferentKeyUpdate : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (2U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + result = session1.ExecuteQuery(Q_(R"( + SELECT "Nothing"; + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(DifferentKeyUpdate) { + TDifferentKeyUpdate tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(DifferentKeyUpdateOlap) { + TDifferentKeyUpdate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TEmptyRange : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + 
+ auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session2", 2); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q1_(R"( + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session1", 1); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(EmptyRange) { + TEmptyRange tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(EmptyRangeOlap) { + TEmptyRange tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class 
TEmptyRangeAlreadyBroken : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 10; + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session2", 2); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session1", 1); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + 
Y_UNIT_TEST(EmptyRangeAlreadyBroken) { + TEmptyRangeAlreadyBroken tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(EmptyRangeAlreadyBrokenOlap) { + TEmptyRangeAlreadyBroken tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TUncommittedRead : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "TEST", 2); + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + { + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + } + + { + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::Tx(tx1->GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"TEST"]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + }; + + Y_UNIT_TEST(UncommittedRead) { + TUncommittedRead tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapUncommittedRead) { + TUncommittedRead tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp new file mode 100644 index 000000000000..a0ac82ad151d --- /dev/null +++ 
b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp @@ -0,0 +1,307 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkMvcc) { + class TSnapshotExpiration : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx = result.GetTransaction(); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto deadline = TInstant::Now() + TDuration::Seconds(30); + auto caught = false; + do { + Sleep(TDuration::Seconds(1)); + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + if (result.GetStatus() == EStatus::SUCCESS) + continue; + + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::DEFAULT_ERROR, + [](const NYql::TIssue& issue){ + return issue.GetMessage().Contains("has no snapshot at"); + }), result.GetIssues().ToString()); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::ABORTED); + + caught = true; + break; + } while (TInstant::Now() < 
deadline); + UNIT_ASSERT_C(caught, "Failed to wait for snapshot expiration."); + } + }; + + Y_UNIT_TEST(SnapshotExpiration) { + TSnapshotExpiration tester; + tester.SetFastSnapshotExpiration(true); + tester.SetIsOlap(false); + tester.Execute(); + } + +// Y_UNIT_TEST(OlapSnapshotExpiration) { +// TSnapshotExpiration tester; +// tester.SetFastSnapshotExpiration(true); +// tester.SetIsOlap(true); +// tester.Execute(); +// } + + class TReadOnlyTxCommitsOnConcurrentWrite : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["ChangedOne"]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); 
+ + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 2u OR Key = 4000000002u ORDER BY Key; + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [2u;["Two"]]; + [4000000002u;["BigTwo"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(ReadOnlyTxCommitsOnConcurrentWrite) { + TReadOnlyTxCommitsOnConcurrentWrite tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadOnlyTxCommitsOnConcurrentWrite) { + TReadOnlyTxCommitsOnConcurrentWrite tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite1 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = 
session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "TwiceChangedOne"); + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite1) { + TReadWriteTxFailsOnConcurrentWrite1 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite1) { + TReadWriteTxFailsOnConcurrentWrite1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite2 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + // We need to sleep before the upsert below, otherwise writes + // might happen in the same step as the snapshot, which would be + // treated as happening before snapshot and will not break any locks. 
+ Sleep(TDuration::Seconds(2)); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV2` (Key, Value) VALUES (101u, "SomeText"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "Something" WHERE Key = 1u; + UPDATE `/Root/KV2` SET Value = "AnotherString" WHERE Key = 101u; + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite2) { + TReadWriteTxFailsOnConcurrentWrite2 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite2) { + TReadWriteTxFailsOnConcurrentWrite2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite3 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2u, "ChangedTwo"); 
+ )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 2u OR Key = 4000000002u ORDER BY Key; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [2u;["Two"]]; + [4000000002u;["BigTwo"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2u, "TwiceChangedTwo"); + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite3) { + TReadWriteTxFailsOnConcurrentWrite3 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite3) { + TReadWriteTxFailsOnConcurrentWrite3 tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp new file mode 100644 index 000000000000..4959cf240e05 --- /dev/null +++ b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp @@ -0,0 +1,404 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkTx) { + class TDeferredEffects : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto 
result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group, "Sergey" AS Name + FROM `/Root/Test`; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx = result.GetTransaction(); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"] + ])", FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx->Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"]; + [#;#;1u;"Sergey"] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(DeferredEffects) { + TDeferredEffects tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapDeferredEffects) { + TDeferredEffects tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TExplicitTcl : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = 
session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (10u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Value = "New"; + )"), TTxControl::BeginTx(TTxSettings::OnlineRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_C(commitResult.IsSuccess(), commitResult.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Value = "New"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([[10u;["New"]]])", FormatResultSetYson(result.GetResultSet(0))); + + commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_TRANSACTION_NOT_FOUND), commitResult.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ExplicitTcl) { + TExplicitTcl tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapExplicitTcl) { + TExplicitTcl tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TLocksAbortOnCommit : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + { + auto result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1, "One"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2, "Two"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (3, "Three"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (4, "Four"); + )"), 
TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + + auto result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV`; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx = result.GetTransaction(); + + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "second" WHERE Key = 3; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + if (GetIsOlap()) { + // Olap has Reads in this query, so it breaks now. + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + } else { + // Oltp doesn't have Reads in this query, so it breaks later. 
+ UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + auto commitResult = tx->Commit().ExtractValueSync(); + + if (GetIsOlap()) { + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); + } + } + }; + + Y_UNIT_TEST(LocksAbortOnCommit) { + TLocksAbortOnCommit tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapLocksAbortOnCommit) { + TLocksAbortOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInvalidateOnError : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(Q_(R"( + INSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::NOT_FOUND, result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(InvalidateOnError) { + TInvalidateOnError tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapInvalidateOnError) { + TInvalidateOnError tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInteractive : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = 
Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(R"( + SELECT * FROM `/Root/KV` + )", TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )", TTxControl::Tx(tx.GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(R"( + SELECT * FROM `/Root/KV` WHERE Key < 3 ORDER BY Key + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["New"]]; + [2u;["Two"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(Interactive) { + TInteractive tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapInteractive) { + TInteractive tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotRO : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + // Read Immediate + auto result = session.ExecuteQuery(Q1_(R"( + SELECT * FROM KV WHERE Key = 2; + )"), TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[2u;["Two"]]])", FormatResultSetYson(result.GetResultSet(0))); + + // Read Distributed + result = session.ExecuteQuery(Q1_(R"( + SELECT COUNT(*) FROM KV WHERE Value = "One"; + )"), 
TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[1u]])", FormatResultSetYson(result.GetResultSet(0))); + + // Write + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (100, "100500"), + (100500, "100"); + )"), TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_BAD_OPERATION)); + } + }; + + Y_UNIT_TEST(SnapshotRO) { + TSnapshotRO tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotRO) { + TSnapshotRO tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotROInteractive1 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto readQuery = Q1_(R"( + SELECT * FROM KV WHERE Key = 1u; + )"); + + auto readResult = R"([ + [1u;["One"]] + ])"; + + auto result = session.ExecuteQuery(readQuery, + TTxControl::BeginTx(TTxSettings::SnapshotRO())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + auto tx = result.GetTransaction(); + UNIT_ASSERT(tx); + UNIT_ASSERT(tx->IsActive()); + + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (1u, "value"); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(SnapshotROInteractive1) { + TSnapshotROInteractive1 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotROInteractive1) { + TSnapshotROInteractive1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotROInteractive2 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto readQuery = Q1_(R"( + SELECT COUNT(*) FROM KV WHERE Value = "One"; + )"); + + auto readResult = R"([ + [1u] + ])"; + + auto tx = session.BeginTransaction(TTxSettings::SnapshotRO()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (100500u, "One"); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(SnapshotROInteractive2) { + TSnapshotROInteractive2 tester; + 
tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotROInteractive2) { + TSnapshotROInteractive2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/ya.make b/ydb/core/kqp/ut/tx/ya.make index 7f4ca5fae0d6..e7369a4a256e 100644 --- a/ydb/core/kqp/ut/tx/ya.make +++ b/ydb/core/kqp/ut/tx/ya.make @@ -16,12 +16,16 @@ SRCS( kqp_locks_tricky_ut.cpp kqp_locks_ut.cpp kqp_mvcc_ut.cpp + kqp_sink_locks_ut.cpp + kqp_sink_mvcc_ut.cpp + kqp_sink_tx_ut.cpp kqp_tx_ut.cpp ) PEERDIR( ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg_dummy ) diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql new file mode 100644 index 000000000000..f9f813948191 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql @@ -0,0 +1,43 @@ +CREATE VIEW `/Root/aggregates_and_window` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql 
b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql new file mode 100644 index 000000000000..c4c0dd0a1c8f --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql new file mode 100644 index 000000000000..c701bd2d2648 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql @@ -0,0 +1,46 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id) +); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql new file mode 100644 index 000000000000..0cc947dd8661 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql new file mode 100644 index 000000000000..dcbfee8f92db --- 
/dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql @@ -0,0 +1,9 @@ +CREATE VIEW `/Root/count_episodes` WITH (security_invoker = TRUE) AS + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql new file mode 100644 index 000000000000..897adef6e6d6 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql new file mode 100644 index 000000000000..8964c9ab76a1 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql new file mode 100644 index 000000000000..153ad494f165 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql new file mode 100644 index 000000000000..545ec4056034 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql @@ -0,0 +1,22 @@ +CREATE VIEW `/Root/count_episodes_with_titles` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + 
GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql new file mode 100644 index 000000000000..99e8b54a675d --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql new file mode 100644 index 000000000000..faa472a66beb --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql @@ -0,0 +1,25 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql new file mode 100644 index 000000000000..b84299a26d02 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql new file mode 
100644 index 000000000000..0189412ace53 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql @@ -0,0 +1,4 @@ +CREATE VIEW `/Root/count_rows` WITH (security_invoker = TRUE) AS + SELECT + COUNT(*) + FROM `/Root/episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql new file mode 100644 index 000000000000..b540f8f401e8 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_rows`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql new file mode 100644 index 000000000000..b9d2cdb0f4a7 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql @@ -0,0 +1,7 @@ +SELECT + * +FROM ( + SELECT + COUNT(*) + FROM `/Root/episodes` +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql new file mode 100644 index 000000000000..49ffdf0dd923 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_rows`; diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql new file mode 100644 index 000000000000..b8793469c350 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql @@ -0,0 +1,9 @@ +CREATE VIEW in_subquery WITH (security_invoker = TRUE) AS + SELECT + * + FROM series + WHERE series_id IN ( + SELECT + series_id + FROM series + ); diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql new file mode 100644 index 000000000000..da81f5486bc6 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW in_subquery; diff 
--git a/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql new file mode 100644 index 000000000000..596f8521ea79 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + * + FROM series + WHERE series_id IN ( + SELECT + series_id + FROM series + ) +); diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql new file mode 100644 index 000000000000..c0c1b52763e0 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM in_subquery; diff --git a/ydb/core/kqp/ut/view/view_ut.cpp b/ydb/core/kqp/ut/view/view_ut.cpp index 381dea85c6d9..cb55869e4bf2 100644 --- a/ydb/core/kqp/ut/view/view_ut.cpp +++ b/ydb/core/kqp/ut/view/view_ut.cpp @@ -56,6 +56,20 @@ TString ReadWholeFile(const TString& path) { return file.ReadAll(); } +NQuery::TExecuteQueryResult ExecuteQuery(NQuery::TSession& session, const TString& query) { + const auto result = session.ExecuteQuery( + query, + NQuery::TTxControl::NoTx() + ).ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), + "Failed to execute the following query:\n" << query << '\n' + << "The issues:\n" << result.GetIssues().ToString() + ); + + return result; +} + void ExecuteDataDefinitionQuery(TSession& session, const TString& script) { const auto result = session.ExecuteSchemeQuery(script).ExtractValueSync(); UNIT_ASSERT_C(result.IsSuccess(), "Failed to execute the following DDL script:\n" @@ -110,20 +124,25 @@ void AssertFromCache(const TMaybe& stats, bool expectedValue) { UNIT_ASSERT_VALUES_EQUAL_C(*isFromCache, expectedValue, stats->ToString()); } -void CompareResults(const TDataQueryResult& first, const TDataQueryResult& second) { - const auto& firstResults = first.GetResultSets(); - const auto& secondResults = second.GetResultSets(); - 
+void CompareResults(const TVector& firstResults, const TVector& secondResults) { UNIT_ASSERT_VALUES_EQUAL(firstResults.size(), secondResults.size()); for (size_t i = 0; i < firstResults.size(); ++i) { CompareYson(FormatResultSetYson(firstResults[i]), FormatResultSetYson(secondResults[i])); } } -void InitializeTablesAndSecondaryViews(TSession& session) { +void CompareResults(const TDataQueryResult& first, const TDataQueryResult& second) { + CompareResults(first.GetResultSets(), second.GetResultSets()); +} + +void CompareResults(const NQuery::TExecuteQueryResult& first, const NQuery::TExecuteQueryResult& second) { + CompareResults(first.GetResultSets(), second.GetResultSets()); +} + +void InitializeTablesAndSecondaryViews(NQuery::TSession& session) { const auto inputFolder = ArcadiaFromCurrentLocation(__SOURCE_FILE__, "input"); - ExecuteDataDefinitionQuery(session, ReadWholeFile(inputFolder + "/create_tables_and_secondary_views.sql")); - ExecuteDataModificationQuery(session, ReadWholeFile(inputFolder + "/fill_tables.sql")); + ExecuteQuery(session, ReadWholeFile(inputFolder + "/create_tables_and_secondary_views.sql")); + ExecuteQuery(session, ReadWholeFile(inputFolder + "/fill_tables.sql")); } } @@ -162,7 +181,7 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { )", path ); - + DisableViewsFeatureFlag(kikimr); const auto creationResult = session.ExecuteSchemeQuery(creationQuery).ExtractValueSync(); UNIT_ASSERT(!creationResult.IsSuccess()); @@ -194,6 +213,90 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { UNIT_ASSERT_STRING_CONTAINS(creationResult.GetIssues().ToString(), "Error: Cannot divide type String and String"); } + Y_UNIT_TEST(ParsingSecurityInvoker) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "TheView"; + constexpr const char* query = "SELECT 1"; + + auto fail = [&](const char* options) 
{ + const TString creationQuery = std::format(R"( + CREATE VIEW {} {} AS {}; + )", + path, + options, + query + ); + + const auto creationResult = session.ExecuteQuery( + creationQuery, + NQuery::TTxControl::NoTx() + ).ExtractValueSync(); + + UNIT_ASSERT_C(!creationResult.IsSuccess(), creationQuery); + UNIT_ASSERT_STRING_CONTAINS( + creationResult.GetIssues().ToString(), "security_invoker option must be explicitly enabled" + ); + }; + fail(""); + fail("WITH security_invoker"); + fail("WITH security_invoker = false"); + fail("WITH SECURITY_INVOKER = true"); // option name is case-sensitive + fail("WITH (security_invoker)"); + fail("WITH (security_invoker = false)"); + fail("WITH (security_invoker = true, security_invoker = false)"); + + auto succeed = [&](const char* options) { + const TString creationQuery = std::format(R"( + CREATE VIEW {} {} AS {}; + DROP VIEW {}; + )", + path, + options, + query, + path + ); + ExecuteQuery(session, creationQuery); + }; + succeed("WITH security_invoker = true"); + succeed("WITH (security_invoker = true)"); + succeed("WITH (security_invoker = tRuE)"); // bool parsing is flexible enough + succeed("WITH (security_invoker = false, security_invoker = true)"); + + { + // literal named expression + const TString creationQuery = std::format(R"( + $value = "true"; + CREATE VIEW {} WITH security_invoker = $value AS {}; + DROP VIEW {}; + )", + path, + query, + path + ); + ExecuteQuery(session, creationQuery); + } + { + // evaluated expression + const TString creationQuery = std::format(R"( + $lambda = ($x) -> {{ + RETURN CAST($x as String) + }}; + $value = $lambda(true); + + CREATE VIEW {} WITH security_invoker = $value AS {}; + DROP VIEW {}; + )", + path, + query, + path + ); + ExecuteQuery(session, creationQuery); + } + } + Y_UNIT_TEST(ListCreatedView) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -241,10 +344,60 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { { const auto 
creationResult = session.ExecuteSchemeQuery(creationQuery).GetValueSync(); UNIT_ASSERT(!creationResult.IsSuccess()); - UNIT_ASSERT(creationResult.GetIssues().ToString().Contains("error: path exist, request accepts it")); + UNIT_ASSERT_STRING_CONTAINS(creationResult.GetIssues().ToString(), "error: path exist, request accepts it"); + } + } + + Y_UNIT_TEST(CreateViewOccupiedName) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "table"; + + const TString createTable = std::format(R"( + CREATE TABLE {} (key Int32, value Utf8, PRIMARY KEY (key)); + )", path + ); + ExecuteQuery(session, createTable); + + auto checkError = [&session](const TString& query, const TString& expectedError) { + const auto result = session.ExecuteQuery(query, NQuery::TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), expectedError); + }; + + const TString queryTemplate = std::format(R"( + CREATE VIEW {{}}{} WITH (security_invoker = true) AS SELECT 1; + )", path + ); + const TString expectedError = std::format("path: '/Root/{}', error: unexpected path type", path); + + for (std::string existenceCheck : {"", "IF NOT EXISTS "}) { + const TString createView = std::vformat(queryTemplate, std::make_format_args(existenceCheck)); + checkError(createView, expectedError); } } + Y_UNIT_TEST(CreateViewIfNotExists) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "/Root/TheView"; + constexpr const char* queryInView = "SELECT 1"; + + const TString creationQuery = std::format(R"( + CREATE VIEW IF NOT EXISTS `{}` WITH (security_invoker = true) AS {}; + )", + path, + queryInView + 
); + ExecuteQuery(session, creationQuery); + // an attempt to create a duplicate does not produce an error + ExecuteQuery(session, creationQuery); + } + Y_UNIT_TEST(DropView) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -296,6 +449,42 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { UNIT_ASSERT_STRING_CONTAINS(dropResult.GetIssues().ToString(), "Error: Views are disabled"); } + Y_UNIT_TEST(DropNonexistingView) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + const auto dropResult = session.ExecuteQuery( + "DROP VIEW NonexistingView;", NQuery::TTxControl::NoTx() + ).ExtractValueSync(); + + UNIT_ASSERT(!dropResult.IsSuccess()); + UNIT_ASSERT_STRING_CONTAINS(dropResult.GetIssues().ToString(), "Error: Path does not exist"); + } + + Y_UNIT_TEST(CallDropViewOnTable) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "table"; + + const TString createTable = std::format(R"( + CREATE TABLE {} (key Int32, value Utf8, PRIMARY KEY (key)); + )", path + ); + ExecuteQuery(session, createTable); + + auto checkError = [&session](const TString& query, const TString& expectedError) { + const auto result = session.ExecuteQuery(query, NQuery::TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), expectedError); + }; + const TString expectedError = std::format("path: '/Root/{}', error: path is not a view", path); + checkError(std::format("DROP VIEW {};", path), expectedError); + checkError(std::format("DROP VIEW IF EXISTS {};", path), expectedError); + } + Y_UNIT_TEST(DropSameViewTwice) { TKikimrRunner 
kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -321,10 +510,62 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { { const auto dropResult = session.ExecuteSchemeQuery(dropQuery).GetValueSync(); UNIT_ASSERT(!dropResult.IsSuccess()); - UNIT_ASSERT(dropResult.GetIssues().ToString().Contains("Error: Path does not exist")); + UNIT_ASSERT_STRING_CONTAINS(dropResult.GetIssues().ToString(), "Error: Path does not exist"); } } + Y_UNIT_TEST(DropViewIfExists) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "/Root/TheView"; + constexpr const char* queryInView = "SELECT 1"; + + const TString creationQuery = std::format(R"( + CREATE VIEW `{}` WITH (security_invoker = true) AS {}; + )", + path, + queryInView + ); + ExecuteQuery(session, creationQuery); + + const TString dropQuery = std::format(R"( + DROP VIEW IF EXISTS `{}`; + )", + path + ); + ExecuteQuery(session, dropQuery); + // an attempt to drop an already deleted view does not produce an error + ExecuteQuery(session, dropQuery); + } + + Y_UNIT_TEST(DropViewInFolder) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + constexpr const char* path = "/Root/some/path/to/TheView"; + constexpr const char* queryInView = "SELECT 1"; + + const TString creationQuery = std::format(R"( + CREATE VIEW `{}` WITH (security_invoker = true) AS {}; + )", + path, + queryInView + ); + ExecuteDataDefinitionQuery(session, creationQuery); + + const TString dropQuery = std::format(R"( + DROP VIEW `{}`; + )", + path + ); + ExecuteDataDefinitionQuery(session, dropQuery); + ExpectUnknownEntry(runtime, path); + } + Y_UNIT_TEST(ContextPollution) 
{ TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -336,7 +577,7 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { ExecuteDataDefinitionQuery(session, R"( CREATE VIEW OuterView WITH (security_invoker = TRUE) AS SELECT * FROM InnerView; )"); - + ExecuteDataDefinitionQuery(session, R"( DROP VIEW OuterView; CREATE VIEW OuterView WITH (security_invoker = TRUE) AS SELECT * FROM InnerView; @@ -382,6 +623,46 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { CompareResults(etalonResults, selectFromViewResults); } + Y_UNIT_TEST(OneTableUsingRelativeName) { + TKikimrRunner kikimr; + + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + runtime.SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NLog::PRI_DEBUG); + + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* viewName = "TheView"; + constexpr const char* testTable = "Test"; + const auto innerQuery = std::format(R"( + SELECT * FROM {} + )", + testTable + ); + + const TString creationQuery = std::format(R"( + CREATE VIEW {} WITH (security_invoker = true) AS {}; + )", + viewName, + innerQuery + ); + ExecuteQuery(session, creationQuery); + + const auto etalonResults = ExecuteQuery(session, std::format(R"( + SELECT * FROM ({}); + )", + innerQuery + ) + ); + const auto selectFromViewResults = ExecuteQuery(session, std::format(R"( + SELECT * FROM {}; + )", + viewName + ) + ); + CompareResults(etalonResults, selectFromViewResults); + } + Y_UNIT_TEST(DisabledFeatureFlag) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); @@ -413,7 +694,7 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { Y_UNIT_TEST(ReadTestCasesFromFiles) { TKikimrRunner kikimr; EnableViewsFeatureFlag(kikimr); - auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + auto session = 
kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); InitializeTablesAndSecondaryViews(session); EnableLogging(); @@ -424,13 +705,13 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { TString testcase; while (testcase = testcases.Next()) { const auto pathPrefix = TStringBuilder() << testcasesFolder << '/' << testcase << '/'; - ExecuteDataDefinitionQuery(session, ReadWholeFile(pathPrefix + "create_view.sql")); + ExecuteQuery(session, ReadWholeFile(pathPrefix + "create_view.sql")); - const auto etalonResults = ExecuteDataModificationQuery(session, ReadWholeFile(pathPrefix + "etalon_query.sql")); - const auto selectFromViewResults = ExecuteDataModificationQuery(session, ReadWholeFile(pathPrefix + "select_from_view.sql")); + const auto etalonResults = ExecuteQuery(session, ReadWholeFile(pathPrefix + "etalon_query.sql")); + const auto selectFromViewResults = ExecuteQuery(session, ReadWholeFile(pathPrefix + "select_from_view.sql")); CompareResults(etalonResults, selectFromViewResults); - ExecuteDataDefinitionQuery(session, ReadWholeFile(pathPrefix + "drop_view.sql")); + ExecuteQuery(session, ReadWholeFile(pathPrefix + "drop_view.sql")); } } diff --git a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp index 8cd9b54a6857..edf5740b7ef2 100644 --- a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp @@ -84,6 +84,93 @@ Y_UNIT_TEST_SUITE(KqpPragma) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT_C(result.GetIssues().Empty(), result.GetIssues().ToString()); } + + Y_UNIT_TEST(MatchRecognizeWithTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA 
FeatureR010="prototype"; + + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + Y_UNIT_TEST(MatchRecognizeWithoutTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA FeatureR010="prototype"; + PRAGMA config.flags("MatchRecognizeStream", "disable"); + + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } } } // namspace NKqp diff --git 
a/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp b/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp index 6525749d7ce2..b09e37b794dd 100644 --- a/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp @@ -79,7 +79,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); )").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - + result = client.ExecuteYqlScript(R"( ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); COMMIT; @@ -88,7 +88,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 4); )").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - + result = client.ExecuteYqlScript(R"( ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); COMMIT; @@ -989,7 +989,36 @@ Y_UNIT_TEST_SUITE(KqpScripting) { [[[101u]];[[201u]];[[301u]];[[401u]];[[501u]];[[601u]];[[701u]];[[801u]]]; [[8u]]; [[8u]]; - [[8u]]])", StreamResultToYson(it)); + [[8u]] + ])", StreamResultToYson(it)); + } + + Y_UNIT_TEST(SelectNullType) { + TKikimrRunner kikimr; + TScriptingClient client(kikimr.GetDriver()); + { + auto result = client.ExecuteYqlScript(R"( + CREATE TABLE demo1(id Text, PRIMARY KEY(id)); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteYqlScript(R"( + UPSERT INTO demo1(id) VALUES("a"),("b"),("c"); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteYqlScript(R"( + SELECT NULL auto_proc_ FROM demo1 LIMIT 10; + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), 
EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [#];[#];[#] + ])", FormatResultSetYson(result.GetResultSet(0))); + } } Y_UNIT_TEST(StreamExecuteYqlScriptLeadingEmptyScan) { @@ -1206,7 +1235,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { Y_UNIT_TEST(StreamExecuteYqlScriptPg) { TKikimrRunner kikimr; - + auto settings = TExecuteYqlRequestSettings() .Syntax(Ydb::Query::SYNTAX_PG); diff --git a/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp b/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp index 247704928cfb..17c7fa65c0fc 100644 --- a/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp @@ -613,6 +613,7 @@ Y_UNIT_TEST_SUITE(KqpYql) { appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() + .SetEnableUuidAsPrimaryKey(false) .SetAppConfig(appConfig) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -684,7 +685,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -833,7 +833,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -890,7 +889,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -917,7 +915,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { Y_UNIT_TEST(UuidPrimaryKeyBulkUpsert) { auto settings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true) .SetWithSampleTables(false); auto kikimr 
= TKikimrRunner{settings}; auto db = kikimr.GetTableClient(); diff --git a/ydb/core/kqp/workload_service/actors/actors.h b/ydb/core/kqp/workload_service/actors/actors.h index 7bae3b142656..d21706f3df7b 100644 --- a/ydb/core/kqp/workload_service/actors/actors.h +++ b/ydb/core/kqp/workload_service/actors/actors.h @@ -6,13 +6,19 @@ namespace NKikimr::NKqp::NWorkload { // Pool state holder -NActors::IActor* CreatePoolHandlerActor(const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters); +NActors::IActor* CreatePoolHandlerActor(const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters); // Fetch pool and create default pool if needed NActors::IActor* CreatePoolResolverActor(TEvPlaceRequestIntoPool::TPtr event, bool defaultPoolExists); // Fetch and create pool in scheme shard -NActors::IActor* CreatePoolFetcherActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken); -NActors::IActor* CreatePoolCreatorActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl); +NActors::IActor* CreatePoolFetcherActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& poolId, TIntrusiveConstPtr userToken); +NActors::IActor* CreatePoolCreatorActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl); + +// Checks that database is serverless and return database id +NActors::IActor* CreateDatabaseFetcherActor(const NActors::TActorId& replyActorId, const TString& database, TIntrusiveConstPtr userToken = nullptr, NACLib::EAccessRights checkAccess = 
NACLib::EAccessRights::NoAccess); + +// Cpu load fetcher actor +NActors::IActor* CreateCpuLoadFetcherActor(const NActors::TActorId& replyActorId); } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp b/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp new file mode 100644 index 000000000000..425c8cfbd30e --- /dev/null +++ b/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp @@ -0,0 +1,77 @@ +#include "actors.h" + +#include + +#include + + +namespace NKikimr::NKqp::NWorkload { + +namespace { + +class TCpuLoadFetcherActor : public NKikimr::TQueryBase { + using TBase = NKikimr::TQueryBase; + +public: + TCpuLoadFetcherActor() + : TBase(NKikimrServices::KQP_WORKLOAD_SERVICE) + { + SetOperationInfo(__func__, ""); + } + + void OnRunQuery() override { + TString sql = TStringBuilder() << R"( + -- TCpuLoadFetcherActor::OnRunQuery + + SELECT + SUM(CpuThreads) AS ThreadsCount, + SUM(CpuThreads * (1.0 - CpuIdle)) AS TotalLoad + FROM `.sys/nodes`; + )"; + + RunDataQuery(sql); + } + + void OnQueryResult() override { + if (ResultSets.size() != 1) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + NYdb::TResultSetParser result(ResultSets[0]); + if (!result.TryNextRow()) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + ThreadsCount = result.ColumnParser("ThreadsCount").GetOptionalUint64().GetOrElse(0); + TotalLoad = result.ColumnParser("TotalLoad").GetOptionalDouble().GetOrElse(0.0); + + if (!ThreadsCount) { + Finish(Ydb::StatusIds::NOT_FOUND, "Cpu info not found"); + return; + } + + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + if (status == Ydb::StatusIds::SUCCESS) { + Send(Owner, new TEvPrivate::TEvCpuLoadResponse(Ydb::StatusIds::SUCCESS, TotalLoad / ThreadsCount, ThreadsCount, std::move(issues))); + } else { + Send(Owner, new TEvPrivate::TEvCpuLoadResponse(status, 0.0, 0, 
std::move(issues))); + } + } + +private: + double TotalLoad = 0.0; + ui64 ThreadsCount = 0; +}; + +} // anonymous namespace + +IActor* CreateCpuLoadFetcherActor(const TActorId& replyActorId) { + return new TQueryRetryActor(replyActorId); +} + +} // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp b/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp index 76b72ceee20b..63fb1adbe068 100644 --- a/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp +++ b/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp @@ -22,6 +22,82 @@ template class TPoolHandlerActorBase : public TActor { using TBase = TActor; + struct TCommonCounters { + const NMonitoring::TDynamicCounterPtr CountersRoot; + const NMonitoring::TDynamicCounterPtr CountersSubgroup; + + // Workload service counters + NMonitoring::TDynamicCounters::TCounterPtr ActivePoolHandlers; + + // Pool counters + NMonitoring::TDynamicCounters::TCounterPtr LocalInFly; + NMonitoring::TDynamicCounters::TCounterPtr LocalDelayedRequests; + NMonitoring::TDynamicCounters::TCounterPtr ContinueOk; + NMonitoring::TDynamicCounters::TCounterPtr ContinueOverloaded; + NMonitoring::TDynamicCounters::TCounterPtr ContinueError; + NMonitoring::TDynamicCounters::TCounterPtr CleanupOk; + NMonitoring::TDynamicCounters::TCounterPtr CleanupError; + NMonitoring::TDynamicCounters::TCounterPtr Cancelled; + NMonitoring::THistogramPtr DelayedTimeMs; + NMonitoring::THistogramPtr RequestsLatencyMs; + + // Config counters + NMonitoring::TDynamicCounters::TCounterPtr InFlightLimit; + NMonitoring::TDynamicCounters::TCounterPtr QueueSizeLimit; + NMonitoring::TDynamicCounters::TCounterPtr LoadCpuThreshold; + + TCommonCounters(NMonitoring::TDynamicCounterPtr counters, const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig) + : CountersRoot(counters) + , CountersSubgroup(counters->GetSubgroup("pool", TStringBuilder() << databaseId << "/" << 
poolId)) + { + Register(); + UpdateConfigCounters(poolConfig); + } + + void CollectRequestLatency(TInstant continueTime) { + if (continueTime) { + RequestsLatencyMs->Collect((TInstant::Now() - continueTime).MilliSeconds()); + } + } + + void UpdateConfigCounters(const NResourcePool::TPoolSettings& poolConfig) { + InFlightLimit->Set(std::max(poolConfig.ConcurrentQueryLimit, 0)); + QueueSizeLimit->Set(std::max(poolConfig.QueueSize, 0)); + LoadCpuThreshold->Set(std::max(poolConfig.DatabaseLoadCpuThreshold, 0.0)); + } + + void OnCleanup(bool resetConfigCounters) { + ActivePoolHandlers->Dec(); + + if (resetConfigCounters) { + InFlightLimit->Set(0); + QueueSizeLimit->Set(0); + LoadCpuThreshold->Set(0); + } + } + + private: + void Register() { + ActivePoolHandlers = CountersRoot->GetCounter("ActivePoolHandlers", false); + ActivePoolHandlers->Inc(); + + LocalInFly = CountersSubgroup->GetCounter("LocalInFly", false); + LocalDelayedRequests = CountersSubgroup->GetCounter("LocalDelayedRequests", false); + ContinueOk = CountersSubgroup->GetCounter("ContinueOk", true); + ContinueOverloaded = CountersSubgroup->GetCounter("ContinueOverloaded", true); + ContinueError = CountersSubgroup->GetCounter("ContinueError", true); + CleanupOk = CountersSubgroup->GetCounter("CleanupOk", true); + CleanupError = CountersSubgroup->GetCounter("CleanupError", true); + Cancelled = CountersSubgroup->GetCounter("Cancelled", true); + DelayedTimeMs = CountersSubgroup->GetHistogram("DelayedTimeMs", NMonitoring::ExponentialHistogram(20, 2, 4)); + RequestsLatencyMs = CountersSubgroup->GetHistogram("RequestsLatencyMs", NMonitoring::ExponentialHistogram(20, 2, 4)); + + InFlightLimit = CountersSubgroup->GetCounter("InFlightLimit", false); + QueueSizeLimit = CountersSubgroup->GetCounter("QueueSizeLimit", false); + LoadCpuThreshold = CountersSubgroup->GetCounter("LoadCpuThreshold", false); + } + }; + protected: struct TRequest { enum class EState { @@ -38,32 +114,33 @@ class TPoolHandlerActorBase : public 
TActor { const TActorId WorkerActorId; const TString SessionId; const TInstant StartTime = TInstant::Now(); + TInstant ContinueTime; EState State = EState::Pending; bool Started = false; // after TEvContinueRequest success bool CleanupRequired = false; + bool UsedCpuQuota = false; + TDuration Duration; + TDuration CpuConsumed; }; public: - TPoolHandlerActorBase(void (TDerived::* requestFunc)(TAutoPtr& ev), const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) + TPoolHandlerActorBase(void (TDerived::* requestFunc)(TAutoPtr& ev), const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) : TBase(requestFunc) - , CountersRoot(counters) - , CountersSubgroup(counters->GetSubgroup("pool", CanonizePath(TStringBuilder() << database << "/" << poolId))) - , Database(database) + , Counters(counters, databaseId, poolId, poolConfig) + , DatabaseId(databaseId) , PoolId(poolId) , QueueSizeLimit(GetMaxQueueSize(poolConfig)) , InFlightLimit(GetMaxInFlight(poolConfig)) , PoolConfig(poolConfig) - , CancelAfter(poolConfig.QueryCancelAfter) - { - RegisterCounters(); - } + {} STRICT_STFUNC(StateFuncBase, // Workload service events sFunc(TEvents::TEvPoison, HandlePoison); - sFunc(TEvPrivate::TEvStopPoolHandler, HandleStop); + hFunc(TEvPrivate::TEvStopPoolHandler, Handle); hFunc(TEvPrivate::TEvResolvePoolResponse, Handle); + hFunc(TEvPrivate::TEvUpdatePoolSubscription, Handle); // Pool handler events hFunc(TEvPrivate::TEvCancelRequest, Handle); @@ -74,7 +151,7 @@ class TPoolHandlerActorBase : public TActor { // Schemeboard events hFunc(TEvTxProxySchemeCache::TEvWatchNotifyUpdated, Handle); - IgnoreFunc(TEvTxProxySchemeCache::TEvWatchNotifyDeleted); + hFunc(TEvTxProxySchemeCache::TEvWatchNotifyDeleted, Handle); IgnoreFunc(TEvTxProxySchemeCache::TEvWatchNotifyUnavailable); ) @@ -83,7 +160,10 @@ class TPoolHandlerActorBase : 
public TActor { this->Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchRemove(0)); } - ActivePoolHandlers->Dec(); + SendPoolInfoUpdate(std::nullopt, std::nullopt, Subscribers); + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvStopPoolHandlerResponse(DatabaseId, PoolId)); + + Counters.OnCleanup(ResetCountersOnStrop); TBase::PassAway(); } @@ -94,8 +174,9 @@ class TPoolHandlerActorBase : public TActor { this->PassAway(); } - void HandleStop() { + void Handle(TEvPrivate::TEvStopPoolHandler::TPtr& ev) { LOG_I("Got stop pool handler request, waiting for " << LocalSessions.size() << " requests"); + ResetCountersOnStrop = ev->Get()->ResetCounters; if (LocalSessions.empty()) { PassAway(); } else { @@ -105,31 +186,31 @@ class TPoolHandlerActorBase : public TActor { void Handle(TEvPrivate::TEvResolvePoolResponse::TPtr& ev) { auto event = std::move(ev->Get()->Event); + const TString& sessionId = event->Get()->SessionId; + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvPlaceRequestIntoPoolResponse(DatabaseId, PoolId, sessionId)); + const TActorId& workerActorId = event->Sender; if (!InFlightLimit) { this->Send(workerActorId, new TEvContinueRequest(Ydb::StatusIds::PRECONDITION_FAILED, PoolId, PoolConfig, {NYql::TIssue(TStringBuilder() << "Resource pool " << PoolId << " was disabled due to zero concurrent query limit")})); return; } - const TString& sessionId = event->Get()->SessionId; if (LocalSessions.contains(sessionId)) { this->Send(workerActorId, new TEvContinueRequest(Ydb::StatusIds::INTERNAL_ERROR, PoolId, PoolConfig, {NYql::TIssue(TStringBuilder() << "Got duplicate session id " << sessionId << " for pool " << PoolId)})); return; } LOG_D("Received new request, worker id: " << workerActorId << ", session id: " << sessionId); - if (CancelAfter) { - this->Schedule(CancelAfter, new TEvPrivate::TEvCancelRequest(sessionId)); + if (auto cancelAfter = PoolConfig.QueryCancelAfter) { + 
this->Schedule(cancelAfter, new TEvPrivate::TEvCancelRequest(sessionId)); } TRequest* request = &LocalSessions.insert({sessionId, TRequest(workerActorId, sessionId)}).first->second; - LocalDelayedRequests->Inc(); + Counters.LocalDelayedRequests->Inc(); UpdatePoolConfig(ev->Get()->PoolConfig); UpdateSchemeboardSubscription(ev->Get()->PathId); OnScheduleRequest(request); - - this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvPlaceRequestIntoPoolResponse(Database, PoolId)); } void Handle(TEvCleanupRequest::TPtr& ev) { @@ -146,8 +227,10 @@ class TPoolHandlerActorBase : public TActor { return; } request->State = TRequest::EState::Finishing; + request->Duration = ev->Get()->Duration; + request->CpuConsumed = ev->Get()->CpuConsumed; - LOG_D("Received cleanup request, worker id: " << workerActorId << ", session id: " << sessionId); + LOG_D("Received cleanup request, worker id: " << workerActorId << ", session id: " << sessionId << ", duration: " << request->Duration << ", cpu consumed: " << request->CpuConsumed); OnCleanupRequest(request); } @@ -163,6 +246,14 @@ class TPoolHandlerActorBase : public TActor { OnCleanupRequest(request); } + void Handle(TEvPrivate::TEvUpdatePoolSubscription::TPtr& ev) { + const auto& newSubscribers = ev->Get()->Subscribers; + if (!UpdateSchemeboardSubscription(ev->Get()->PathId)) { + SendPoolInfoUpdate(PoolConfig, SecurityObject, newSubscribers); + } + Subscribers.insert(newSubscribers.begin(), newSubscribers.end()); + } + void Handle(TEvTxProxySchemeCache::TEvWatchNotifyUpdated::TPtr& ev) { if (ev->Get()->Key != WatchKey) { // Skip old paths watch notifications @@ -185,6 +276,23 @@ class TPoolHandlerActorBase : public TActor { NResourcePool::TPoolSettings poolConfig; ParsePoolSettings(result->GetPathDescription().GetResourcePoolDescription(), poolConfig); UpdatePoolConfig(poolConfig); + + const auto& pathDescription = result->GetPathDescription().GetSelf(); + SecurityObject = 
NACLib::TSecurityObject(pathDescription.GetOwner(), false); + if (!SecurityObject->MutableACL()->ParseFromString(pathDescription.GetEffectiveACL())) { + SecurityObject = std::nullopt; + } + SendPoolInfoUpdate(poolConfig, SecurityObject, Subscribers); + } + + void Handle(TEvTxProxySchemeCache::TEvWatchNotifyDeleted::TPtr& ev) { + if (ev->Get()->Key != WatchKey) { + // Skip old paths watch notifications + return; + } + + LOG_D("Got delete notification"); + SendPoolInfoUpdate(std::nullopt, std::nullopt, Subscribers); } public: @@ -198,25 +306,26 @@ class TPoolHandlerActorBase : public TActor { if (status == Ydb::StatusIds::SUCCESS) { LocalInFlight++; request->Started = true; - LocalInFly->Inc(); - ContinueOk->Inc(); - DelayedTimeMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + request->ContinueTime = TInstant::Now(); + Counters.LocalInFly->Inc(); + Counters.ContinueOk->Inc(); + Counters.DelayedTimeMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); LOG_D("Reply continue success to " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } else { if (status == Ydb::StatusIds::OVERLOADED) { - ContinueOverloaded->Inc(); + Counters.ContinueOverloaded->Inc(); LOG_I("Reply overloaded to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } else if (status == Ydb::StatusIds::CANCELLED) { - Cancelled->Inc(); + Counters.Cancelled->Inc(); LOG_I("Reply cancelled to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } else { - ContinueError->Inc(); + Counters.ContinueError->Inc(); LOG_W("Reply continue error " << status << " to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } - RemoveRequest(request->SessionId); + RemoveRequest(request); } - LocalDelayedRequests->Dec(); + 
Counters.LocalDelayedRequests->Dec(); } void FinalReply(TRequest* request, Ydb::StatusIds::StatusCode status, const TString& message) { @@ -227,7 +336,7 @@ class TPoolHandlerActorBase : public TActor { if (!request->Started && request->State != TRequest::EState::Finishing) { if (request->State == TRequest::EState::Canceling && status == Ydb::StatusIds::SUCCESS) { status = Ydb::StatusIds::CANCELLED; - issues.AddIssue(TStringBuilder() << "Delay deadline exceeded in pool " << PoolId); + issues.AddIssue(TStringBuilder() << "Request was delayed during " << TInstant::Now() - request->StartTime << ", that is larger than delay deadline " << PoolConfig.QueryCancelAfter << " in pool " << PoolId << ", request was canceled"); } ReplyContinue(request, status, issues); return; @@ -235,9 +344,9 @@ class TPoolHandlerActorBase : public TActor { if (request->Started) { LocalInFlight--; - LocalInFly->Dec(); + Counters.LocalInFly->Dec(); } else { - LocalDelayedRequests->Dec(); + Counters.LocalDelayedRequests->Dec(); } if (request->State == TRequest::EState::Canceling) { @@ -246,7 +355,13 @@ class TPoolHandlerActorBase : public TActor { ReplyCleanup(request, status, issues); } - RemoveRequest(request->SessionId); + RemoveRequest(request); + } + + void SendPoolInfoUpdate(const std::optional& config, const std::optional& securityObject, const std::unordered_set& subscribers) const { + for (const auto& subscriber : subscribers) { + this->Send(subscriber, new TEvUpdatePoolInfo(DatabaseId, PoolId, config, securityObject)); + } } protected: @@ -273,9 +388,13 @@ class TPoolHandlerActorBase : public TActor { return nullptr; } - void RemoveRequest(const TString& sessionId) { - LocalSessions.erase(sessionId); - this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvFinishRequestInPool(Database, PoolId)); + void RemoveRequest(TRequest* request) { + auto event = std::make_unique( + DatabaseId, PoolId, request->Duration, request->CpuConsumed, request->UsedCpuQuota + ); + 
this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), event.release()); + + LocalSessions.erase(request->SessionId); if (StopHandler && LocalSessions.empty()) { LOG_I("All requests finished, stop handler"); PassAway(); @@ -291,14 +410,21 @@ class TPoolHandlerActorBase : public TActor { } TMaybe GetWaitDeadline(TInstant startTime) const { - if (!CancelAfter) { + if (auto cancelAfter = PoolConfig.QueryCancelAfter) { + return startTime + cancelAfter; + } + return Nothing(); + } + + TMaybe GetLoadCpuThreshold() const { + if (PoolConfig.DatabaseLoadCpuThreshold < 0.0) { return Nothing(); } - return startTime + CancelAfter; + return PoolConfig.DatabaseLoadCpuThreshold; } TString LogPrefix() const { - return TStringBuilder() << "[TPoolHandlerActorBase] ActorId: " << this->SelfId() << ", Database: " << Database << ", PoolId: " << PoolId << ", "; + return TStringBuilder() << "[TPoolHandlerActorBase] ActorId: " << this->SelfId() << ", DatabaseId: " << DatabaseId << ", PoolId: " << PoolId << ", "; } private: @@ -306,11 +432,11 @@ class TPoolHandlerActorBase : public TActor { this->Send(request->WorkerActorId, new TEvCleanupResponse(status, issues)); if (status == Ydb::StatusIds::SUCCESS) { - CleanupOk->Inc(); - RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + Counters.CleanupOk->Inc(); + Counters.CollectRequestLatency(request->ContinueTime); LOG_D("Reply cleanup success to " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } else { - CleanupError->Inc(); + Counters.CleanupError->Inc(); LOG_W("Reply cleanup error " << status << " to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } } @@ -320,14 +446,14 @@ class TPoolHandlerActorBase : public TActor { ev->Record.MutableRequest()->SetSessionId(request->SessionId); this->Send(MakeKqpProxyID(this->SelfId().NodeId()), ev.release()); - Cancelled->Inc(); - 
RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + Counters.Cancelled->Inc(); + Counters.CollectRequestLatency(request->ContinueTime); LOG_I("Cancel request for worker " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } - void UpdateSchemeboardSubscription(TPathId pathId) { + bool UpdateSchemeboardSubscription(TPathId pathId) { if (WatchPathId && *WatchPathId == pathId) { - return; + return false; } if (WatchPathId) { @@ -340,6 +466,7 @@ class TPoolHandlerActorBase : public TActor { LOG_D("Subscribed on schemeboard notifications for path: " << pathId.ToString()); WatchPathId = std::make_unique(pathId); this->Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchPathId(*WatchPathId, WatchKey)); + return true; } void UpdatePoolConfig(const NResourcePool::TPoolSettings& poolConfig) { @@ -349,14 +476,14 @@ class TPoolHandlerActorBase : public TActor { LOG_D("Pool config has changed, queue size: " << poolConfig.QueueSize << ", in flight limit: " << poolConfig.ConcurrentQueryLimit); PoolConfig = poolConfig; - CancelAfter = poolConfig.QueryCancelAfter; QueueSizeLimit = GetMaxQueueSize(poolConfig); InFlightLimit = GetMaxInFlight(poolConfig); + Counters.UpdateConfigCounters(poolConfig); RefreshState(true); if (ShouldResign()) { - const TActorId& newHandler = this->RegisterWithSameMailbox(CreatePoolHandlerActor(Database, PoolId, poolConfig, CountersRoot)); - this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvResignPoolHandler(Database, PoolId, newHandler)); + const TActorId& newHandler = this->RegisterWithSameMailbox(CreatePoolHandlerActor(DatabaseId, PoolId, poolConfig, Counters.CountersRoot)); + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvResignPoolHandler(DatabaseId, PoolId, newHandler)); } } @@ -370,57 +497,29 @@ class TPoolHandlerActorBase : public TActor { return concurrentQueryLimit == -1 ? 
std::numeric_limits::max() : static_cast(concurrentQueryLimit); } - void RegisterCounters() { - ActivePoolHandlers = CountersRoot->GetCounter("ActivePoolHandlers", false); - ActivePoolHandlers->Inc(); - - LocalInFly = CountersSubgroup->GetCounter("LocalInFly", false); - LocalDelayedRequests = CountersSubgroup->GetCounter("LocalDelayedRequests", false); - ContinueOk = CountersSubgroup->GetCounter("ContinueOk", true); - ContinueOverloaded = CountersSubgroup->GetCounter("ContinueOverloaded", true); - ContinueError = CountersSubgroup->GetCounter("ContinueError", true); - CleanupOk = CountersSubgroup->GetCounter("CleanupOk", true); - CleanupError = CountersSubgroup->GetCounter("CleanupError", true); - Cancelled = CountersSubgroup->GetCounter("Cancelled", true); - DelayedTimeMs = CountersSubgroup->GetHistogram("DelayedTimeMs", NMonitoring::ExponentialHistogram(20, 2, 4)); - RequestsLatencyMs = CountersSubgroup->GetHistogram("RequestsLatencyMs", NMonitoring::ExponentialHistogram(20, 2, 4)); - } - protected: - NMonitoring::TDynamicCounterPtr CountersRoot; - NMonitoring::TDynamicCounterPtr CountersSubgroup; + TCommonCounters Counters; // Configuration - const TString Database; + const TString DatabaseId; const TString PoolId; ui64 QueueSizeLimit = std::numeric_limits::max(); ui64 InFlightLimit = std::numeric_limits::max(); private: NResourcePool::TPoolSettings PoolConfig; - TDuration CancelAfter; + std::optional SecurityObject; // Scheme board settings std::unique_ptr WatchPathId; ui64 WatchKey = 0; + std::unordered_set Subscribers; // Pool state ui64 LocalInFlight = 0; std::unordered_map LocalSessions; bool StopHandler = false; // Stop than all requests finished - - // Counters - NMonitoring::TDynamicCounters::TCounterPtr ActivePoolHandlers; - NMonitoring::TDynamicCounters::TCounterPtr LocalInFly; - NMonitoring::TDynamicCounters::TCounterPtr LocalDelayedRequests; - NMonitoring::TDynamicCounters::TCounterPtr ContinueOk; - NMonitoring::TDynamicCounters::TCounterPtr 
ContinueOverloaded; - NMonitoring::TDynamicCounters::TCounterPtr ContinueError; - NMonitoring::TDynamicCounters::TCounterPtr CleanupOk; - NMonitoring::TDynamicCounters::TCounterPtr CleanupError; - NMonitoring::TDynamicCounters::TCounterPtr Cancelled; - NMonitoring::THistogramPtr DelayedTimeMs; - NMonitoring::THistogramPtr RequestsLatencyMs; + bool ResetCountersOnStrop = true; }; @@ -428,15 +527,15 @@ class TUnlimitedPoolHandlerActor : public TPoolHandlerActorBase; public: - TUnlimitedPoolHandlerActor(const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) - : TBase(&TBase::StateFuncBase, database, poolId, poolConfig, counters) + TUnlimitedPoolHandlerActor(const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) + : TBase(&TBase::StateFuncBase, databaseId, poolId, poolConfig, counters) { Y_ENSURE(!ShouldResign()); } protected: bool ShouldResign() const override { - return 0 < InFlightLimit && InFlightLimit < std::numeric_limits::max(); + return 0 < InFlightLimit && (InFlightLimit < std::numeric_limits::max() || GetLoadCpuThreshold()); } void OnScheduleRequest(TRequest* request) override { @@ -452,20 +551,59 @@ class TUnlimitedPoolHandlerActor : public TPoolHandlerActorBase { using TBase = TPoolHandlerActorBase; + struct TCounters { + // Fifo pool counters + NMonitoring::TDynamicCounters::TCounterPtr PendingRequestsCount; + NMonitoring::TDynamicCounters::TCounterPtr FinishingRequestsCount; + NMonitoring::TDynamicCounters::TCounterPtr GlobalInFly; + NMonitoring::TDynamicCounters::TCounterPtr GlobalDelayedRequests; + NMonitoring::THistogramPtr PoolStateUpdatesBacklogMs; + + TCounters(NMonitoring::TDynamicCounterPtr countersSubgroup) { + Register(countersSubgroup); + } + + void UpdateGlobalState(const TPoolStateDescription& description) { + GlobalInFly->Set(description.RunningRequests); + 
GlobalDelayedRequests->Set(description.DelayedRequests); + } + + void OnCleanup() { + GlobalInFly->Set(0); + GlobalDelayedRequests->Set(0); + } + + private: + void Register(NMonitoring::TDynamicCounterPtr countersSubgroup) { + PendingRequestsCount = countersSubgroup->GetCounter("PendingRequestsCount", false); + FinishingRequestsCount = countersSubgroup->GetCounter("FinishingRequestsCount", false); + GlobalInFly = countersSubgroup->GetCounter("GlobalInFly", false); + GlobalDelayedRequests = countersSubgroup->GetCounter("GlobalDelayedRequests", false); + PoolStateUpdatesBacklogMs = countersSubgroup->GetHistogram("PoolStateUpdatesBacklogMs", NMonitoring::LinearHistogram(20, 0, 3 * LEASE_DURATION.MillisecondsFloat() / 40)); + } + }; + + enum class EStartRequestCase { + Pending, + Delayed + }; + static constexpr ui64 MAX_PENDING_REQUESTS = 1000; public: - TFifoPoolHandlerActor(const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) - : TBase(&TFifoPoolHandlerActor::StateFunc, database, poolId, poolConfig, counters) + TFifoPoolHandlerActor( const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) + : TBase(&TFifoPoolHandlerActor::StateFunc, databaseId, poolId, poolConfig, counters) + , FifoCounters(Counters.CountersSubgroup) { Y_ENSURE(!ShouldResign()); - RegisterCounters(); } STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { sFunc(TEvents::TEvWakeup, HandleRefreshState); sFunc(TEvPrivate::TEvRefreshPoolState, HandleExternalRefreshState); + hFunc(TEvPrivate::TEvCpuQuotaResponse, Handle); + hFunc(TEvPrivate::TEvNodesInfoResponse, Handle); hFunc(TEvPrivate::TEvTablesCreationFinished, Handle); hFunc(TEvPrivate::TEvRefreshPoolStateResponse, Handle); @@ -478,28 +616,32 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(0); - GlobalDelayedRequests->Set(0); + FifoCounters.OnCleanup(); 
TBase::PassAway(); } protected: bool ShouldResign() const override { - return InFlightLimit == 0 || InFlightLimit == std::numeric_limits::max(); + return InFlightLimit == 0 || (InFlightLimit == std::numeric_limits::max() && !GetLoadCpuThreshold()); } void OnScheduleRequest(TRequest* request) override { - if (PendingRequests.size() >= MAX_PENDING_REQUESTS || GetLocalSessionsCount() - GetLocalInFlight() > QueueSizeLimit + 1) { - ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); + if (PendingRequests.size() >= MAX_PENDING_REQUESTS) { + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Request was rejected, number of local pending requests is " << PendingRequests.size() << ", that is larger than allowed limit " << MAX_PENDING_REQUESTS); + return; + } + + if (SaturationSub(GetLocalSessionsCount() - GetLocalInFlight(), InFlightLimit) > QueueSizeLimit) { + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Request was rejected, number of local pending/delayed requests is " << GetLocalSessionsCount() - GetLocalInFlight() << ", that is larger than allowed limit " << QueueSizeLimit << " (including concurrent query limit " << InFlightLimit << ") for pool " << PoolId); return; } PendingRequests.emplace_back(request->SessionId); - PendingRequestsCount->Inc(); + FifoCounters.PendingRequestsCount->Inc(); if (!PreparingFinished) { - this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvPrepareTablesRequest(Database, PoolId)); + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvPrepareTablesRequest(DatabaseId, PoolId)); } RefreshState(); @@ -515,6 +657,11 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase LEASE_DURATION) { + WaitingNodesInfo = true; + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvNodesInfoRequest()); + } + RefreshRequired |= refreshRequired; if 
(!PreparingFinished) { return; @@ -532,7 +679,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateRefreshPoolStateActor(this->SelfId(), Database, PoolId, LEASE_DURATION, CountersSubgroup)); + this->Register(CreateRefreshPoolStateActor(this->SelfId(), DatabaseId, PoolId, LEASE_DURATION, Counters.CountersSubgroup)); } } @@ -552,6 +699,14 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseGet()->NodeCount; + + LOG_T("Updated node info, noode count: " << NodeCount); + } + void Handle(TEvPrivate::TEvTablesCreationFinished::TPtr& ev) { if (ev->Get()->Success) { PreparingFinished = true; @@ -565,7 +720,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(0); + FifoCounters.PendingRequestsCount->Set(0); } void Handle(TEvPrivate::TEvRefreshPoolStateResponse::TPtr& ev) { @@ -579,7 +734,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseCollect((TInstant::Now() - LastRefreshTime).MilliSeconds()); + FifoCounters.PoolStateUpdatesBacklogMs->Collect((TInstant::Now() - LastRefreshTime).MilliSeconds()); } LastRefreshTime = TInstant::Now(); @@ -587,31 +742,30 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(GlobalState.RunningRequests); - GlobalDelayedRequests->Set(GlobalState.DelayedRequests); + FifoCounters.UpdateGlobalState(GlobalState); LOG_T("succefully refreshed pool state, in flight: " << GlobalState.RunningRequests << ", delayed: " << GlobalState.DelayedRequests); RemoveFinishedRequests(); size_t delayedRequestsCount = DelayedRequests.size(); - DoStartPendingRequest(); + DoStartPendingRequest(GetLoadCpuThreshold()); - if (GlobalState.DelayedRequests + PendingRequests.size() > QueueSizeLimit) { - RemoveBackRequests(PendingRequests, std::min(GlobalState.DelayedRequests + PendingRequests.size() - QueueSizeLimit, PendingRequests.size()), [this](TRequest* request) { - ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); + if (const ui64 
delayedRequests = SaturationSub(GlobalState.AmountRequests() + PendingRequests.size(), InFlightLimit); delayedRequests > QueueSizeLimit) { + RemoveBackRequests(PendingRequests, std::min(delayedRequests - QueueSizeLimit, PendingRequests.size()), [this](TRequest* request) { + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Request was rejected, number of local pending requests is " << PendingRequests.size() << ", number of global delayed/running requests is " << GlobalState.AmountRequests() << ", sum of them is larger than allowed limit " << QueueSizeLimit << " (including concurrent query limit " << InFlightLimit << ") for pool " << PoolId); }); - PendingRequestsCount->Set(PendingRequests.size()); + FifoCounters.PendingRequestsCount->Set(PendingRequests.size()); } if (PendingRequests.empty() && delayedRequestsCount > QueueSizeLimit) { - RemoveBackRequests(DelayedRequests, delayedRequestsCount - QueueSizeLimit, [this](TRequest* request) { + RemoveBackRequests(DelayedRequests, delayedRequestsCount - QueueSizeLimit, [this, delayedRequestsCount](TRequest* request) { AddFinishedRequest(request->SessionId); - ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Request was rejected, number of local delayed requests is " << delayedRequestsCount << ", that is larger than allowed limit " << QueueSizeLimit << " for pool " << PoolId); }); } DoDelayRequest(); - DoStartDelayedRequest(); + DoStartDelayedRequest(GetLoadCpuThreshold()); RefreshState(); }; @@ -630,13 +784,48 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseInc(); + FifoCounters.GlobalDelayedRequests->Inc(); LOG_D("succefully delayed request, session id: " << ev->Get()->SessionId); - DoStartDelayedRequest(); + DoStartDelayedRequest(GetLoadCpuThreshold()); RefreshState(); }; + void Handle(TEvPrivate::TEvCpuQuotaResponse::TPtr& ev) { + 
RunningOperation = false; + + if (!ev->Get()->QuotaAccepted) { + LOG_D("Skipped request start due to load cpu threshold"); + if (static_cast(ev->Cookie) == EStartRequestCase::Pending) { + NYql::TIssues issues = GroupIssues(ev->Get()->Issues, TStringBuilder() << "Request was rejected, failed to request CPU quota for pool " << PoolId << ", current CPU threshold is " << 100.0 * ev->Get()->MaxClusterLoad << "%"); + ForEachUnfinished(DelayedRequests.begin(), DelayedRequests.end(), [this, issues](TRequest* request) { + AddFinishedRequest(request->SessionId); + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, issues); + }); + } + RefreshState(); + return; + } + + RemoveFinishedRequests(); + switch (static_cast(ev->Cookie)) { + case EStartRequestCase::Pending: + if (!RunningOperation && !DelayedRequests.empty()) { + RunningOperation = true; + const TString& sessionId = DelayedRequests.front(); + this->Register(CreateStartRequestActor(this->SelfId(), DatabaseId, PoolId, sessionId, LEASE_DURATION, Counters.CountersSubgroup)); + GetRequest(sessionId)->CleanupRequired = true; + } + break; + + case EStartRequestCase::Delayed: + DoStartDelayedRequest(Nothing()); + break; + } + + RefreshState(); + } + void Handle(TEvPrivate::TEvStartRequestResponse::TPtr& ev) { RunningOperation = false; @@ -657,7 +846,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase(); event->Record.SetPoolId(PoolId); - event->Record.SetDatabase(Database); + event->Record.SetDatabase(DatabaseId); this->Send(MakeKqpWorkloadServiceId(nodeId), std::move(event)); RefreshState(); return; @@ -668,14 +857,15 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSessionId == sessionId) { + request->UsedCpuQuota = !!GetLoadCpuThreshold(); requestFound = true; GlobalState.RunningRequests++; - GlobalInFly->Inc(); + FifoCounters.GlobalInFly->Inc(); ReplyContinue(request); } else { // Request was dropped due to lease expiration PendingRequests.emplace_front(request->SessionId); - 
PendingRequestsCount->Inc(); + FifoCounters.PendingRequestsCount->Inc(); } }); DelayedRequests.pop_front(); @@ -706,22 +896,28 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase loadCpuThreshold) { RemoveFinishedRequests(); if (RunningOperation) { return; } - if (!PendingRequests.empty() && QueueSizeLimit == 0 && GlobalState.RunningRequests < InFlightLimit) { + bool canStartRequest = QueueSizeLimit == 0 && GlobalState.RunningRequests < InFlightLimit; + canStartRequest |= !GetLoadCpuThreshold() && DelayedRequests.size() + GlobalState.DelayedRequests == 0 && NodeCount && GlobalState.RunningRequests + NodeCount < InFlightLimit; + if (!PendingRequests.empty() && canStartRequest) { RunningOperation = true; const TString& sessionId = PopPendingRequest(); - this->Register(CreateStartRequestActor(this->SelfId(), Database, PoolId, sessionId, LEASE_DURATION, CountersSubgroup)); DelayedRequests.emplace_front(sessionId); - GetRequest(sessionId)->CleanupRequired = true; + if (loadCpuThreshold) { + RequestCpuQuota(*loadCpuThreshold, EStartRequestCase::Pending); + } else { + this->Register(CreateStartRequestActor(this->SelfId(), DatabaseId, PoolId, sessionId, LEASE_DURATION, Counters.CountersSubgroup)); + GetRequest(sessionId)->CleanupRequired = true; + } } } - void DoStartDelayedRequest() { + void DoStartDelayedRequest(TMaybe loadCpuThreshold) { RemoveFinishedRequests(); if (RunningOperation) { return; @@ -729,7 +925,11 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateStartRequestActor(this->SelfId(), Database, PoolId, std::nullopt, LEASE_DURATION, CountersSubgroup)); + if (loadCpuThreshold) { + RequestCpuQuota(*loadCpuThreshold, EStartRequestCase::Delayed); + } else { + this->Register(CreateStartRequestActor(this->SelfId(), DatabaseId, PoolId, std::nullopt, LEASE_DURATION, Counters.CountersSubgroup)); + } } } @@ -743,7 +943,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateDelayRequestActor(this->SelfId(), 
Database, PoolId, sessionId, request->StartTime, GetWaitDeadline(request->StartTime), LEASE_DURATION, CountersSubgroup)); + this->Register(CreateDelayRequestActor(this->SelfId(), DatabaseId, PoolId, sessionId, request->StartTime, GetWaitDeadline(request->StartTime), LEASE_DURATION, Counters.CountersSubgroup)); DelayedRequests.emplace_back(sessionId); request->CleanupRequired = true; } @@ -756,9 +956,9 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateCleanupRequestsActor(this->SelfId(), Database, PoolId, FinishedRequests, CountersSubgroup)); + this->Register(CreateCleanupRequestsActor(this->SelfId(), DatabaseId, PoolId, FinishedRequests, Counters.CountersSubgroup)); FinishedRequests.clear(); - FinishingRequestsCount->Set(0); + FifoCounters.FinishingRequestsCount->Set(0); } } @@ -770,6 +970,10 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSchedule(LEASE_DURATION / 2, new TEvents::TEvWakeup()); } + void RequestCpuQuota(double loadCpuThreshold, EStartRequestCase requestCase) const { + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvCpuQuotaRequest(loadCpuThreshold / 100.0), 0, static_cast(requestCase)); + } + private: void RemoveFinishedRequests() { if (RunningOperation) { @@ -778,7 +982,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(PendingRequests.size()); + FifoCounters.PendingRequestsCount->Set(PendingRequests.size()); } void RemoveFinishedRequests(std::deque& requests) { @@ -819,24 +1023,18 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseDec(); + FifoCounters.PendingRequestsCount->Dec(); return sessionId; } void AddFinishedRequest(const TString& sessionId) { FinishedRequests.emplace_back(sessionId); - FinishingRequestsCount->Inc(); - } - - void RegisterCounters() { - PendingRequestsCount = CountersSubgroup->GetCounter("PendingRequestsCount", false); - FinishingRequestsCount = CountersSubgroup->GetCounter("FinishingRequestsCount", false); - GlobalInFly = 
CountersSubgroup->GetCounter("GlobalInFly", false); - GlobalDelayedRequests = CountersSubgroup->GetCounter("GlobalDelayedRequests", false); - PoolStateUpdatesBacklogMs = CountersSubgroup->GetHistogram("PoolStateUpdatesBacklogMs", NMonitoring::LinearHistogram(20, 0, 3 * LEASE_DURATION.MillisecondsFloat() / 40)); + FifoCounters.FinishingRequestsCount->Inc(); } private: + TCounters FifoCounters; + bool PreparingFinished = false; bool RefreshRequired = false; bool RunningOperation = false; @@ -849,20 +1047,18 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase +#include #include #include #include +#include #include #include @@ -36,7 +38,7 @@ class TPoolResolverActor : public TActorBootstrapped { void StartPoolFetchRequest() const { LOG_D("Start pool fetching"); - Register(CreatePoolFetcherActor(SelfId(), Event->Get()->Database, Event->Get()->PoolId, Event->Get()->UserToken)); + Register(CreatePoolFetcherActor(SelfId(), Event->Get()->DatabaseId, Event->Get()->PoolId, Event->Get()->UserToken)); } void Handle(TEvPrivate::TEvFetchPoolResponse::TPtr& ev) { @@ -63,16 +65,22 @@ class TPoolResolverActor : public TActorBootstrapped { for (const TString& usedSid : AppData()->AdministrationAllowedSIDs) { diffAcl.AddAccess(NACLib::EAccessType::Allow, NACLib::EAccessRights::GenericFull, usedSid); } - diffAcl.AddAccess(NACLib::EAccessType::Allow, NACLib::EAccessRights::SelectRow | NACLib::EAccessRights::DescribeSchema, AppData()->AllAuthenticatedUsers); + + auto useAccess = NACLib::EAccessRights::SelectRow | NACLib::EAccessRights::DescribeSchema; + for (const auto& userSID : AppData()->DefaultUserSIDs) { + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, userSID); + } + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, AppData()->AllAuthenticatedUsers); + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, BUILTIN_ACL_ROOT); auto token = MakeIntrusive(BUILTIN_ACL_METADATA, TVector{}); - Register(CreatePoolCreatorActor(SelfId(), Event->Get()->Database, 
Event->Get()->PoolId, NResourcePool::TPoolSettings(), token, diffAcl)); + Register(CreatePoolCreatorActor(SelfId(), Event->Get()->DatabaseId, Event->Get()->PoolId, NResourcePool::TPoolSettings(), token, diffAcl)); } void Handle(TEvPrivate::TEvCreatePoolResponse::TPtr& ev) { if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { LOG_E("Failed to create default pool " << ev->Get()->Status << ", issues: " << ev->Get()->Issues.ToOneLineString()); - Reply(ev->Get()->Status, GroupIssues(ev->Get()->Issues, "Failed to create default pool")); + Reply(ev->Get()->Status, GroupIssues(ev->Get()->Issues, TStringBuilder() << "Failed to create default pool in database " << Event->Get()->DatabaseId)); return; } @@ -88,7 +96,7 @@ class TPoolResolverActor : public TActorBootstrapped { private: TString LogPrefix() const { - return TStringBuilder() << "[TPoolResolverActor] ActorId: " << SelfId() << ", Database: " << Event->Get()->Database << ", PoolId: " << Event->Get()->PoolId << ", SessionId: " << Event->Get()->SessionId << ", "; + return TStringBuilder() << "[TPoolResolverActor] ActorId: " << SelfId() << ", DatabaseId: " << Event->Get()->DatabaseId << ", PoolId: " << Event->Get()->PoolId << ", SessionId: " << Event->Get()->SessionId << ", "; } void Reply(NResourcePool::TPoolSettings poolConfig, TPathId pathId) { @@ -114,9 +122,9 @@ class TPoolResolverActor : public TActorBootstrapped { class TPoolFetcherActor : public TSchemeActorBase { public: - TPoolFetcherActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken) + TPoolFetcherActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, TIntrusiveConstPtr userToken) : ReplyActorId(replyActorId) - , Database(database) + , DatabaseId(databaseId) , PoolId(poolId) , UserToken(userToken) {} @@ -171,8 +179,8 @@ class TPoolFetcherActor : public TSchemeActorBase { void StartRequest() override { LOG_D("Start pool fetching"); auto event = 
NTableCreator::BuildSchemeCacheNavigateRequest( - {{".resource_pools", PoolId}}, - Database, + {{".metadata/workload_manager/pools", PoolId}}, + DatabaseIdToDatabase(DatabaseId), UserToken ); event->ResultSet[0].Access |= NACLib::SelectRow; @@ -185,7 +193,7 @@ class TPoolFetcherActor : public TSchemeActorBase { } TString LogPrefix() const override { - return TStringBuilder() << "[TPoolFetcherActor] ActorId: " << SelfId() << ", Database: " << Database << ", PoolId: " << PoolId << ", "; + return TStringBuilder() << "[TPoolFetcherActor] ActorId: " << SelfId() << ", DatabaseId: " << DatabaseId << ", PoolId: " << PoolId << ", "; } private: @@ -213,13 +221,13 @@ class TPoolFetcherActor : public TSchemeActorBase { } Issues.AddIssues(std::move(issues)); - Send(ReplyActorId, new TEvPrivate::TEvFetchPoolResponse(status, PoolConfig, PathIdFromPathId(PathId), std::move(Issues))); + Send(ReplyActorId, new TEvPrivate::TEvFetchPoolResponse(status, DatabaseId, PoolId, PoolConfig, PathIdFromPathId(PathId), std::move(Issues))); PassAway(); } private: const TActorId ReplyActorId; - const TString Database; + const TString DatabaseId; const TString PoolId; const TIntrusiveConstPtr UserToken; @@ -232,9 +240,9 @@ class TPoolCreatorActor : public TSchemeActorBase { using TBase = TSchemeActorBase; public: - TPoolCreatorActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl) + TPoolCreatorActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl) : ReplyActorId(replyActorId) - , Database(database) + , DatabaseId(databaseId) , PoolId(poolId) , UserToken(userToken) , DiffAcl(diffAcl) @@ -246,38 +254,67 @@ class TPoolCreatorActor : public TSchemeActorBase { } void Handle(TEvTxUserProxy::TEvProposeTransactionStatus::TPtr& ev) 
{ - const auto ssStatus = ev->Get()->Record.GetSchemeShardStatus(); - switch (ev->Get()->Status()) { + const auto& response = ev->Get()->Record; + const auto ssStatus = response.GetSchemeShardStatus(); + const auto status = ev->Get()->Status(); + switch (status) { case NTxProxy::TResultStatus::ExecComplete: case NTxProxy::TResultStatus::ExecAlready: if (ssStatus == NKikimrScheme::EStatus::StatusSuccess || ssStatus == NKikimrScheme::EStatus::StatusAlreadyExists) { Reply(Ydb::StatusIds::SUCCESS); } else { - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << "Invalid creation status: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Invalid creation status: " << static_cast(ssStatus))); } return; case NTxProxy::TResultStatus::ExecError: - if (ssStatus == NKikimrScheme::EStatus::StatusMultipleModifications || ssStatus == NKikimrScheme::EStatus::StatusInvalidParameter) { - ScheduleRetry(ssStatus, "Retry execution error", true); + if (ssStatus == NKikimrScheme::EStatus::StatusMultipleModifications) { + SubscribeOnTransactionOrRetry(status, response); } else { - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << "Execution error: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Execution error: " << static_cast(ssStatus))); } return; case NTxProxy::TResultStatus::ExecInProgress: - ScheduleRetry(ssStatus, "Retry execution in progress error", true); + SubscribeOnTransactionOrRetry(status, response); return; case NTxProxy::TResultStatus::ProxyShardNotAvailable: - ScheduleRetry(ssStatus, "Retry shard unavailable error"); + ScheduleRetry(response, "Retry shard unavailable error"); return; default: - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << "Failed to create resource pool: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Failed to create resource pool: " << 
static_cast(ssStatus))); return; } } + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + if (ev->Get()->Status == NKikimrProto::OK) { + LOG_T("Tablet to pipe successfully connected"); + return; + } + + ClosePipeClient(); + ScheduleRetry(TStringBuilder() << "Tablet to pipe not connected: " << NKikimrProto::EReplyStatus_Name(ev->Get()->Status)); + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev) { + const TActorId clientId = ev->Get()->ClientId; + if (!ClosedSchemePipeActors.contains(clientId)) { + ClosePipeClient(); + ScheduleRetry("Tablet to pipe destroyed"); + } + } + + void Handle(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) { + ScheduleRetry(TStringBuilder() << "Transaction " << ev->Get()->Record.GetTxId() << " completed, doublechecking"); + } + STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { hFunc(TEvTxUserProxy::TEvProposeTransactionStatus, Handle) + hFunc(TEvTabletPipe::TEvClientConnected, Handle) + hFunc(TEvTabletPipe::TEvClientDestroyed, Handle) + hFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult, Handle) + IgnoreFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionRegistered) + default: StateFuncBase(ev); } @@ -289,16 +326,15 @@ class TPoolCreatorActor : public TSchemeActorBase { auto event = std::make_unique(); auto& schemeTx = *event->Record.MutableTransaction()->MutableModifyScheme(); - schemeTx.SetWorkingDir(JoinPath({Database, ".resource_pools"})); + schemeTx.SetWorkingDir(JoinPath({DatabaseIdToDatabase(DatabaseId), ".metadata/workload_manager/pools"})); schemeTx.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateResourcePool); schemeTx.SetInternal(true); - schemeTx.SetAllowAccessToPrivatePaths(true); BuildCreatePoolRequest(*schemeTx.MutableCreateResourcePool()); BuildModifyAclRequest(*schemeTx.MutableModifyACL()); if (UserToken) { - event->Record.SetUserToken(UserToken->GetSerializedToken()); + event->Record.SetUserToken(UserToken->SerializeAsString()); } Send(MakeTxProxyID(), 
std::move(event)); @@ -309,23 +345,55 @@ class TPoolCreatorActor : public TSchemeActorBase { } TString LogPrefix() const override { - return TStringBuilder() << "[TPoolCreatorActor] ActorId: " << SelfId() << ", Database: " << Database << ", PoolId: " << PoolId << ", "; + return TStringBuilder() << "[TPoolCreatorActor] ActorId: " << SelfId() << ", DatabaseId: " << DatabaseId << ", PoolId: " << PoolId << ", "; } private: - void ScheduleRetry(ui32 status, const TString& message, bool longDelay = false) { - auto ssStatus = static_cast(status); - if (!TBase::ScheduleRetry(TStringBuilder() << message << ", status: " << ssStatus, longDelay)) { - Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on status: " << ssStatus); + void SubscribeOnTransactionOrRetry(NTxProxy::TResultStatus::EStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus& response) { + const ui64 txId = status == NTxProxy::TResultStatus::ExecInProgress ? response.GetTxId() : response.GetPathCreateTxId(); + if (txId == 0) { + ScheduleRetry(response, "Unable to subscribe to concurrent transaction", true); + return; + } + + SchemePipeActorId = Register(NTabletPipe::CreateClient(SelfId(), response.GetSchemeShardTabletId())); + + auto request = MakeHolder(); + request->Record.SetTxId(txId); + NTabletPipe::SendData(SelfId(), SchemePipeActorId, std::move(request)); + LOG_D("Subscribe on create pool tx: " << txId); + } + + void ClosePipeClient() { + if (SchemePipeActorId) { + ClosedSchemePipeActors.insert(SchemePipeActorId); + NTabletPipe::CloseClient(SelfId(), SchemePipeActorId); + SchemePipeActorId = {}; + } + } + + void ScheduleRetry(const NKikimrTxUserProxy::TEvProposeTransactionStatus& response, const TString& message, bool longDelay = false) { + ClosePipeClient(); + + auto ssStatus = static_cast(response.GetSchemeShardStatus()); + if (!TBase::ScheduleRetry(ExtractIssues(response, TStringBuilder() << message << ", status: " << ssStatus), longDelay)) { + 
Reply(Ydb::StatusIds::UNAVAILABLE, ExtractIssues(response, TStringBuilder() << "Retry limit exceeded on status: " << ssStatus)); + } + } + + void ScheduleRetry(const TString& message, bool longDelay = false) { + ClosePipeClient(); + if (!TBase::ScheduleRetry(message, longDelay)) { + Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on error: " << message); } } void BuildCreatePoolRequest(NKikimrSchemeOp::TResourcePoolDescription& poolDescription) { poolDescription.SetName(PoolId); - for (auto& [property, value] : NResourcePool::GetPropertiesMap(PoolConfig)) { + for (auto& [property, value] : PoolConfig.GetPropertiesMap()) { poolDescription.MutableProperties()->MutableProperties()->insert({ property, - std::visit(NResourcePool::TSettingsExtractor{}, value) + std::visit(NResourcePool::TPoolSettings::TExtractor{}, value) }); } } @@ -349,18 +417,151 @@ class TPoolCreatorActor : public TSchemeActorBase { LOG_W("Failed to create pool, " << status << ", issues: " << issues.ToOneLineString()); } + ClosePipeClient(); + Issues.AddIssues(std::move(issues)); Send(ReplyActorId, new TEvPrivate::TEvCreatePoolResponse(status, std::move(Issues))); PassAway(); } + static NYql::TIssues ExtractIssues(const NKikimrTxUserProxy::TEvProposeTransactionStatus& response, const TString& message) { + NYql::TIssues issues; + NYql::IssuesFromMessage(response.GetIssues(), issues); + return GroupIssues(issues, message); + } + private: const TActorId ReplyActorId; - const TString Database; + const TString DatabaseId; const TString PoolId; const TIntrusiveConstPtr UserToken; const NACLibProto::TDiffACL DiffAcl; NResourcePool::TPoolSettings PoolConfig; + + std::unordered_set ClosedSchemePipeActors; + TActorId SchemePipeActorId; +}; + + +class TDatabaseFetcherActor : public TSchemeActorBase { +public: + TDatabaseFetcherActor(const TActorId& replyActorId, const TString& database, TIntrusiveConstPtr userToken, NACLib::EAccessRights checkAccess) + : ReplyActorId(replyActorId) + 
, Database(database) + , UserToken(userToken) + , CheckAccess(checkAccess) + {} + + void DoBootstrap() { + Become(&TDatabaseFetcherActor::StateFunc); + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + const auto& results = ev->Get()->Request->ResultSet; + if (results.size() != 1) { + Reply(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected scheme cache response"); + return; + } + + const auto& result = results[0]; + switch (result.Status) { + case EStatus::Unknown: + case EStatus::PathNotTable: + case EStatus::PathNotPath: + case EStatus::RedirectLookupError: + case EStatus::RootUnknown: + case EStatus::PathErrorUnknown: + Reply(Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Database " << Database << " not found or you don't have access permissions"); + return; + case EStatus::AccessDenied: + Reply(Ydb::StatusIds::UNAUTHORIZED, TStringBuilder() << "You don't have access permissions for database " << Database); + return; + case EStatus::LookupError: + case EStatus::TableCreationNotComplete: + if (!ScheduleRetry(TStringBuilder() << "Retry error " << result.Status)) { + Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on scheme error: " << result.Status); + } + return; + case EStatus::Ok: + if (!IsSubDomainPath(result)) { + Reply(Ydb::StatusIds::UNSUPPORTED, TStringBuilder() << "Invalid database path " << Database << ", please check the correctness of the path"); + return; + } + if (result.DomainInfo) { + Serverless = result.DomainInfo->IsServerless(); + PathId = result.DomainInfo->DomainKey; + } + Reply(Ydb::StatusIds::SUCCESS); + return; + } + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + default: + StateFuncBase(ev); + } + } + +protected: + void StartRequest() override { + LOG_D("Start database fetching"); + auto event = NTableCreator::BuildSchemeCacheNavigateRequest( + {{}}, + Database ? 
Database : AppData()->TenantName, + UserToken + ); + event->ResultSet[0].Operation = NSchemeCache::TSchemeCacheNavigate::OpPath; + event->ResultSet[0].Access |= CheckAccess; + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(event.Release()), IEventHandle::FlagTrackDelivery); + } + + void OnFatalError(Ydb::StatusIds::StatusCode status, NYql::TIssue issue) override { + Reply(status, {std::move(issue)}); + } + + TString LogPrefix() const override { + return TStringBuilder() << "[TDatabaseFetcherActor] ActorId: " << SelfId() << ", Database: " << Database << ", "; + } + +private: + void Reply(Ydb::StatusIds::StatusCode status, const TString& message) { + Reply(status, {NYql::TIssue(message)}); + } + + void Reply(Ydb::StatusIds::StatusCode status, NYql::TIssues issues = {}) { + if (status == Ydb::StatusIds::SUCCESS) { + LOG_D("Database info successfully fetched, serverless: " << Serverless); + } else { + LOG_W("Failed to fetch database info, " << status << ", issues: " << issues.ToOneLineString()); + } + + Issues.AddIssues(std::move(issues)); + Send(ReplyActorId, new TEvFetchDatabaseResponse(status, Database, CreateDatabaseId(Database, Serverless, PathId), Serverless, PathId, std::move(Issues))); + PassAway(); + } + + static bool IsSubDomainPath(const NSchemeCache::TSchemeCacheNavigate::TEntry& entry) { + switch (entry.Kind) { + case NSchemeCache::TSchemeCacheNavigate::EKind::KindSubdomain: + case NSchemeCache::TSchemeCacheNavigate::EKind::KindExtSubdomain: + return true; + case NSchemeCache::TSchemeCacheNavigate::EKind::KindPath: + return entry.Self->Info.GetPathId() == NSchemeShard::RootPathId; + default: + return false; + } + } + +private: + const TActorId ReplyActorId; + const TString Database; + const TIntrusiveConstPtr UserToken; + const NACLib::EAccessRights CheckAccess; + + bool Serverless = false; + TPathId PathId; }; } // anonymous namespace @@ -369,12 +570,16 @@ IActor* CreatePoolResolverActor(TEvPlaceRequestIntoPool::TPtr event, bool 
defaul return new TPoolResolverActor(std::move(event), defaultPoolExists); } -IActor* CreatePoolFetcherActor(const TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken) { - return new TPoolFetcherActor(replyActorId, database, poolId, userToken); +IActor* CreatePoolFetcherActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, TIntrusiveConstPtr userToken) { + return new TPoolFetcherActor(replyActorId, databaseId, poolId, userToken); +} + +IActor* CreatePoolCreatorActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl) { + return new TPoolCreatorActor(replyActorId, databaseId, poolId, poolConfig, userToken, diffAcl); } -IActor* CreatePoolCreatorActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl) { - return new TPoolCreatorActor(replyActorId, database, poolId, poolConfig, userToken, diffAcl); +IActor* CreateDatabaseFetcherActor(const TActorId& replyActorId, const TString& database, TIntrusiveConstPtr userToken, NACLib::EAccessRights checkAccess) { + return new TDatabaseFetcherActor(replyActorId, database, userToken, checkAccess); } } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/actors/ya.make b/ydb/core/kqp/workload_service/actors/ya.make index 01d4e7c9d663..774488a83d6a 100644 --- a/ydb/core/kqp/workload_service/actors/ya.make +++ b/ydb/core/kqp/workload_service/actors/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + cpu_load_actors.cpp pool_handlers_acors.cpp scheme_actors.cpp ) diff --git a/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp b/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp new file mode 100644 index 000000000000..dd3a66183423 --- /dev/null +++ 
b/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp @@ -0,0 +1,156 @@ +#include "cpu_quota_manager.h" + +#include + + +namespace NKikimr::NKqp::NWorkload { + +//// TCpuQuotaManager::TCounters + +TCpuQuotaManager::TCounters::TCounters(const ::NMonitoring::TDynamicCounterPtr& subComponent) + : SubComponent(subComponent) +{ + Register(); +} + +void TCpuQuotaManager::TCounters::Register() { + RegisterCommonMetrics(CpuLoadRequest); + InstantLoadPercentage = SubComponent->GetCounter("InstantLoadPercentage", false); + AverageLoadPercentage = SubComponent->GetCounter("AverageLoadPercentage", false); + QuotedLoadPercentage = SubComponent->GetCounter("QuotedLoadPercentage", false); +} + +void TCpuQuotaManager::TCounters::RegisterCommonMetrics(TCommonMetrics& metrics) const { + metrics.Ok = SubComponent->GetCounter("Ok", true); + metrics.Error = SubComponent->GetCounter("Error", true); +} + +//// TCpuQuotaManager::TCpuQuotaResponse + +TCpuQuotaManager::TCpuQuotaResponse::TCpuQuotaResponse(int32_t currentLoad, NYdb::EStatus status, NYql::TIssues issues) + : CurrentLoad(currentLoad) + , Status(status) + , Issues(std::move(issues)) +{} + +//// TCpuQuotaManager + +TCpuQuotaManager::TCpuQuotaManager(TDuration monitoringRequestDelay, TDuration averageLoadInterval, TDuration idleTimeout, double defaultQueryLoad, bool strict, ui64 cpuNumber, const ::NMonitoring::TDynamicCounterPtr& subComponent) + : Counters(subComponent) + , MonitoringRequestDelay(monitoringRequestDelay) + , AverageLoadInterval(averageLoadInterval) + , IdleTimeout(idleTimeout) + , DefaultQueryLoad(defaultQueryLoad) + , Strict(strict) + , CpuNumber(cpuNumber) +{} + +double TCpuQuotaManager::GetInstantLoad() const { + return InstantLoad; +} + +double TCpuQuotaManager::GetAverageLoad() const { + return AverageLoad; +} + +TDuration TCpuQuotaManager::GetMonitoringRequestDelay() const { + return GetMonitoringRequestTime() - TInstant::Now(); +} + +TInstant TCpuQuotaManager::GetMonitoringRequestTime() const { + 
TDuration delay = MonitoringRequestDelay; + if (IdleTimeout && TInstant::Now() - LastRequestCpuQuota > IdleTimeout) { + delay = AverageLoadInterval / 2; + } + + return LastUpdateCpuLoad ? LastUpdateCpuLoad + delay : TInstant::Now(); +} + +void TCpuQuotaManager::UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success) { + auto now = TInstant::Now(); + LastUpdateCpuLoad = now; + + if (!success) { + Counters.CpuLoadRequest.Error->Inc(); + CheckLoadIsOutdated(); + return; + } + + auto delta = now - LastCpuLoad; + LastCpuLoad = now; + + if (cpuNumber) { + CpuNumber = cpuNumber; + } + + InstantLoad = instantLoad; + // exponential moving average + if (!Ready || delta >= AverageLoadInterval) { + AverageLoad = InstantLoad; + QuotedLoad = InstantLoad; + } else { + auto ratio = static_cast(delta.GetValue()) / AverageLoadInterval.GetValue(); + AverageLoad = (1 - ratio) * AverageLoad + ratio * InstantLoad; + QuotedLoad = (1 - ratio) * QuotedLoad + ratio * InstantLoad; + } + Ready = true; + Counters.CpuLoadRequest.Ok->Inc(); + Counters.InstantLoadPercentage->Set(static_cast(InstantLoad * 100)); + Counters.AverageLoadPercentage->Set(static_cast(AverageLoad * 100)); + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); +} + +bool TCpuQuotaManager::CheckLoadIsOutdated() { + if (TInstant::Now() - LastCpuLoad > AverageLoadInterval) { + Ready = false; + QuotedLoad = 0.0; + Counters.QuotedLoadPercentage->Set(0); + } + return Ready; +} + +bool TCpuQuotaManager::HasCpuQuota(double maxClusterLoad) { + LastRequestCpuQuota = TInstant::Now(); + return maxClusterLoad == 0.0 || ((Ready || !Strict) && QuotedLoad < maxClusterLoad); +} + +TCpuQuotaManager::TCpuQuotaResponse TCpuQuotaManager::RequestCpuQuota(double quota, double maxClusterLoad) { + if (quota < 0.0 || quota > 1.0) { + return TCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, {NYql::TIssue(TStringBuilder() << "Incorrect quota value (exceeds 1.0 or less than 0.0) " << quota)}); + } + quota = quota ? 
quota : DefaultQueryLoad; + + CheckLoadIsOutdated(); + if (!HasCpuQuota(maxClusterLoad)) { + return TCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, {NYql::TIssue(TStringBuilder() + << "Cluster is overloaded, current quoted load " << static_cast(QuotedLoad * 100) + << "%, average load " << static_cast(AverageLoad * 100) << "%" + )}); + } + + QuotedLoad += quota; + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); + return TCpuQuotaResponse(QuotedLoad * 100); +} + +void TCpuQuotaManager::AdjustCpuQuota(double quota, TDuration duration, double cpuSecondsConsumed) { + if (!CpuNumber) { + return; + } + + if (duration && duration < AverageLoadInterval / 2 && quota <= 1.0) { + quota = quota ? quota : DefaultQueryLoad; + auto load = (cpuSecondsConsumed * 1000.0 / duration.MilliSeconds()) / CpuNumber; + if (quota > load) { + auto adjustment = (quota - load) / 2; + if (QuotedLoad > adjustment) { + QuotedLoad -= adjustment; + } else { + QuotedLoad = 0.0; + } + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); + } + } +} + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/cpu_quota_manager.h b/ydb/core/kqp/workload_service/common/cpu_quota_manager.h new file mode 100644 index 000000000000..f0587e1d4418 --- /dev/null +++ b/ydb/core/kqp/workload_service/common/cpu_quota_manager.h @@ -0,0 +1,76 @@ +#pragma once + +#include + +#include + +#include + + +namespace NKikimr::NKqp::NWorkload { + +class TCpuQuotaManager { + struct TCounters { + const ::NMonitoring::TDynamicCounterPtr SubComponent; + struct TCommonMetrics { + ::NMonitoring::TDynamicCounters::TCounterPtr Ok; + ::NMonitoring::TDynamicCounters::TCounterPtr Error; + }; + + TCommonMetrics CpuLoadRequest; + ::NMonitoring::TDynamicCounters::TCounterPtr InstantLoadPercentage; + ::NMonitoring::TDynamicCounters::TCounterPtr AverageLoadPercentage; + ::NMonitoring::TDynamicCounters::TCounterPtr QuotedLoadPercentage; + + explicit TCounters(const 
::NMonitoring::TDynamicCounterPtr& subComponent); + + private: + void Register(); + void RegisterCommonMetrics(TCommonMetrics& metrics) const; + }; + +public: + struct TCpuQuotaResponse { + explicit TCpuQuotaResponse(int32_t currentLoad, NYdb::EStatus status = NYdb::EStatus::SUCCESS, NYql::TIssues issues = {}); + + const int32_t CurrentLoad; + const NYdb::EStatus Status; + const NYql::TIssues Issues; + }; + +public: + TCpuQuotaManager(TDuration monitoringRequestDelay, TDuration averageLoadInterval, TDuration idleTimeout, double defaultQueryLoad, bool strict, ui64 cpuNumber, const ::NMonitoring::TDynamicCounterPtr& subComponent); + + double GetInstantLoad() const; + double GetAverageLoad() const; + TDuration GetMonitoringRequestDelay() const; + TInstant GetMonitoringRequestTime() const; + + void UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success); + bool CheckLoadIsOutdated(); + + bool HasCpuQuota(double maxClusterLoad); + TCpuQuotaResponse RequestCpuQuota(double quota, double maxClusterLoad); + void AdjustCpuQuota(double quota, TDuration duration, double cpuSecondsConsumed); + +private: + TCounters Counters; + + const TDuration MonitoringRequestDelay; + const TDuration AverageLoadInterval; + const TDuration IdleTimeout; + const double DefaultQueryLoad; + const bool Strict; + ui64 CpuNumber = 0; + + TInstant LastCpuLoad; + TInstant LastUpdateCpuLoad; + TInstant LastRequestCpuQuota; + + double InstantLoad = 0.0; + double AverageLoad = 0.0; + double QuotedLoad = 0.0; + bool Ready = false; +}; + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/events.h b/ydb/core/kqp/workload_service/common/events.h index 25018bbe6728..911dccd36f0e 100644 --- a/ydb/core/kqp/workload_service/common/events.h +++ b/ydb/core/kqp/workload_service/common/events.h @@ -28,7 +28,15 @@ struct TEvPrivate { EvFinishRequestInPool, EvResignPoolHandler, EvStopPoolHandler, + EvStopPoolHandlerResponse, EvCancelRequest, + EvUpdatePoolSubscription, + 
+ EvCpuQuotaRequest, + EvCpuQuotaResponse, + EvCpuLoadResponse, + EvNodesInfoRequest, + EvNodesInfoResponse, EvTablesCreationFinished, EvCleanupTableResponse, @@ -66,14 +74,18 @@ struct TEvPrivate { }; struct TEvFetchPoolResponse : public NActors::TEventLocal { - TEvFetchPoolResponse(Ydb::StatusIds::StatusCode status, const NResourcePool::TPoolSettings& poolConfig, TPathId pathId, NYql::TIssues issues) + TEvFetchPoolResponse(Ydb::StatusIds::StatusCode status, const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TPathId pathId, NYql::TIssues issues) : Status(status) + , DatabaseId(databaseId) + , PoolId(poolId) , PoolConfig(poolConfig) , PathId(pathId) , Issues(std::move(issues)) {} const Ydb::StatusIds::StatusCode Status; + const TString DatabaseId; + const TString PoolId; const NResourcePool::TPoolSettings PoolConfig; const TPathId PathId; const NYql::TIssues Issues; @@ -90,48 +102,71 @@ struct TEvPrivate { }; struct TEvPrepareTablesRequest : public NActors::TEventLocal { - TEvPrepareTablesRequest(const TString& database, const TString& poolId) - : Database(database) + TEvPrepareTablesRequest(const TString& databaseId, const TString& poolId) + : DatabaseId(databaseId) , PoolId(poolId) {} - const TString Database; + const TString DatabaseId; const TString PoolId; }; struct TEvPlaceRequestIntoPoolResponse : public NActors::TEventLocal { - TEvPlaceRequestIntoPoolResponse(const TString& database, const TString& poolId) - : Database(database) + TEvPlaceRequestIntoPoolResponse(const TString& databaseId, const TString& poolId, const TString& sessionId) + : DatabaseId(databaseId) , PoolId(poolId) + , SessionId(sessionId) {} - const TString Database; + const TString DatabaseId; const TString PoolId; + const TString SessionId; }; struct TEvFinishRequestInPool : public NActors::TEventLocal { - TEvFinishRequestInPool(const TString& database, const TString& poolId) - : Database(database) + TEvFinishRequestInPool(const TString& 
databaseId, const TString& poolId, TDuration duration, TDuration cpuConsumed, bool adjustCpuQuota) + : DatabaseId(databaseId) , PoolId(poolId) + , Duration(duration) + , CpuConsumed(cpuConsumed) + , AdjustCpuQuota(adjustCpuQuota) {} - const TString Database; + const TString DatabaseId; const TString PoolId; + const TDuration Duration; + const TDuration CpuConsumed; + const bool AdjustCpuQuota; }; struct TEvResignPoolHandler : public NActors::TEventLocal { - TEvResignPoolHandler(const TString& database, const TString& poolId, const TActorId& newHandler) - : Database(database) + TEvResignPoolHandler(const TString& databaseId, const TString& poolId, const TActorId& newHandler) + : DatabaseId(databaseId) , PoolId(poolId) , NewHandler(newHandler) {} - const TString Database; + const TString DatabaseId; const TString PoolId; const TActorId NewHandler; }; struct TEvStopPoolHandler : public NActors::TEventLocal { + explicit TEvStopPoolHandler(bool resetCounters) + : ResetCounters(resetCounters) + {} + + const bool ResetCounters; + }; + + struct TEvStopPoolHandlerResponse : public NActors::TEventLocal { + TEvStopPoolHandlerResponse(const TString& databaseId, const TString& poolId) + : DatabaseId(databaseId) + , PoolId(poolId) + {} + + const TString DatabaseId; + const TString PoolId; }; struct TEvCancelRequest : public NActors::TEventLocal { @@ -142,6 +177,62 @@ struct TEvPrivate { const TString SessionId; }; + struct TEvUpdatePoolSubscription : public NActors::TEventLocal { + explicit TEvUpdatePoolSubscription(TPathId pathId, const std::unordered_set& subscribers) + : PathId(pathId) + , Subscribers(subscribers) + {} + + const TPathId PathId; + const std::unordered_set Subscribers; + }; + + // Cpu load requests + struct TEvCpuQuotaRequest : public NActors::TEventLocal { + explicit TEvCpuQuotaRequest(double maxClusterLoad) + : MaxClusterLoad(maxClusterLoad) + {} + + const double MaxClusterLoad; + }; + + struct TEvCpuQuotaResponse : public NActors::TEventLocal { + explicit 
TEvCpuQuotaResponse(bool quotaAccepted, double maxClusterLoad, NYql::TIssues issues) + : QuotaAccepted(quotaAccepted) + , MaxClusterLoad(maxClusterLoad) + , Issues(std::move(issues)) + {} + + const bool QuotaAccepted; + const double MaxClusterLoad; + const NYql::TIssues Issues; + }; + + struct TEvCpuLoadResponse : public NActors::TEventLocal { + TEvCpuLoadResponse(Ydb::StatusIds::StatusCode status, double instantLoad, ui64 cpuNumber, NYql::TIssues issues) + : Status(status) + , InstantLoad(instantLoad) + , CpuNumber(cpuNumber) + , Issues(std::move(issues)) + {} + + const Ydb::StatusIds::StatusCode Status; + const double InstantLoad; + const ui64 CpuNumber; + const NYql::TIssues Issues; + }; + + struct TEvNodesInfoRequest : public NActors::TEventLocal { + }; + + struct TEvNodesInfoResponse : public NActors::TEventLocal { + explicit TEvNodesInfoResponse(ui32 nodeCount) + : NodeCount(nodeCount) + {} + + const ui32 NodeCount; + }; + // Tables queries events struct TEvTablesCreationFinished : public NActors::TEventLocal { TEvTablesCreationFinished(bool success, NYql::TIssues issues) diff --git a/ydb/core/kqp/workload_service/common/helpers.cpp b/ydb/core/kqp/workload_service/common/helpers.cpp index 3a5427a9de4a..b9ad774c6f84 100644 --- a/ydb/core/kqp/workload_service/common/helpers.cpp +++ b/ydb/core/kqp/workload_service/common/helpers.cpp @@ -1,8 +1,32 @@ #include "helpers.h" +#include +#include + namespace NKikimr::NKqp::NWorkload { +TString CreateDatabaseId(const TString& database, bool serverless, TPathId pathId) { + TString databasePath = CanonizePath(database); + TString tennantPath = CanonizePath(AppData()->TenantName); + if (databasePath.empty() || databasePath == tennantPath) { + return tennantPath; + } + + if (serverless) { + databasePath = TStringBuilder() << pathId.OwnerId << ":" << pathId.LocalPathId << ":" << databasePath; + } + return databasePath; +} + +TString DatabaseIdToDatabase(TStringBuf databaseId) { + TStringBuf id; + TStringBuf database; + 
return databaseId.TrySplit("/", id, database) + ? CanonizePath(TString(database)) // Serverless + : CanonizePath(TString(databaseId)); // Dedicated +} + NYql::TIssues GroupIssues(const NYql::TIssues& issues, const TString& message) { NYql::TIssue rootIssue(message); for (const NYql::TIssue& issue : issues) { @@ -12,12 +36,11 @@ NYql::TIssues GroupIssues(const NYql::TIssues& issues, const TString& message) { } void ParsePoolSettings(const NKikimrSchemeOp::TResourcePoolDescription& description, NResourcePool::TPoolSettings& poolConfig) { - const auto& properties = description.GetProperties().GetProperties(); - for (auto& [property, value] : NResourcePool::GetPropertiesMap(poolConfig)) { - if (auto propertyIt = properties.find(property); propertyIt != properties.end()) { - std::visit(NResourcePool::TSettingsParser{propertyIt->second}, value); - } - } + poolConfig = NResourcePool::TPoolSettings(description.GetProperties().GetProperties()); +} + +ui64 SaturationSub(ui64 x, ui64 y) { + return (x > y) ? 
x - y : 0; } } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/helpers.h b/ydb/core/kqp/workload_service/common/helpers.h index 85aff302d68f..a4c933b92356 100644 --- a/ydb/core/kqp/workload_service/common/helpers.h +++ b/ydb/core/kqp/workload_service/common/helpers.h @@ -62,21 +62,25 @@ class TSchemeActorBase : public NActors::TActorBootstrapped { virtual TString LogPrefix() const = 0; protected: - bool ScheduleRetry(const TString& message, bool longDelay = false) { + bool ScheduleRetry(NYql::TIssues issues, bool longDelay = false) { if (!RetryState) { RetryState = CreateRetryState(); } if (const auto delay = RetryState->GetNextRetryDelay(longDelay)) { - Issues.AddIssue(message); + Issues.AddIssues(issues); this->Schedule(*delay, new TEvents::TEvWakeup()); - LOG_W("Scheduled retry for error: " << message); + LOG_W("Scheduled retry for error: " << issues.ToOneLineString()); return true; } return false; } + bool ScheduleRetry(const TString& message, bool longDelay = false) { + return ScheduleRetry({NYql::TIssue(message)}, longDelay); + } + private: static TRetryPolicy::IRetryState::TPtr CreateRetryState() { return TRetryPolicy::GetFixedIntervalPolicy( @@ -95,8 +99,13 @@ class TSchemeActorBase : public NActors::TActorBootstrapped { }; +TString CreateDatabaseId(const TString& database, bool serverless, TPathId pathId); +TString DatabaseIdToDatabase(TStringBuf databaseId); + NYql::TIssues GroupIssues(const NYql::TIssues& issues, const TString& message); void ParsePoolSettings(const NKikimrSchemeOp::TResourcePoolDescription& description, NResourcePool::TPoolSettings& poolConfig); +ui64 SaturationSub(ui64 x, ui64 y); + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/ya.make b/ydb/core/kqp/workload_service/common/ya.make index 44cbd65ca22e..4026b389648b 100644 --- a/ydb/core/kqp/workload_service/common/ya.make +++ b/ydb/core/kqp/workload_service/common/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + 
cpu_quota_manager.cpp events.cpp helpers.cpp ) @@ -14,6 +15,8 @@ PEERDIR( ydb/library/actors/core + ydb/public/sdk/cpp/client/ydb_types + library/cpp/retry ) diff --git a/ydb/core/kqp/workload_service/kqp_workload_service.cpp b/ydb/core/kqp/workload_service/kqp_workload_service.cpp index 94ad44dbe460..af4c0536b45f 100644 --- a/ydb/core/kqp/workload_service/kqp_workload_service.cpp +++ b/ydb/core/kqp/workload_service/kqp_workload_service.cpp @@ -1,4 +1,5 @@ #include "kqp_workload_service.h" +#include "kqp_workload_service_impl.h" #include #include @@ -8,12 +9,13 @@ #include #include -#include #include #include #include +#include + namespace NKikimr::NKqp { @@ -23,10 +25,25 @@ namespace { using namespace NActors; -constexpr TDuration IDLE_DURATION = TDuration::Seconds(15); - class TKqpWorkloadService : public TActorBootstrapped { + struct TCounters { + const NMonitoring::TDynamicCounterPtr Counters; + + NMonitoring::TDynamicCounters::TCounterPtr ActivePools; + + TCounters(NMonitoring::TDynamicCounterPtr counters) + : Counters(counters) + { + Register(); + } + + private: + void Register() { + ActivePools = Counters->GetCounter("ActivePools", false); + } + }; + enum class ETablesCreationStatus { Cleanup, NotStarted, @@ -34,53 +51,16 @@ class TKqpWorkloadService : public TActorBootstrapped { Finished, }; - struct TPoolState { - TActorId PoolHandler; - TActorContext ActorContext; - - std::queue PendingRequests = {}; - bool WaitingInitialization = false; - bool PlaceRequestRunning = false; - std::optional NewPoolHandler = std::nullopt; - - ui64 InFlightRequests = 0; - TInstant LastUpdateTime = TInstant::Now(); - - void UpdateHandler() { - if (PlaceRequestRunning || WaitingInitialization || !NewPoolHandler) { - return; - } - - ActorContext.Send(PoolHandler, new TEvPrivate::TEvStopPoolHandler()); - PoolHandler = *NewPoolHandler; - NewPoolHandler = std::nullopt; - InFlightRequests = 0; - } - - void StartPlaceRequest() { - if (PlaceRequestRunning || PendingRequests.empty()) { 
- return; - } - - PlaceRequestRunning = true; - InFlightRequests++; - ActorContext.Send(PendingRequests.front()->Forward(PoolHandler)); - PendingRequests.pop(); - } - - void OnRequestFinished() { - Y_ENSURE(InFlightRequests); - InFlightRequests--; - LastUpdateTime = TInstant::Now(); - } + enum class EWakeUp { + IdleCheck, + StartCpuLoadRequest, + StartNodeInfoRequest }; public: explicit TKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters) : Counters(counters) - { - RegisterCounters(); - } + {} void Bootstrap() { Become(&TKqpWorkloadService::MainState); @@ -90,7 +70,11 @@ class TKqpWorkloadService : public TActorBootstrapped { (ui32)NKikimrConsole::TConfigItem::FeatureFlagsItem }), IEventHandle::FlagTrackDelivery); + CpuQuotaManager = std::make_unique(ActorContext(), Counters.Counters->GetSubgroup("subcomponent", "CpuQuotaManager")); + EnabledResourcePools = AppData()->FeatureFlags.GetEnableResourcePools(); + EnabledResourcePoolsOnServerless = AppData()->FeatureFlags.GetEnableResourcePoolsOnServerless(); + EnableResourcePoolsCounters = AppData()->FeatureFlags.GetEnableResourcePoolsCounters(); if (EnabledResourcePools) { InitializeWorkloadService(); } @@ -117,6 +101,8 @@ class TKqpWorkloadService : public TActorBootstrapped { const auto& event = ev->Get()->Record; EnabledResourcePools = event.GetConfig().GetFeatureFlags().GetEnableResourcePools(); + EnabledResourcePoolsOnServerless = event.GetConfig().GetFeatureFlags().GetEnableResourcePoolsOnServerless(); + EnableResourcePoolsCounters = event.GetConfig().GetFeatureFlags().GetEnableResourcePoolsCounters(); if (EnabledResourcePools) { LOG_I("Resource pools was enanbled"); InitializeWorkloadService(); @@ -128,6 +114,13 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(ev->Sender, responseEvent.release(), IEventHandle::FlagTrackDelivery, ev->Cookie); } + void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { + NodeCount = ev->Get()->Nodes.size(); + ScheduleNodeInfoRequest(); + + LOG_T("Updated 
node info, noode count: " << NodeCount); + } + void Handle(TEvents::TEvUndelivered::TPtr& ev) const { switch (ev->Get()->SourceType) { case NConsole::TEvConfigsDispatcher::EvSetConfigSubscriptionRequest: @@ -138,12 +131,29 @@ class TKqpWorkloadService : public TActorBootstrapped { LOG_E("Failed to deliver config notification response"); break; + case TEvInterconnect::EvListNodes: + LOG_W("Failed to deliver list nodes request"); + ScheduleNodeInfoRequest(); + break; + default: LOG_E("Undelivered event with unexpected source type: " << ev->Get()->SourceType); break; } } + void Handle(TEvSubscribeOnPoolChanges::TPtr& ev) { + const TString& databaseId = ev->Get()->DatabaseId; + const TString& poolId = ev->Get()->PoolId; + if (!EnabledResourcePools) { + Send(ev->Sender, new TEvUpdatePoolInfo(databaseId, poolId, std::nullopt, std::nullopt)); + return; + } + + LOG_D("Recieved subscription request, DatabaseId: " << databaseId << ", PoolId: " << poolId); + GetOrCreateDatabaseState(databaseId)->DoSubscribeRequest(std::move(ev)); + } + void Handle(TEvPlaceRequestIntoPool::TPtr& ev) { const TActorId& workerActorId = ev->Sender; if (!EnabledResourcePools) { @@ -151,45 +161,44 @@ class TKqpWorkloadService : public TActorBootstrapped { return; } - // Add AllAuthenticatedUsers group SID into user token - ev->Get()->UserToken = GetUserToken(ev->Get()->UserToken); - - LOG_D("Recieved new request from " << workerActorId << ", Database: " << ev->Get()->Database << ", PoolId: " << ev->Get()->PoolId << ", SessionId: " << ev->Get()->SessionId); - bool hasDefaultPool = DatabasesWithDefaultPool.contains(CanonizePath(ev->Get()->Database)); - Register(CreatePoolResolverActor(std::move(ev), hasDefaultPool)); + const TString& databaseId = ev->Get()->DatabaseId; + LOG_D("Recieved new request from " << workerActorId << ", DatabaseId: " << databaseId << ", PoolId: " << ev->Get()->PoolId << ", SessionId: " << ev->Get()->SessionId); + 
GetOrCreateDatabaseState(databaseId)->DoPlaceRequest(std::move(ev)); } void Handle(TEvCleanupRequest::TPtr& ev) { - const TString& database = ev->Get()->Database; + const TString& databaseId = ev->Get()->DatabaseId; const TString& poolId = ev->Get()->PoolId; - auto poolState = GetPoolState(database, poolId); + const TString& sessionId = ev->Get()->SessionId; + if (GetOrCreateDatabaseState(databaseId)->PendingSessionIds.contains(sessionId)) { + LOG_D("Finished request with worker actor " << ev->Sender << ", wait for place request, DatabaseId: " << databaseId << ", PoolId: " << poolId << ", SessionId: " << ev->Get()->SessionId); + GetOrCreateDatabaseState(databaseId)->PendingCancelRequests[sessionId].emplace_back(std::move(ev)); + return; + } + + auto poolState = GetPoolState(databaseId, poolId); if (!poolState) { ReplyCleanupError(ev->Sender, Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Pool " << poolId << " not found"); return; } - LOG_D("Finished request with worker actor " << ev->Sender << ", Database: " << database << ", PoolId: " << poolId << ", SessionId: " << ev->Get()->SessionId); - Send(ev->Forward(poolState->PoolHandler)); + LOG_D("Finished request with worker actor " << ev->Sender << ", DatabaseId: " << databaseId << ", PoolId: " << poolId << ", SessionId: " << ev->Get()->SessionId); + poolState->DoCleanupRequest(std::move(ev)); } - void HandleWakeup() { - IdleChecksStarted = false; + void Handle(TEvents::TEvWakeup::TPtr& ev) { + switch (static_cast(ev->Get()->Tag)) { + case EWakeUp::IdleCheck: + RunIdleCheck(); + break; - std::vector poolsToDelete; - poolsToDelete.reserve(PoolIdToState.size()); - for (const auto& [poolKey, poolState] : PoolIdToState) { - if (!poolState.InFlightRequests && TInstant::Now() - poolState.LastUpdateTime > IDLE_DURATION) { - Send(poolState.PoolHandler, new TEvPrivate::TEvStopPoolHandler()); - poolsToDelete.emplace_back(poolKey); - } - } - for (const auto& poolKey : poolsToDelete) { - PoolIdToState.erase(poolKey); - 
ActivePools->Dec(); - } + case EWakeUp::StartCpuLoadRequest: + RunCpuLoadRequest(); + break; - if (!PoolIdToState.empty()) { - StartIdleChecks(); + case EWakeUp::StartNodeInfoRequest: + RunNodeInfoRequest(); + break; } } @@ -197,93 +206,145 @@ class TKqpWorkloadService : public TActorBootstrapped { sFunc(TEvents::TEvPoison, HandlePoison); sFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, HandleSetConfigSubscriptionResponse); hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + hFunc(TEvInterconnect::TEvNodesInfo, Handle); hFunc(TEvents::TEvUndelivered, Handle); + hFunc(TEvSubscribeOnPoolChanges, Handle); hFunc(TEvPlaceRequestIntoPool, Handle); hFunc(TEvCleanupRequest, Handle); - sFunc(TEvents::TEvWakeup, HandleWakeup); + hFunc(TEvents::TEvWakeup, Handle); + hFunc(TEvFetchDatabaseResponse, Handle); + hFunc(TEvPrivate::TEvFetchPoolResponse, Handle); hFunc(TEvPrivate::TEvResolvePoolResponse, Handle); hFunc(TEvPrivate::TEvPlaceRequestIntoPoolResponse, Handle); + hFunc(TEvPrivate::TEvNodesInfoRequest, Handle); hFunc(TEvPrivate::TEvRefreshPoolState, Handle); + hFunc(TEvPrivate::TEvCpuQuotaRequest, Handle); hFunc(TEvPrivate::TEvFinishRequestInPool, Handle); hFunc(TEvPrivate::TEvPrepareTablesRequest, Handle); hFunc(TEvPrivate::TEvCleanupTablesFinished, Handle); hFunc(TEvPrivate::TEvTablesCreationFinished, Handle); + hFunc(TEvPrivate::TEvCpuLoadResponse, Handle); hFunc(TEvPrivate::TEvResignPoolHandler, Handle); + hFunc(TEvPrivate::TEvStopPoolHandlerResponse, Handle); ) private: + void Handle(TEvFetchDatabaseResponse::TPtr& ev) { + if (ev->Get()->Status == Ydb::StatusIds::SUCCESS) { + LOG_D("Successfully fetched database info, DatabaseId: " << ev->Get()->DatabaseId << ", Serverless: " << ev->Get()->Serverless); + } else { + LOG_D("Failed to fetch database info, DatabaseId: " << ev->Get()->DatabaseId << ", Status: " << ev->Get()->Status << ", Issues: " << ev->Get()->Issues.ToOneLineString()); + } + 
GetOrCreateDatabaseState(ev->Get()->DatabaseId)->UpdateDatabaseInfo(ev); + } + + void Handle(TEvPrivate::TEvFetchPoolResponse::TPtr& ev) { + const TString& databaseId = ev->Get()->DatabaseId; + const TString& poolId = ev->Get()->PoolId; + + TActorId poolHandler; + if (ev->Get()->Status == Ydb::StatusIds::SUCCESS) { + LOG_D("Successfully fetched pool " << poolId << ", DatabaseId: " << databaseId); + poolHandler = GetOrCreatePoolState(databaseId, poolId, ev->Get()->PoolConfig)->PoolHandler; + } else { + LOG_W("Failed to fetch pool " << poolId << ", DatabaseId: " << databaseId << ", status: " << ev->Get()->Status << ", issues: " << ev->Get()->Issues.ToOneLineString()); + } + + GetOrCreateDatabaseState(databaseId)->UpdatePoolInfo(ev, poolHandler); + } + void Handle(TEvPrivate::TEvResolvePoolResponse::TPtr& ev) { const auto& event = ev->Get()->Event; - const TString& database = event->Get()->Database; + const TString& databaseId = event->Get()->DatabaseId; + auto databaseState = GetOrCreateDatabaseState(databaseId); if (ev->Get()->DefaultPoolCreated) { - DatabasesWithDefaultPool.insert(CanonizePath(database)); + databaseState->HasDefaultPool = true; } const TString& poolId = event->Get()->PoolId; if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + databaseState->RemovePendingSession(event->Get()->SessionId, [this](TEvCleanupRequest::TPtr event) { + ReplyCleanupError(event->Sender, Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Pool " << event->Get()->PoolId << " not found"); + }); ReplyContinueError(event->Sender, ev->Get()->Status, ev->Get()->Issues); return; } - LOG_D("Successfully fetched pool " << poolId << ", Database: " << database << ", SessionId: " << event->Get()->SessionId); - - auto poolState = GetPoolState(database, poolId); - if (!poolState) { - TString poolKey = GetPoolKey(database, poolId); - LOG_I("Creating new handler for pool " << poolKey); - - auto poolHandler = Register(CreatePoolHandlerActor(database, poolId, ev->Get()->PoolConfig, Counters)); - 
poolState = &PoolIdToState.insert({poolKey, TPoolState{.PoolHandler = poolHandler, .ActorContext = ActorContext()}}).first->second; - - ActivePools->Inc(); - StartIdleChecks(); - } + LOG_D("Successfully fetched pool " << poolId << ", DatabaseId: " << databaseId << ", SessionId: " << event->Get()->SessionId); + auto poolState = GetOrCreatePoolState(databaseId, poolId, ev->Get()->PoolConfig); poolState->PendingRequests.emplace(std::move(ev)); poolState->StartPlaceRequest(); } void Handle(TEvPrivate::TEvPlaceRequestIntoPoolResponse::TPtr& ev) { - const TString& database = ev->Get()->Database; + const TString& databaseId = ev->Get()->DatabaseId; const TString& poolId = ev->Get()->PoolId; - LOG_T("Request placed into pool, Database: " << database << ", PoolId: " << poolId); + const TString& sessionId = ev->Get()->SessionId; + LOG_T("Request placed into pool, DatabaseId: " << databaseId << ", PoolId: " << poolId << ", SessionId: " << sessionId); + + auto poolState = GetPoolState(databaseId, poolId); + GetOrCreateDatabaseState(databaseId)->RemovePendingSession(sessionId, [this, poolState](TEvCleanupRequest::TPtr event) { + if (poolState) { + poolState->DoCleanupRequest(std::move(event)); + } else { + ReplyCleanupError(event->Sender, Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Pool " << event->Get()->PoolId << " not found"); + } + }); - if (auto poolState = GetPoolState(database, poolId)) { + if (poolState) { poolState->PlaceRequestRunning = false; poolState->UpdateHandler(); poolState->StartPlaceRequest(); } } + void Handle(TEvPrivate::TEvNodesInfoRequest::TPtr& ev) const { + Send(ev->Sender, new TEvPrivate::TEvNodesInfoResponse(NodeCount)); + } + void Handle(TEvPrivate::TEvRefreshPoolState::TPtr& ev) { const auto& event = ev->Get()->Record; - const TString& database = event.GetDatabase(); + const TString& databaseId = event.GetDatabase(); const TString& poolId = event.GetPoolId(); - LOG_T("Got remote refresh request, Database: " << database << ", PoolId: " << poolId 
<< ", NodeId: " << ev->Sender.NodeId()); + LOG_T("Got remote refresh request, DatabaseId: " << databaseId << ", PoolId: " << poolId << ", NodeId: " << ev->Sender.NodeId()); - if (auto poolState = GetPoolState(database, poolId)) { + if (auto poolState = GetPoolState(databaseId, poolId)) { Send(ev->Forward(poolState->PoolHandler)); } } + void Handle(TEvPrivate::TEvCpuQuotaRequest::TPtr& ev) { + const TActorId& poolHandler = ev->Sender; + const double maxClusterLoad = ev->Get()->MaxClusterLoad; + LOG_T("Requested cpu quota from handler " << poolHandler << ", MaxClusterLoad: " << maxClusterLoad); + + CpuQuotaManager->RequestCpuQuota(poolHandler, maxClusterLoad, ev->Cookie); + ScheduleCpuLoadRequest(); + } + void Handle(TEvPrivate::TEvFinishRequestInPool::TPtr& ev) { - const TString& database = ev->Get()->Database; + const TString& databaseId = ev->Get()->DatabaseId; const TString& poolId = ev->Get()->PoolId; - LOG_T("Request finished in pool, Database: " << database << ", PoolId: " << poolId); + LOG_T("Request finished in pool, DatabaseId: " << databaseId << ", PoolId: " << poolId << ", Duration: " << ev->Get()->Duration << ", CpuConsumed: " << ev->Get()->CpuConsumed << ", AdjustCpuQuota: " << ev->Get()->AdjustCpuQuota); - if (auto poolState = GetPoolState(database, poolId)) { + if (auto poolState = GetPoolState(databaseId, poolId)) { poolState->OnRequestFinished(); } + if (ev->Get()->AdjustCpuQuota) { + CpuQuotaManager->AdjustCpuQuota(ev->Get()->Duration, ev->Get()->CpuConsumed.SecondsFloat()); + ScheduleCpuLoadRequest(); + } } void Handle(TEvPrivate::TEvPrepareTablesRequest::TPtr& ev) { - const TString& database = ev->Get()->Database; + const TString& databaseId = ev->Get()->DatabaseId; const TString& poolId = ev->Get()->PoolId; - LOG_T("Got create teables request, Database: " << database << ", PoolId: " << poolId); + LOG_T("Got create teables request, DatabaseId: " << databaseId << ", PoolId: " << poolId); - auto poolState = GetPoolState(database, poolId); + auto 
poolState = GetPoolState(databaseId, poolId); if (!poolState) { return; } @@ -292,7 +353,7 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(poolState->PoolHandler, new TEvPrivate::TEvTablesCreationFinished(true, {})); } else { poolState->WaitingInitialization = true; - PendingHandlers.emplace(GetPoolKey(database, poolId)); + PendingHandlers.emplace(GetPoolKey(databaseId, poolId)); PrepareWorkloadServiceTables(); } } @@ -327,20 +388,44 @@ class TKqpWorkloadService : public TActorBootstrapped { OnTabelsCreated(false, issues); } + void Handle(TEvPrivate::TEvCpuLoadResponse::TPtr& ev) { + const bool success = ev->Get()->Status == Ydb::StatusIds::SUCCESS; + if (!success) { + LOG_E("Failed to fetch cpu load " << ev->Get()->Status << ", issues: " << ev->Get()->Issues.ToOneLineString()); + } else { + LOG_T("Succesfully fetched cpu load: " << 100.0 * ev->Get()->InstantLoad << "%, cpu number: " << ev->Get()->CpuNumber); + } + + CpuQuotaManager->CpuLoadRequestRunning = false; + CpuQuotaManager->UpdateCpuLoad(ev->Get()->InstantLoad, ev->Get()->CpuNumber, success); + ScheduleCpuLoadRequest(); + } + void Handle(TEvPrivate::TEvResignPoolHandler::TPtr& ev) { - const TString& database = ev->Get()->Database; + const TString& databaseId = ev->Get()->DatabaseId; const TString& poolId = ev->Get()->PoolId; - LOG_T("Got resign request, Database: " << database << ", PoolId: " << poolId); + LOG_T("Got resign request, DatabaseId: " << databaseId << ", PoolId: " << poolId); - if (auto poolState = GetPoolState(database, poolId)) { + if (auto poolState = GetPoolState(databaseId, poolId)) { if (poolState->NewPoolHandler) { - Send(*poolState->NewPoolHandler, new TEvPrivate::TEvStopPoolHandler()); + Send(*poolState->NewPoolHandler, new TEvPrivate::TEvStopPoolHandler(false)); } poolState->NewPoolHandler = ev->Get()->NewHandler; poolState->UpdateHandler(); } } + void Handle(TEvPrivate::TEvStopPoolHandlerResponse::TPtr& ev) { + const TString& databaseId = ev->Get()->DatabaseId; + 
const TString& poolId = ev->Get()->PoolId; + LOG_T("Got stop pool handler response, DatabaseId: " << databaseId << ", PoolId: " << poolId); + + Counters.ActivePools->Dec(); + if (auto poolState = GetPoolState(databaseId, poolId)) { + poolState->PreviousPoolHandlers.erase(ev->Sender); + } + } + private: void InitializeWorkloadService() { if (ServiceInitialized) { @@ -350,6 +435,7 @@ class TKqpWorkloadService : public TActorBootstrapped { LOG_I("Started workload service initialization"); Register(CreateCleanupTablesActor()); + RunNodeInfoRequest(); } void PrepareWorkloadServiceTables() { @@ -378,13 +464,70 @@ class TKqpWorkloadService : public TActorBootstrapped { PendingHandlers.clear(); } - void StartIdleChecks() { + void ScheduleIdleCheck() { if (IdleChecksStarted) { return; } IdleChecksStarted = true; - Schedule(IDLE_DURATION, new TEvents::TEvWakeup()); + Schedule(IDLE_DURATION / 2, new TEvents::TEvWakeup(static_cast(EWakeUp::IdleCheck))); + } + + void RunIdleCheck() { + IdleChecksStarted = false; + + std::vector poolsToDelete; + poolsToDelete.reserve(PoolIdToState.size()); + for (const auto& [poolKey, poolState] : PoolIdToState) { + if (!poolState.InFlightRequests && TInstant::Now() - poolState.LastUpdateTime > IDLE_DURATION && poolState.PendingRequests.empty()) { + CpuQuotaManager->CleanupHandler(poolState.PoolHandler); + Send(poolState.PoolHandler, new TEvPrivate::TEvStopPoolHandler(true)); + poolsToDelete.emplace_back(poolKey); + } + } + for (const auto& poolKey : poolsToDelete) { + PoolIdToState.erase(poolKey); + } + + if (!PoolIdToState.empty()) { + ScheduleIdleCheck(); + } + } + + void ScheduleCpuLoadRequest() const { + auto delay = CpuQuotaManager->GetCpuLoadRequestDelay(); + if (!delay) { + return; + } + + if (*delay) { + Schedule(*delay, new TEvents::TEvWakeup(static_cast(EWakeUp::StartCpuLoadRequest))); + } else { + RunCpuLoadRequest(); + } + } + + void RunCpuLoadRequest() const { + if (CpuQuotaManager->CpuLoadRequestRunning) { + return; + } + + 
CpuQuotaManager->CpuLoadRequestTime = TInstant::Zero(); + if (CpuQuotaManager->CpuQuotaManager.GetMonitoringRequestDelay()) { + ScheduleCpuLoadRequest(); + return; + } + + CpuQuotaManager->CpuLoadRequestRunning = true; + Register(CreateCpuLoadFetcherActor(SelfId())); + } + + void ScheduleNodeInfoRequest() const { + Schedule(IDLE_DURATION * 2, new TEvents::TEvWakeup(static_cast(EWakeUp::StartNodeInfoRequest))); + } + + void RunNodeInfoRequest() const { + Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(), IEventHandle::FlagTrackDelivery); } private: @@ -406,27 +549,34 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(replyActorId, new TEvCleanupResponse(status, {NYql::TIssue(message)})); } - static TIntrusivePtr GetUserToken(TIntrusiveConstPtr userToken) { - auto token = MakeIntrusive(userToken ? userToken->GetUserSID() : NACLib::TSID(), TVector{}); - - bool hasAllAuthenticatedUsersSID = false; - const auto& allAuthenticatedUsersSID = AppData()->AllAuthenticatedUsers; - if (userToken) { - for (const auto& groupSID : userToken->GetGroupSIDs()) { - token->AddGroupSID(groupSID); - hasAllAuthenticatedUsersSID = hasAllAuthenticatedUsersSID || groupSID == allAuthenticatedUsersSID; - } + TDatabaseState* GetOrCreateDatabaseState(TString databaseId) { + auto databaseIt = DatabaseToState.find(databaseId); + if (databaseIt != DatabaseToState.end()) { + return &databaseIt->second; } + LOG_I("Creating new database state for id " << databaseId); + return &DatabaseToState.insert({databaseId, TDatabaseState{.ActorContext = ActorContext(), .EnabledResourcePoolsOnServerless = EnabledResourcePoolsOnServerless}}).first->second; + } - if (!hasAllAuthenticatedUsersSID) { - token->AddGroupSID(allAuthenticatedUsersSID); + TPoolState* GetOrCreatePoolState(const TString& databaseId, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig) { + const auto& poolKey = GetPoolKey(databaseId, poolId); + if (auto poolState = GetPoolState(poolKey)) { + 
return poolState; } - return token; + LOG_I("Creating new handler for pool " << poolKey); + + const auto poolHandler = Register(CreatePoolHandlerActor(databaseId, poolId, poolConfig, EnableResourcePoolsCounters ? Counters.Counters : MakeIntrusive())); + const auto poolState = &PoolIdToState.insert({poolKey, TPoolState{.PoolHandler = poolHandler, .ActorContext = ActorContext()}}).first->second; + + Counters.ActivePools->Inc(); + ScheduleIdleCheck(); + + return poolState; } - TPoolState* GetPoolState(const TString& database, const TString& poolId) { - return GetPoolState(GetPoolKey(database, poolId)); + TPoolState* GetPoolState(const TString& databaseId, const TString& poolId) { + return GetPoolState(GetPoolKey(databaseId, poolId)); } TPoolState* GetPoolState(const TString& key) { @@ -437,35 +587,37 @@ class TKqpWorkloadService : public TActorBootstrapped { return nullptr; } - static TString GetPoolKey(const TString& database, const TString& poolId) { - return CanonizePath(TStringBuilder() << database << "/" << poolId); + static TString GetPoolKey(const TString& databaseId, const TString& poolId) { + return CanonizePath(TStringBuilder() << databaseId << "/" << poolId); } TString LogPrefix() const { return "[Service] "; } - void RegisterCounters() { - ActivePools = Counters->GetCounter("ActivePools", false); - } - private: - NMonitoring::TDynamicCounterPtr Counters; + TCounters Counters; bool EnabledResourcePools = false; + bool EnabledResourcePoolsOnServerless = false; + bool EnableResourcePoolsCounters = false; bool ServiceInitialized = false; bool IdleChecksStarted = false; ETablesCreationStatus TablesCreationStatus = ETablesCreationStatus::Cleanup; - std::unordered_set PendingHandlers; - - std::unordered_set DatabasesWithDefaultPool; - std::unordered_map PoolIdToState; + std::unordered_set PendingHandlers; // DatabaseID/PoolID - NMonitoring::TDynamicCounters::TCounterPtr ActivePools; + std::unordered_map DatabaseToState; // DatabaseID to state + std::unordered_map 
PoolIdToState; // DatabaseID/PoolID to state + std::unique_ptr CpuQuotaManager; + ui32 NodeCount = 0; }; } // anonymous namespace +bool IsWorkloadServiceRequired(const NResourcePool::TPoolSettings& config) { + return config.ConcurrentQueryLimit != -1 || config.DatabaseLoadCpuThreshold >= 0.0 || config.QueryCancelAfter; +} + } // namespace NWorkload IActor* CreateKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters) { diff --git a/ydb/core/kqp/workload_service/kqp_workload_service.h b/ydb/core/kqp/workload_service/kqp_workload_service.h index 33371c97a249..5b6015328dd9 100644 --- a/ydb/core/kqp/workload_service/kqp_workload_service.h +++ b/ydb/core/kqp/workload_service/kqp_workload_service.h @@ -1,10 +1,18 @@ #pragma once +#include + #include namespace NKikimr::NKqp { +namespace NWorkload { + +bool IsWorkloadServiceRequired(const NResourcePool::TPoolSettings& config); + +} // namespace NWorkload + NActors::IActor* CreateKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/workload_service/kqp_workload_service_impl.h b/ydb/core/kqp/workload_service/kqp_workload_service_impl.h new file mode 100644 index 000000000000..5fd22dbb032b --- /dev/null +++ b/ydb/core/kqp/workload_service/kqp_workload_service_impl.h @@ -0,0 +1,253 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + + +namespace NKikimr::NKqp::NWorkload { + +constexpr TDuration IDLE_DURATION = TDuration::Seconds(60); + + +struct TDatabaseState { + NActors::TActorContext ActorContext; + bool& EnabledResourcePoolsOnServerless; + + std::vector PendingRequersts = {}; + std::unordered_set PendingSessionIds = {}; + std::unordered_map> PendingCancelRequests = {}; // Session ID to requests + std::unordered_map> PendingSubscriptions = {}; // Pool ID to subscribers + bool HasDefaultPool = false; + bool Serverless = false; + bool DatabaseUnsupported = false; + + TInstant LastUpdateTime = TInstant::Zero(); + + 
void DoSubscribeRequest(TEvSubscribeOnPoolChanges::TPtr ev) { + const TString& poolId = ev->Get()->PoolId; + auto& subscribers = PendingSubscriptions[poolId]; + if (subscribers.empty()) { + ActorContext.Register(CreatePoolFetcherActor(ActorContext.SelfID, ev->Get()->DatabaseId, poolId, nullptr)); + } + + subscribers.emplace(ev->Sender); + } + + void DoPlaceRequest(TEvPlaceRequestIntoPool::TPtr ev) { + TString databaseId = ev->Get()->DatabaseId; + PendingSessionIds.emplace(ev->Get()->SessionId); + PendingRequersts.emplace_back(std::move(ev)); + + if (!EnabledResourcePoolsOnServerless && (TInstant::Now() - LastUpdateTime) > IDLE_DURATION) { + ActorContext.Register(CreateDatabaseFetcherActor(ActorContext.SelfID, DatabaseIdToDatabase(databaseId))); + } else if (!DatabaseUnsupported) { + StartPendingRequests(); + } else { + ReplyContinueError(Ydb::StatusIds::UNSUPPORTED, {NYql::TIssue(TStringBuilder() << "Unsupported database: " << databaseId)}); + } + } + + void UpdatePoolInfo(const TEvPrivate::TEvFetchPoolResponse::TPtr& ev, NActors::TActorId poolHandler) { + const TString& poolId = ev->Get()->PoolId; + auto& subscribers = PendingSubscriptions[poolId]; + if (subscribers.empty()) { + return; + } + + if (ev->Get()->Status == Ydb::StatusIds::SUCCESS && poolHandler) { + ActorContext.Send(poolHandler, new TEvPrivate::TEvUpdatePoolSubscription(ev->Get()->PathId, subscribers)); + } else { + const TString& databaseId = ev->Get()->DatabaseId; + for (const auto& subscriber : subscribers) { + ActorContext.Send(subscriber, new TEvUpdatePoolInfo(databaseId, poolId, std::nullopt, std::nullopt)); + } + } + subscribers.clear(); + } + + void UpdateDatabaseInfo(const TEvFetchDatabaseResponse::TPtr& ev) { + DatabaseUnsupported = ev->Get()->Status == Ydb::StatusIds::UNSUPPORTED; + if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + ReplyContinueError(ev->Get()->Status, GroupIssues(ev->Get()->Issues, "Failed to fetch database info")); + return; + } + + if (Serverless != 
ev->Get()->Serverless) { + ActorContext.Send(MakeKqpProxyID(ActorContext.SelfID.NodeId()), new TEvKqp::TEvUpdateDatabaseInfo(ev->Get()->Database, ev->Get()->DatabaseId, ev->Get()->Serverless)); + } + + LastUpdateTime = TInstant::Now(); + Serverless = ev->Get()->Serverless; + StartPendingRequests(); + } + + void RemovePendingSession(const TString& sessionId, std::function callback) { + for (auto& event : PendingCancelRequests[sessionId]) { + callback(std::move(event)); + } + PendingCancelRequests.erase(sessionId); + PendingSessionIds.erase(sessionId); + } + +private: + void StartPendingRequests() { + if (!EnabledResourcePoolsOnServerless && Serverless) { + ReplyContinueError(Ydb::StatusIds::UNSUPPORTED, {NYql::TIssue("Resource pools are disabled for serverless domains. Please contact your system administrator to enable it")}); + return; + } + + for (auto& ev : PendingRequersts) { + ActorContext.Register(CreatePoolResolverActor(std::move(ev), HasDefaultPool)); + } + PendingRequersts.clear(); + } + + void ReplyContinueError(Ydb::StatusIds::StatusCode status, NYql::TIssues issues) { + for (const auto& ev : PendingRequersts) { + RemovePendingSession(ev->Get()->SessionId, [this](TEvCleanupRequest::TPtr event) { + ActorContext.Send(event->Sender, new TEvCleanupResponse(Ydb::StatusIds::NOT_FOUND, {NYql::TIssue(TStringBuilder() << "Pool " << event->Get()->PoolId << " not found")})); + }); + ActorContext.Send(ev->Sender, new TEvContinueRequest(status, {}, {}, issues)); + } + PendingRequersts.clear(); + } +}; + +struct TPoolState { + NActors::TActorId PoolHandler; + NActors::TActorContext ActorContext; + + std::queue PendingRequests = {}; + bool WaitingInitialization = false; + bool PlaceRequestRunning = false; + std::optional NewPoolHandler = std::nullopt; + std::unordered_set PreviousPoolHandlers = {}; + + ui64 InFlightRequests = 0; + TInstant LastUpdateTime = TInstant::Now(); + + void UpdateHandler() { + if (PlaceRequestRunning || WaitingInitialization || !NewPoolHandler) 
{ + return; + } + + ActorContext.Send(PoolHandler, new TEvPrivate::TEvStopPoolHandler(false)); + PreviousPoolHandlers.insert(PoolHandler); + PoolHandler = *NewPoolHandler; + NewPoolHandler = std::nullopt; + InFlightRequests = 0; + } + + void StartPlaceRequest() { + if (PlaceRequestRunning || PendingRequests.empty()) { + return; + } + + PlaceRequestRunning = true; + InFlightRequests++; + ActorContext.Send(PendingRequests.front()->Forward(PoolHandler)); + PendingRequests.pop(); + } + + void OnRequestFinished() { + Y_ENSURE(InFlightRequests); + InFlightRequests--; + LastUpdateTime = TInstant::Now(); + } + + void DoCleanupRequest(TEvCleanupRequest::TPtr event) { + for (const auto& poolHandler : PreviousPoolHandlers) { + ActorContext.Send(poolHandler, new TEvCleanupRequest( + event->Get()->DatabaseId, event->Get()->SessionId, + event->Get()->PoolId, event->Get()->Duration, event->Get()->CpuConsumed + )); + } + ActorContext.Send(event->Forward(PoolHandler)); + } +}; + +struct TCpuQuotaManagerState { + TCpuQuotaManager CpuQuotaManager; + NActors::TActorContext ActorContext; + bool CpuLoadRequestRunning = false; + TInstant CpuLoadRequestTime = TInstant::Zero(); + + TCpuQuotaManagerState(NActors::TActorContext actorContext, NMonitoring::TDynamicCounterPtr subComponent) + : CpuQuotaManager(TDuration::Seconds(1), TDuration::Seconds(10), IDLE_DURATION, 0.1, true, 0, subComponent) + , ActorContext(actorContext) + {} + + void RequestCpuQuota(TActorId poolHandler, double maxClusterLoad, ui64 coockie) { + auto response = CpuQuotaManager.RequestCpuQuota(0.0, maxClusterLoad); + + bool quotaAccepted = response.Status == NYdb::EStatus::SUCCESS; + ActorContext.Send(poolHandler, new TEvPrivate::TEvCpuQuotaResponse(quotaAccepted, maxClusterLoad, std::move(response.Issues)), 0, coockie); + + // Schedule notification + if (!quotaAccepted) { + if (auto it = HandlersLimits.find(poolHandler); it != HandlersLimits.end()) { + PendingHandlers[it->second].erase(poolHandler); + } + 
HandlersLimits[poolHandler] = maxClusterLoad; + PendingHandlers[maxClusterLoad].insert(poolHandler); + } + } + + void UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success) { + CpuQuotaManager.UpdateCpuLoad(instantLoad, cpuNumber, success); + CheckPendingQueue(); + } + + void AdjustCpuQuota(TDuration duration, double cpuSecondsConsumed) { + CpuQuotaManager.AdjustCpuQuota(0.0, duration, cpuSecondsConsumed); + CheckPendingQueue(); + } + + std::optional GetCpuLoadRequestDelay() { + if (CpuLoadRequestRunning) { + return std::nullopt; + } + + auto requestTime = CpuQuotaManager.GetMonitoringRequestTime(); + if (!CpuLoadRequestTime || CpuLoadRequestTime > requestTime) { + CpuLoadRequestTime = requestTime; + return CpuLoadRequestTime - TInstant::Now(); + } + return std::nullopt; + } + + void CleanupHandler(TActorId poolHandler) { + if (auto it = HandlersLimits.find(poolHandler); it != HandlersLimits.end()) { + PendingHandlers[it->second].erase(poolHandler); + HandlersLimits.erase(it); + } + } + +private: + void CheckPendingQueue() { + while (!PendingHandlers.empty()) { + const auto& [maxClusterLoad, poolHandlers] = *PendingHandlers.begin(); + if (!CpuQuotaManager.HasCpuQuota(maxClusterLoad)) { + break; + } + + for (const TActorId& poolHandler : poolHandlers) { + ActorContext.Send(poolHandler, new TEvPrivate::TEvRefreshPoolState()); + HandlersLimits.erase(poolHandler); + } + PendingHandlers.erase(PendingHandlers.begin()); + } + } + +private: + std::map> PendingHandlers; + std::unordered_map HandlersLimits; +}; + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/tables/table_queries.cpp b/ydb/core/kqp/workload_service/tables/table_queries.cpp index d498c778d224..2a18cadaaa6e 100644 --- a/ydb/core/kqp/workload_service/tables/table_queries.cpp +++ b/ydb/core/kqp/workload_service/tables/table_queries.cpp @@ -28,17 +28,17 @@ class TQueryBase : public NKikimr::TQueryBase { SetOperationInfo(operationName, traceId, counters); } - 
TQueryBase(const TString& operationName, const TString& traceId, const TString& database, const TString& sessionId, NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(operationName, ComposeTraceId(traceId, database, sessionId), counters) + TQueryBase(const TString& operationName, const TString& traceId, const TString& databaseId, const TString& sessionId, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(operationName, ComposeTraceId(traceId, databaseId, sessionId), counters) {} - void UpdateLogInfo(const TString& traceId, const TString& database, const TString& sessionId) { - SetOperationInfo(OperationName, ComposeTraceId(traceId, database, sessionId), nullptr); + void UpdateLogInfo(const TString& traceId, const TString& databaseId, const TString& sessionId) { + SetOperationInfo(OperationName, ComposeTraceId(traceId, databaseId, sessionId), nullptr); } private: - static TString ComposeTraceId(const TString& traceId, const TString& database, const TString& sessionId) { - return TStringBuilder() << traceId << ", RequestDatabase: " << database << ", RequestSessionId: " << sessionId; + static TString ComposeTraceId(const TString& traceId, const TString& databaseId, const TString& sessionId) { + return TStringBuilder() << traceId << ", RequestDatabase: " << databaseId << ", RequestSessionId: " << sessionId; } }; @@ -178,8 +178,15 @@ class TCleanupTablesActor : public TSchemeActorBase { TablePathsToCheck.clear(); for (const auto& result : results) { - const TString& path = CanonizePath(result.Path); - LOG_D("Describe table " << path << " status " << result.Status); + const TString& fullPath = CanonizePath(result.Path); + LOG_D("Describe table " << fullPath << " status " << result.Status); + + std::pair pathPair; + if (TString error; !TrySplitPathByDb(fullPath, AppData()->TenantName, pathPair, error)) { + TablesExists = false; + AddError(TStringBuilder() << "Failed to describe table path " << fullPath << ", " << error); + continue; + } switch (result.Status) 
{ case EStatus::Unknown: @@ -188,10 +195,10 @@ class TCleanupTablesActor : public TSchemeActorBase { case EStatus::AccessDenied: case EStatus::RedirectLookupError: TablesExists = false; - AddError(TStringBuilder() << "Failed to describe table path " << path << ", " << result.Status); + AddError(TStringBuilder() << "Failed to describe table path " << fullPath << ", " << result.Status); break; case EStatus::LookupError: - RetryPathCheck(result.Path, result.Status); + RetryPathCheck(pathPair.second, result.Status); break; case EStatus::RootUnknown: case EStatus::PathErrorUnknown: @@ -199,9 +206,9 @@ class TCleanupTablesActor : public TSchemeActorBase { TablesExists = false; break; case EStatus::Ok: - LOG_D("Start cleanup for table " << path); + LOG_D("Start cleanup for table " << fullPath); CleanupQueriesInFlight++; - Register(new TCleanupTablesRetryQuery(SelfId(), path)); + Register(new TCleanupTablesRetryQuery(SelfId(), fullPath)); break; } } @@ -251,14 +258,14 @@ class TCleanupTablesActor : public TSchemeActorBase { } private: - void RetryPathCheck(const TVector& path, EStatus status) { - if (TablePathsToCheck.empty() && !ScheduleRetry(TStringBuilder() << "Retry " << status << " for table " << CanonizePath(path))) { + void RetryPathCheck(const TString& path, EStatus status) { + if (TablePathsToCheck.empty() && !ScheduleRetry(TStringBuilder() << "Retry " << status << " for table " << path)) { TablesExists = false; - AddError(TStringBuilder() << "Retry limit exceeded for table " << CanonizePath(path) << ", " << status); + AddError(TStringBuilder() << "Retry limit exceeded for table " << path << ", " << status); return; } - TablePathsToCheck.emplace_back(path); + TablePathsToCheck.emplace_back(SplitPath(path)); } template @@ -299,9 +306,9 @@ class TCleanupTablesActor : public TSchemeActorBase { class TRefreshPoolStateQuery : public TQueryBase { public: - TRefreshPoolStateQuery(const TString& database, const TString& poolId, TDuration leaseDuration, 
NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(__func__, poolId, database, "", counters) - , Database(database) + TRefreshPoolStateQuery(const TString& databaseId, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(__func__, poolId, databaseId, "", counters) + , DatabaseId(databaseId) , PoolId(poolId) , LeaseDuration(leaseDuration) {} @@ -314,14 +321,14 @@ class TRefreshPoolStateQuery : public TQueryBase { TString sql = TStringBuilder() << R"( -- TRefreshPoolStateQuery::OnRunQuery - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; DECLARE $node_id AS Uint32; DECLARE $lease_duration AS Interval; UPDATE `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` SET lease_deadline = CurrentUtcTimestamp() + $lease_duration - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND node_id = $node_id AND (wait_deadline IS NULL OR wait_deadline >= CurrentUtcTimestamp()) @@ -329,7 +336,7 @@ class TRefreshPoolStateQuery : public TQueryBase { UPDATE `)" << TTablesCreator::GetRunningRequestsPath() << R"(` SET lease_deadline = CurrentUtcTimestamp() + $lease_duration - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND node_id = $node_id AND lease_deadline >= CurrentUtcTimestamp(); @@ -337,8 +344,8 @@ class TRefreshPoolStateQuery : public TQueryBase { NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -357,27 +364,27 @@ class TRefreshPoolStateQuery : public TQueryBase { void OnLeaseUpdated() { TString sql = TStringBuilder() << R"( -- TRefreshPoolStateQuery::OnLeaseUpdated - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; SELECT COUNT(*) AS delayed_requests FROM `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` - WHERE database = $database + WHERE database 
= $database_id AND pool_id = $pool_id AND (wait_deadline IS NULL OR wait_deadline >= CurrentUtcTimestamp()) AND lease_deadline >= CurrentUtcTimestamp(); SELECT COUNT(*) AS running_requests FROM `)" << TTablesCreator::GetRunningRequestsPath() << R"(` - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND lease_deadline >= CurrentUtcTimestamp(); )"; NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -421,7 +428,7 @@ class TRefreshPoolStateQuery : public TQueryBase { } private: - const TString Database; + const TString DatabaseId; const TString PoolId; const TDuration LeaseDuration; @@ -431,9 +438,9 @@ class TRefreshPoolStateQuery : public TQueryBase { class TDelayRequestQuery : public TQueryBase { public: - TDelayRequestQuery(const TString& database, const TString& poolId, const TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(__func__, poolId, database, sessionId, counters) - , Database(database) + TDelayRequestQuery(const TString& databaseId, const TString& poolId, const TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(__func__, poolId, databaseId, sessionId, counters) + , DatabaseId(databaseId) , PoolId(poolId) , SessionId(sessionId) , StartTime(startTime) @@ -444,7 +451,7 @@ class TDelayRequestQuery : public TQueryBase { void OnRunQuery() override { TString sql = TStringBuilder() << R"( -- TDelayRequestQuery::OnRunQuery - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; DECLARE $start_time AS Timestamp; DECLARE $session_id AS Text; @@ -455,15 +462,15 @@ class TDelayRequestQuery : public TQueryBase { UPSERT INTO `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` (database, pool_id, 
start_time, session_id, node_id, wait_deadline, lease_deadline) VALUES ( - $database, $pool_id, $start_time, $session_id, $node_id, $wait_deadline, + $database_id, $pool_id, $start_time, $session_id, $node_id, $wait_deadline, CurrentUtcTimestamp() + $lease_duration ); )"; NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -496,7 +503,7 @@ class TDelayRequestQuery : public TQueryBase { } private: - const TString Database; + const TString DatabaseId; const TString PoolId; const TString SessionId; const TInstant StartTime; @@ -507,9 +514,9 @@ class TDelayRequestQuery : public TQueryBase { class TStartFirstDelayedRequestQuery : public TQueryBase { public: - TStartFirstDelayedRequestQuery(const TString& database, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(__func__, poolId, database, "", counters) - , Database(database) + TStartFirstDelayedRequestQuery(const TString& databaseId, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(__func__, poolId, databaseId, "", counters) + , DatabaseId(databaseId) , PoolId(poolId) , LeaseDuration(leaseDuration) {} @@ -519,12 +526,12 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { TString sql = TStringBuilder() << R"( -- TStartFirstDelayedRequestQuery::OnRunQuery - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; SELECT database, pool_id, start_time, session_id, node_id FROM `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND (wait_deadline IS NULL OR wait_deadline >= CurrentUtcTimestamp()) AND lease_deadline >= CurrentUtcTimestamp() @@ -534,8 +541,8 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { NYdb::TParamsBuilder params; params - 
.AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -576,7 +583,7 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { } RequestSessionId = *sessionId; - UpdateLogInfo(PoolId, Database, RequestSessionId); + UpdateLogInfo(PoolId, DatabaseId, RequestSessionId); TMaybe startTime = result.ColumnParser("start_time").GetOptionalTimestamp(); if (!startTime) { @@ -591,7 +598,7 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { void StartQueuedRequest() { TString sql = TStringBuilder() << R"( -- TStartFirstDelayedRequestQuery::StartQueuedRequest - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; DECLARE $start_time AS Timestamp; DECLARE $session_id AS Text; @@ -599,7 +606,7 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { DECLARE $lease_duration AS Interval; DELETE FROM `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND node_id = $node_id AND start_time = $start_time @@ -608,15 +615,15 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { UPSERT INTO `)" << TTablesCreator::GetRunningRequestsPath() << R"(` (database, pool_id, session_id, node_id, lease_deadline) VALUES ( - $database, $pool_id, $session_id, $node_id, + $database_id, $pool_id, $session_id, $node_id, CurrentUtcTimestamp() + $lease_duration ); )"; NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -647,7 +654,7 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { } private: - const TString Database; + const TString DatabaseId; const TString PoolId; const TDuration LeaseDuration; @@ -658,9 +665,9 @@ class TStartFirstDelayedRequestQuery : public TQueryBase { class TStartRequestQuery : public TQueryBase { public: - 
TStartRequestQuery(const TString& database, const TString& poolId, const TString& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(__func__, poolId, database, sessionId, counters) - , Database(database) + TStartRequestQuery(const TString& databaseId, const TString& poolId, const TString& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(__func__, poolId, databaseId, sessionId, counters) + , DatabaseId(databaseId) , PoolId(poolId) , SessionId(sessionId) , LeaseDuration(leaseDuration) @@ -669,7 +676,7 @@ class TStartRequestQuery : public TQueryBase { void OnRunQuery() override { TString sql = TStringBuilder() << R"( -- TStartRequestQuery::OnRunQuery - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS Text; DECLARE $session_id AS Text; DECLARE $node_id AS Uint32; @@ -678,15 +685,15 @@ class TStartRequestQuery : public TQueryBase { UPSERT INTO `)" << TTablesCreator::GetRunningRequestsPath() << R"(` (database, pool_id, session_id, node_id, lease_deadline) VALUES ( - $database, $pool_id, $session_id, $node_id, + $database_id, $pool_id, $session_id, $node_id, CurrentUtcTimestamp() + $lease_duration ); )"; NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -713,7 +720,7 @@ class TStartRequestQuery : public TQueryBase { } private: - const TString Database; + const TString DatabaseId; const TString PoolId; const TString SessionId; const TDuration LeaseDuration; @@ -724,9 +731,9 @@ class TStartRequestActor : public TActorBootstrapped { using TStartRequestRetryQuery = TQueryRetryActor; public: - TStartRequestActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) + TStartRequestActor(const TActorId& replyActorId, 
const TString& databaseId, const TString& poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) : ReplyActorId(replyActorId) - , Database(database) + , DatabaseId(databaseId) , PoolId(poolId) , SessionId(sessionId) , LeaseDuration(leaseDuration) @@ -737,9 +744,9 @@ class TStartRequestActor : public TActorBootstrapped { Become(&TStartRequestActor::StateFunc); if (!SessionId) { - Register(new TStartFirstDelayedRequestRetryQuery(SelfId(), Database, PoolId, LeaseDuration, Counters)); + Register(new TStartFirstDelayedRequestRetryQuery(SelfId(), DatabaseId, PoolId, LeaseDuration, Counters)); } else { - Register(new TStartRequestRetryQuery(SelfId(), Database, PoolId, *SessionId, LeaseDuration, Counters)); + Register(new TStartRequestRetryQuery(SelfId(), DatabaseId, PoolId, *SessionId, LeaseDuration, Counters)); } } @@ -753,7 +760,7 @@ class TStartRequestActor : public TActorBootstrapped { private: const TActorId ReplyActorId; - const TString Database; + const TString DatabaseId; const TString PoolId; const std::optional SessionId; const TDuration LeaseDuration; @@ -763,9 +770,9 @@ class TStartRequestActor : public TActorBootstrapped { class TCleanupRequestsQuery : public TQueryBase { public: - TCleanupRequestsQuery(const TString& database, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters) - : TQueryBase(__func__, poolId, database, "", counters) - , Database(database) + TCleanupRequestsQuery(const TString& databaseId, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters) + : TQueryBase(__func__, poolId, databaseId, "", counters) + , DatabaseId(databaseId) , PoolId(poolId) , SessionIds(sessionIds) {} @@ -775,19 +782,19 @@ class TCleanupRequestsQuery : public TQueryBase { -- TCleanupRequestsQuery::OnRunQuery PRAGMA AnsiInForEmptyOrNullableItemsCollections; - DECLARE $database AS Text; + DECLARE $database_id AS Text; DECLARE $pool_id AS 
Text; DECLARE $node_id AS Uint32; DECLARE $session_ids AS List; DELETE FROM `)" << TTablesCreator::GetDelayedRequestsPath() << R"(` - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND node_id = $node_id AND session_id IN $session_ids; DELETE FROM `)" << TTablesCreator::GetRunningRequestsPath() << R"(` - WHERE database = $database + WHERE database = $database_id AND pool_id = $pool_id AND node_id = $node_id AND session_id IN $session_ids; @@ -795,8 +802,8 @@ class TCleanupRequestsQuery : public TQueryBase { NYdb::TParamsBuilder params; params - .AddParam("$database") - .Utf8(Database) + .AddParam("$database_id") + .Utf8(DatabaseId) .Build() .AddParam("$pool_id") .Utf8(PoolId) @@ -825,11 +832,11 @@ class TCleanupRequestsQuery : public TQueryBase { private: TString LogPrefix() const { - return TStringBuilder() << "[TCleanupRequestsQuery] ActorId: " << SelfId() << ", Database: " << Database << ", PoolId: " << PoolId << ", "; + return TStringBuilder() << "[TCleanupRequestsQuery] ActorId: " << SelfId() << ", DatabaseId: " << DatabaseId << ", PoolId: " << PoolId << ", "; } private: - const TString Database; + const TString DatabaseId; const TString PoolId; const std::vector SessionIds; }; @@ -844,20 +851,20 @@ IActor* CreateCleanupTablesActor() { return new TCleanupTablesActor(); } -IActor* CreateRefreshPoolStateActor(const TActorId& replyActorId, const TString& database, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { - return new TQueryRetryActor(replyActorId, database, poolId, leaseDuration, counters); +IActor* CreateRefreshPoolStateActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { + return new TQueryRetryActor(replyActorId, databaseId, poolId, leaseDuration, counters); } -IActor* CreateDelayRequestActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const 
TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { - return new TQueryRetryActor, TDuration, NMonitoring::TDynamicCounterPtr>(replyActorId, database, poolId, sessionId, startTime, waitDeadline, leaseDuration, counters); +IActor* CreateDelayRequestActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, const TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { + return new TQueryRetryActor, TDuration, NMonitoring::TDynamicCounterPtr>(replyActorId, databaseId, poolId, sessionId, startTime, waitDeadline, leaseDuration, counters); } -IActor* CreateStartRequestActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { - return new TStartRequestActor(replyActorId, database, poolId, sessionId, leaseDuration, counters); +IActor* CreateStartRequestActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters) { + return new TStartRequestActor(replyActorId, databaseId, poolId, sessionId, leaseDuration, counters); } -IActor* CreateCleanupRequestsActor(const TActorId& replyActorId, const TString& database, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters) { - return new TQueryRetryActor, NMonitoring::TDynamicCounterPtr>(replyActorId, database, poolId, sessionIds, counters); +IActor* CreateCleanupRequestsActor(const TActorId& replyActorId, const TString& databaseId, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters) { + return new TQueryRetryActor, NMonitoring::TDynamicCounterPtr>(replyActorId, databaseId, poolId, sessionIds, counters); } } // 
NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/tables/table_queries.h b/ydb/core/kqp/workload_service/tables/table_queries.h index f44afac3ba74..b7727bbff28a 100644 --- a/ydb/core/kqp/workload_service/tables/table_queries.h +++ b/ydb/core/kqp/workload_service/tables/table_queries.h @@ -12,11 +12,11 @@ NActors::IActor* CreateTablesCreator(); NActors::IActor* CreateCleanupTablesActor(); // Updates pool lease and returns pool description -NActors::IActor* CreateRefreshPoolStateActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); +NActors::IActor* CreateRefreshPoolStateActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& poolId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); // Push / Start / Finish requests in pool -NActors::IActor* CreateDelayRequestActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, const TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); -NActors::IActor* CreateStartRequestActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); -NActors::IActor* CreateCleanupRequestsActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters); +NActors::IActor* CreateDelayRequestActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& poolId, const TString& sessionId, TInstant startTime, TMaybe waitDeadline, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); +NActors::IActor* CreateStartRequestActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& 
poolId, const std::optional& sessionId, TDuration leaseDuration, NMonitoring::TDynamicCounterPtr counters); +NActors::IActor* CreateCleanupRequestsActor(const NActors::TActorId& replyActorId, const TString& databaseId, const TString& poolId, const std::vector& sessionIds, NMonitoring::TDynamicCounterPtr counters); } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp index 7edc17a78525..7ff63e246426 100644 --- a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp +++ b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp @@ -8,9 +8,10 @@ #include #include #include - #include +#include + namespace NKikimr::NKqp::NWorkload { @@ -229,6 +230,11 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { TAppConfig GetAppConfig() const { TAppConfig appConfig; appConfig.MutableFeatureFlags()->SetEnableResourcePools(Settings_.EnableResourcePools_); + appConfig.MutableFeatureFlags()->SetEnableResourcePoolsOnServerless(Settings_.EnableResourcePoolsOnServerless_); + appConfig.MutableFeatureFlags()->SetEnableMetadataObjectsOnServerless(Settings_.EnableMetadataObjectsOnServerless_); + appConfig.MutableFeatureFlags()->SetEnableExternalDataSourcesOnServerless(Settings_.EnableExternalDataSourcesOnServerless_); + appConfig.MutableFeatureFlags()->SetEnableExternalDataSources(true); + appConfig.MutableFeatureFlags()->SetEnableResourcePoolsCounters(true); return appConfig; } @@ -236,7 +242,7 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { void SetLoggerSettings(TServerSettings& serverSettings) const { auto loggerInitializer = [](TTestActorRuntime& runtime) { runtime.SetLogPriority(NKikimrServices::KQP_WORKLOAD_SERVICE, NLog::EPriority::PRI_TRACE); - runtime.SetLogPriority(NKikimrServices::KQP_SESSION, NLog::EPriority::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::KQP_SESSION, 
NLog::EPriority::PRI_TRACE); }; serverSettings.SetLoggerInitializer(loggerInitializer); @@ -253,16 +259,50 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { .SetAppConfig(appConfig) .SetFeatureFlags(appConfig.GetFeatureFlags()); + if (Settings_.CreateSampleTenants_) { + serverSettings + .SetDynamicNodeCount(2) + .AddStoragePoolType(Settings_.GetDedicatedTenantName()) + .AddStoragePoolType(Settings_.GetSharedTenantName()); + } + SetLoggerSettings(serverSettings); return serverSettings; } + void SetupResourcesTenant(Ydb::Cms::CreateDatabaseRequest& request, Ydb::Cms::StorageUnits* storage, const TString& name) { + request.set_path(name); + storage->set_unit_kind(name); + storage->set_count(1); + } + + void CreateTenants() { + { // Dedicated + Ydb::Cms::CreateDatabaseRequest request; + SetupResourcesTenant(request, request.mutable_resources()->add_storage_units(), Settings_.GetDedicatedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + + { // Shared + Ydb::Cms::CreateDatabaseRequest request; + SetupResourcesTenant(request, request.mutable_shared_resources()->add_storage_units(), Settings_.GetSharedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + + { // Serverless + Ydb::Cms::CreateDatabaseRequest request; + request.set_path(Settings_.GetServerlessTenantName()); + request.mutable_serverless_resources()->set_shared_database_path(Settings_.GetSharedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + } + void InitializeServer() { ui32 grpcPort = PortManager_.GetPort(); TServerSettings serverSettings = GetServerSettings(grpcPort); - Server_ = std::make_unique(serverSettings); + Server_ = MakeIntrusive(serverSettings); Server_->EnableGRpc(grpcPort); GetRuntime()->SetDispatchTimeout(FUTURE_WAIT_TIMEOUT); @@ -275,21 +315,20 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { TableClient_ = std::make_unique(*YdbDriver_, NYdb::NTable::TClientSettings().AuthToken("user@" BUILTIN_SYSTEM_DOMAIN)); TableClientSession_ = 
std::make_unique(TableClient_->CreateSession().GetValueSync().GetSession()); + + Tenants_ = std::make_unique(Server_); + if (Settings_.CreateSampleTenants_) { + CreateTenants(); + } } void CreateSamplePool() const { - if (!Settings_.EnableResourcePools_) { + if (!Settings_.EnableResourcePools_ || Settings_.CreateSampleTenants_) { return; } - NResourcePool::TPoolSettings poolConfig; - poolConfig.ConcurrentQueryLimit = Settings_.ConcurrentQueryLimit_; - poolConfig.QueueSize = Settings_.QueueSize_; - poolConfig.QueryCancelAfter = Settings_.QueryCancelAfter_; - poolConfig.QueryMemoryLimitPercentPerNode = Settings_.QueryMemoryLimitPercentPerNode_; - TActorId edgeActor = GetRuntime()->AllocateEdgeActor(); - GetRuntime()->Register(CreatePoolCreatorActor(edgeActor, Settings_.DomainName_, Settings_.PoolId_, poolConfig, nullptr, {})); + GetRuntime()->Register(CreatePoolCreatorActor(edgeActor, Settings_.DomainName_, Settings_.PoolId_, Settings_.GetDefaultPoolSettings(), nullptr, {})); auto response = GetRuntime()->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); } @@ -303,6 +342,41 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { CreateSamplePool(); } + // Cluster helpers + void UpdateNodeCpuInfo(double usage, ui32 threads, ui64 nodeIndex = 0) override { + TVector> pools; + pools.emplace_back("User", usage, threads, threads); + + auto edgeActor = GetRuntime()->AllocateEdgeActor(nodeIndex); + GetRuntime()->Send( + NNodeWhiteboard::MakeNodeWhiteboardServiceId(GetRuntime()->GetNodeId(nodeIndex)), edgeActor, + new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools), nodeIndex + ); + + WaitFor(FUTURE_WAIT_TIMEOUT, "node cpu usage", [this, usage, threads, nodeIndex, edgeActor](TString& errorString) { + GetRuntime()->Send( + NNodeWhiteboard::MakeNodeWhiteboardServiceId(GetRuntime()->GetNodeId(nodeIndex)), edgeActor, + new 
NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest(), nodeIndex + ); + auto response = GetRuntime()->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); + + if (!response->Get()->Record.SystemStateInfoSize()) { + errorString = "empty system state info"; + return false; + } + const auto& systemStateInfo = response->Get()->Record.GetSystemStateInfo()[0]; + + if (!systemStateInfo.PoolStatsSize()) { + errorString = "empty pool stats"; + return false; + } + const auto& poolStat = systemStateInfo.GetPoolStats()[0]; + + errorString = TStringBuilder() << "usage: " << poolStat.GetUsage() << ", threads: " << poolStat.GetThreads(); + return poolStat.GetUsage() == usage && threads == poolStat.GetThreads(); + }); + } + // Scheme queries helpers NYdb::NScheme::TSchemeClient GetSchemeClient() const override { return NYdb::NScheme::TSchemeClient(*YdbDriver_); @@ -323,21 +397,17 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { void WaitPoolAccess(const TString& userSID, ui32 access, const TString& poolId = "") const override { auto token = NACLib::TUserToken(userSID, {}); - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { - if (auto response = Navigate(TStringBuilder() << ".resource_pools/" << (poolId ? poolId : Settings_.PoolId_))) { - const auto& result = response->ResultSet.at(0); - bool resourcePool = result.Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool; - if (resourcePool && (!result.SecurityObject || result.SecurityObject->CheckAccess(access, token))) { - return; - } - Cerr << "WaitPoolAccess " << TInstant::Now() - start << ": " << (resourcePool ? 
TStringBuilder() << "access denied" : TStringBuilder() << "unexpected kind " << result.Kind) << "\n"; - } else { - Cerr << "WaitPoolAccess " << TInstant::Now() - start << ": empty response\n"; + WaitFor(FUTURE_WAIT_TIMEOUT, "pool acl", [this, token, access, poolId](TString& errorString) { + auto response = Navigate(TStringBuilder() << ".metadata/workload_manager/pools/" << (poolId ? poolId : Settings_.PoolId_)); + if (!response) { + errorString = "empty response"; + return false; } - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool version waiting timeout"); + const auto& result = response->ResultSet.at(0); + bool resourcePool = result.Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool; + errorString = (resourcePool ? TStringBuilder() << "access denied" : TStringBuilder() << "unexpected kind " << result.Kind); + return resourcePool && (!result.SecurityObject || result.SecurityObject->CheckAccess(access, token)); + }); } // Generic query helpers @@ -382,7 +452,7 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { TPoolStateDescription GetPoolDescription(TDuration leaseDuration = FUTURE_WAIT_TIMEOUT, const TString& poolId = "") const override { const auto& edgeActor = GetRuntime()->AllocateEdgeActor(); - GetRuntime()->Register(CreateRefreshPoolStateActor(edgeActor, Settings_.DomainName_, poolId ? poolId : Settings_.PoolId_, leaseDuration, GetRuntime()->GetAppData().Counters)); + GetRuntime()->Register(CreateRefreshPoolStateActor(edgeActor, CanonizePath(Settings_.DomainName_), poolId ? 
poolId : Settings_.PoolId_, leaseDuration, GetRuntime()->GetAppData().Counters)); auto response = GetRuntime()->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); @@ -390,17 +460,11 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { } void WaitPoolState(const TPoolStateDescription& state, const TString& poolId = "") const override { - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { + WaitFor(FUTURE_WAIT_TIMEOUT, "pool state", [this, state, poolId](TString& errorString) { auto description = GetPoolDescription(TDuration::Zero(), poolId); - if (description.DelayedRequests == state.DelayedRequests && description.RunningRequests == state.RunningRequests) { - return; - } - - Cerr << "WaitPoolState " << TInstant::Now() - start << ": delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool state waiting timeout"); + errorString = TStringBuilder() << "delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests; + return description.DelayedRequests == state.DelayedRequests && description.RunningRequests == state.RunningRequests; + }); } void WaitPoolHandlersCount(i64 finalCount, std::optional initialCount = std::nullopt, TDuration timeout = FUTURE_WAIT_TIMEOUT) const override { @@ -410,16 +474,10 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { UNIT_ASSERT_VALUES_EQUAL_C(counter->Val(), *initialCount, "Unexpected pool handlers count"); } - TInstant start = TInstant::Now(); - while (TInstant::Now() - start < timeout) { - if (counter->Val() == finalCount) { - return; - } - - Cerr << "WaitPoolHandlersCount " << TInstant::Now() - start << ": number handlers = " << counter->Val() << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool handlers count wait 
timeout"); + WaitFor(timeout, "pool handlers", [counter, finalCount](TString& errorString) { + errorString = TStringBuilder() << "number handlers = " << counter->Val(); + return counter->Val() == finalCount; + }); } void StopWorkloadService(ui64 nodeIndex = 0) const override { @@ -432,13 +490,24 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { auto subgroup = GetWorkloadManagerCounters(nodeIndex) ->GetSubgroup("pool", CanonizePath(TStringBuilder() << Settings_.DomainName_ << "/" << (poolId ? poolId : Settings_.PoolId_))); - CheckCommonCounters(subgroup); + const TString description = TStringBuilder() << "Node id: " << GetRuntime()->GetNodeId(nodeIndex); + CheckCommonCounters(subgroup, description); if (checkTableCounters) { - CheckTableCounters(subgroup); + CheckTableCounters(subgroup, description); } } } + TEvFetchDatabaseResponse::TPtr FetchDatabase(const TString& database) const override { + const TActorId edgeActor = GetRuntime()->AllocateEdgeActor(); + GetRuntime()->Register(CreateDatabaseFetcherActor(edgeActor, database)); + const auto response = GetRuntime()->GrabEdgeEvent(edgeActor); + UNIT_ASSERT_C(response, "Got empty response from DatabaseFetcherActor"); + UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); + return response; + } + + // Coomon helpers TTestActorRuntime* GetRuntime() const override { return Server_->GetRuntime(); } @@ -457,15 +526,17 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { } std::unique_ptr GetQueryRequest(const TString& query, const TQueryRunnerSettings& settings) const { + UNIT_ASSERT_C(settings.PoolId_, "Query pool id is not specified"); + auto event = std::make_unique(); - event->Record.SetUserToken(NACLib::TUserToken("", settings.UserSID_, {}).SerializeAsString()); + event->Record.SetUserToken(NACLib::TUserToken("", settings.UserSID_, settings.GroupSIDs_).SerializeAsString()); auto request = event->Record.MutableRequest(); 
request->SetQuery(query); request->SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); request->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request->SetDatabase(Settings_.DomainName_); - request->SetPoolId(settings.PoolId_); + request->SetDatabase(settings.Database_ ? settings.Database_ : Settings_.DomainName_); + request->SetPoolId(*settings.PoolId_); return event; } @@ -475,37 +546,30 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { ->GetSubgroup("subsystem", "workload_manager"); } - static void CheckCommonCounters(NMonitoring::TDynamicCounterPtr subgroup) { - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("LocalInFly", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("LocalDelayedRequests", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueOverloaded", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueError", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("CleanupError", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("Cancelled", true)->Val(), 0); - - UNIT_ASSERT_GE(subgroup->GetCounter("ContinueOk", true)->Val(), 1); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueOk", true)->Val(), subgroup->GetCounter("CleanupOk", true)->Val()); + static void CheckCommonCounters(NMonitoring::TDynamicCounterPtr subgroup, const TString& description) { + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("LocalInFly", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("LocalDelayedRequests", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("ContinueOverloaded", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("ContinueError", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("CleanupError", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("Cancelled", true)->Val(), 0, description); } - static void 
CheckTableCounters(NMonitoring::TDynamicCounterPtr subgroup) { - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("PendingRequestsCount", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("FinishingRequestsCount", false)->Val(), 0); + static void CheckTableCounters(NMonitoring::TDynamicCounterPtr subgroup, const TString& description) { + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("PendingRequestsCount", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("FinishingRequestsCount", false)->Val(), 0, description); - const std::vector> tableQueries = { - {"TCleanupTablesQuery", false}, - {"TRefreshPoolStateQuery", true}, - {"TDelayRequestQuery", true}, - {"TStartFirstDelayedRequestQuery", true}, - {"TStartRequestQuery", false}, - {"TCleanupRequestsQuery", true}, + const std::vector tableQueries = { + "TCleanupTablesQuery", + "TRefreshPoolStateQuery", + "TDelayRequestQuery", + "TStartFirstDelayedRequestQuery", + "TStartRequestQuery", + "TCleanupRequestsQuery", }; - for (const auto& [operation, runExpected] : tableQueries) { + for (const auto& operation : tableQueries) { auto operationSubgroup = subgroup->GetSubgroup("operation", operation); - - UNIT_ASSERT_VALUES_EQUAL_C(operationSubgroup->GetCounter("FinishError", true)->Val(), 0, TStringBuilder() << "Unexpected vaule for operation " << operation); - if (runExpected) { - UNIT_ASSERT_GE_C(operationSubgroup->GetCounter("FinishOk", true)->Val(), 1, TStringBuilder() << "Unexpected vaule for operation " << operation); - } + UNIT_ASSERT_VALUES_EQUAL_C(operationSubgroup->GetCounter("FinishError", true)->Val(), 0, TStringBuilder() << description << ", unexpected vaule for operation " << operation); } } @@ -513,9 +577,10 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { const TYdbSetupSettings Settings_; TPortManager PortManager_; - std::unique_ptr Server_; + TServer::TPtr Server_; std::unique_ptr Client_; std::unique_ptr YdbDriver_; + std::unique_ptr Tenants_; 
std::unique_ptr TableClient_; std::unique_ptr TableClientSession_; @@ -560,10 +625,47 @@ bool TQueryRunnerResultAsync::HasValue() const { //// TYdbSetupSettings +NResourcePool::TPoolSettings TYdbSetupSettings::GetDefaultPoolSettings() const { + NResourcePool::TPoolSettings poolConfig; + poolConfig.ConcurrentQueryLimit = ConcurrentQueryLimit_; + poolConfig.QueueSize = QueueSize_; + poolConfig.QueryCancelAfter = QueryCancelAfter_; + poolConfig.QueryMemoryLimitPercentPerNode = QueryMemoryLimitPercentPerNode_; + poolConfig.DatabaseLoadCpuThreshold = DatabaseLoadCpuThreshold_; + return poolConfig; +} + TIntrusivePtr TYdbSetupSettings::Create() const { return MakeIntrusive(*this); } +TString TYdbSetupSettings::GetDedicatedTenantName() const { + return TStringBuilder() << CanonizePath(DomainName_) << "/test-dedicated"; +} + +TString TYdbSetupSettings::GetSharedTenantName() const { + return TStringBuilder() << CanonizePath(DomainName_) << "/test-shared"; +} + +TString TYdbSetupSettings::GetServerlessTenantName() const { + return TStringBuilder() << CanonizePath(DomainName_) << "/test-serverless"; +} + +//// IYdbSetup + +void IYdbSetup::WaitFor(TDuration timeout, TString description, std::function callback) { + TInstant start = TInstant::Now(); + while (TInstant::Now() - start <= timeout) { + TString errorString; + if (callback(errorString)) { + return; + } + Cerr << "Wait " << description << " " << TInstant::Now() - start << ": " << errorString << "\n"; + Sleep(TDuration::Seconds(1)); + } + UNIT_ASSERT_C(false, "Waiting " << description << " timeout. 
Spent time " << TInstant::Now() - start << " exceeds limit " << timeout); +} + //// TSampleQueriess void TSampleQueries::CompareYson(const TString& expected, const TString& actual) { diff --git a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h index f673f070c11b..0f4eb6c7cf6a 100644 --- a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h +++ b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h @@ -14,7 +14,7 @@ namespace NKikimr::NKqp::NWorkload { -inline constexpr TDuration FUTURE_WAIT_TIMEOUT = TDuration::Seconds(30); +inline constexpr TDuration FUTURE_WAIT_TIMEOUT = TDuration::Seconds(60); // Query runner @@ -24,8 +24,10 @@ struct TQueryRunnerSettings { // Query settings FLUENT_SETTING_DEFAULT(ui32, NodeIndex, 0); - FLUENT_SETTING_DEFAULT(TString, PoolId, ""); + FLUENT_SETTING_DEFAULT(std::optional, PoolId, std::nullopt); FLUENT_SETTING_DEFAULT(TString, UserSID, "user@" BUILTIN_SYSTEM_DOMAIN); + FLUENT_SETTING_DEFAULT(TVector, GroupSIDs, {}); + FLUENT_SETTING_DEFAULT(TString, Database, ""); // Runner settings FLUENT_SETTING_DEFAULT(bool, HangUpDuringExecution, false); @@ -66,7 +68,11 @@ struct TYdbSetupSettings { // Cluster settings FLUENT_SETTING_DEFAULT(ui32, NodeCount, 1); FLUENT_SETTING_DEFAULT(TString, DomainName, "Root"); + FLUENT_SETTING_DEFAULT(bool, CreateSampleTenants, false); FLUENT_SETTING_DEFAULT(bool, EnableResourcePools, true); + FLUENT_SETTING_DEFAULT(bool, EnableResourcePoolsOnServerless, false); + FLUENT_SETTING_DEFAULT(bool, EnableMetadataObjectsOnServerless, true); + FLUENT_SETTING_DEFAULT(bool, EnableExternalDataSourcesOnServerless, true); // Default pool settings FLUENT_SETTING_DEFAULT(TString, PoolId, "sample_pool_id"); @@ -74,12 +80,21 @@ struct TYdbSetupSettings { FLUENT_SETTING_DEFAULT(i32, QueueSize, -1); FLUENT_SETTING_DEFAULT(TDuration, QueryCancelAfter, FUTURE_WAIT_TIMEOUT); 
FLUENT_SETTING_DEFAULT(double, QueryMemoryLimitPercentPerNode, -1); + FLUENT_SETTING_DEFAULT(double, DatabaseLoadCpuThreshold, -1); + NResourcePool::TPoolSettings GetDefaultPoolSettings() const; TIntrusivePtr Create() const; + + TString GetDedicatedTenantName() const; + TString GetSharedTenantName() const; + TString GetServerlessTenantName() const; }; class IYdbSetup : public TThrRefBase { public: + // Cluster helpers + virtual void UpdateNodeCpuInfo(double usage, ui32 threads, ui64 nodeIndex = 0) = 0; + // Scheme queries helpers virtual NYdb::NScheme::TSchemeClient GetSchemeClient() const = 0; virtual void ExecuteSchemeQuery(const TString& query, NYdb::EStatus expectedStatus = NYdb::EStatus::SUCCESS, const TString& expectedMessage = "") const = 0; @@ -101,9 +116,12 @@ class IYdbSetup : public TThrRefBase { virtual void WaitPoolHandlersCount(i64 finalCount, std::optional initialCount = std::nullopt, TDuration timeout = FUTURE_WAIT_TIMEOUT) const = 0; virtual void StopWorkloadService(ui64 nodeIndex = 0) const = 0; virtual void ValidateWorkloadServiceCounters(bool checkTableCounters = true, const TString& poolId = "") const = 0; + virtual TEvFetchDatabaseResponse::TPtr FetchDatabase(const TString& database) const = 0; + // Coomon helpers virtual TTestActorRuntime* GetRuntime() const = 0; virtual const TYdbSetupSettings& GetSettings() const = 0; + static void WaitFor(TDuration timeout, TString description, std::function callback); }; // Test queries @@ -114,18 +132,18 @@ struct TSampleQueries { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); } - template - static void CheckOverloaded(const TResult& result, const TString& poolId) { - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::OVERLOADED, result.GetIssues().ToString()); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Too many pending requests for pool " << poolId); - } - template static void CheckCancelled(const 
TResult& result) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::CANCELLED, result.GetIssues().ToString()); UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Request timeout exceeded, cancelling after"); } + template + static void CheckNotFound(const TResult& result, const TString& poolId) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " not found or you don't have access permissions"); + } + struct TSelect42 { static constexpr char Query[] = "SELECT 42;"; diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp index 8b9a8262609d..7d9db86cccae 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -16,10 +17,20 @@ TEvPrivate::TEvFetchPoolResponse::TPtr FetchPool(TIntrusivePtr ydb, c auto runtime = ydb->GetRuntime(); const auto& edgeActor = runtime->AllocateEdgeActor(); - runtime->Register(CreatePoolFetcherActor(edgeActor, settings.DomainName_, poolId ? poolId : settings.PoolId_, MakeIntrusive(userSID, TVector{}))); + auto userToken = MakeIntrusive(userSID, TVector{}); + userToken->SaveSerializationInfo(); + runtime->Register(CreatePoolFetcherActor(edgeActor, settings.DomainName_, poolId ? 
poolId : settings.PoolId_, userToken)); return runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); } +TEvPrivate::TEvCpuLoadResponse::TPtr FetchCpuInfo(TIntrusivePtr ydb) { + auto runtime = ydb->GetRuntime(); + const auto& edgeActor = runtime->AllocateEdgeActor(); + + runtime->Register(CreateCpuLoadFetcherActor(edgeActor)); + return runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); +} + } // anonymous namespace Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { @@ -47,7 +58,7 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { const TString& userSID = "user@test"; TSampleQueries::CheckSuccess(ydb->ExecuteQuery(TStringBuilder() << R"( - GRANT DESCRIBE SCHEMA ON `/Root/.resource_pools/)" << ydb->GetSettings().PoolId_ << "` TO `" << userSID << "`;" + GRANT DESCRIBE SCHEMA ON `/Root/.metadata/workload_manager/pools/)" << ydb->GetSettings().PoolId_ << "` TO `" << userSID << "`;" )); ydb->WaitPoolAccess(userSID, NACLib::EAccessRights::DescribeSchema); @@ -56,7 +67,7 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { UNIT_ASSERT_STRING_CONTAINS(failedResponse->Get()->Issues.ToString(), TStringBuilder() << "You don't have access permissions for resource pool " << ydb->GetSettings().PoolId_); TSampleQueries::CheckSuccess(ydb->ExecuteQuery(TStringBuilder() << R"( - GRANT SELECT ROW ON `/Root/.resource_pools/)" << ydb->GetSettings().PoolId_ << "` TO `" << userSID << "`;" + GRANT SELECT ROW ON `/Root/.metadata/workload_manager/pools/)" << ydb->GetSettings().PoolId_ << "` TO `" << userSID << "`;" )); ydb->WaitPoolAccess(userSID, NACLib::EAccessRights::SelectRow); @@ -75,7 +86,7 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { Y_UNIT_TEST(TestCreateDefaultPool) { auto ydb = TYdbSetupSettings().Create(); - const TString path = TStringBuilder() << ".resource_pools/" << NResourcePool::DEFAULT_POOL_ID; + const TString path = TStringBuilder() << ".metadata/workload_manager/pools/" << NResourcePool::DEFAULT_POOL_ID; auto response = ydb->Navigate(path, 
NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); UNIT_ASSERT_VALUES_EQUAL(response->ErrorCount, 1); UNIT_ASSERT_VALUES_EQUAL(response->ResultSet.at(0).Kind, NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown); @@ -100,7 +111,8 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { // Check default pool access TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(userSID))); - TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(""))); + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(ydb->GetRuntime()->GetAppData().AllAuthenticatedUsers))); + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(BUILTIN_ACL_ROOT))); } Y_UNIT_TEST(TestDefaultPoolAdminPermissions) { @@ -120,7 +132,7 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { // Check alter access TSampleQueries::CheckSuccess(ydb->ExecuteQuery(TStringBuilder() << R"( ALTER RESOURCE POOL )" << NResourcePool::DEFAULT_POOL_ID << R"( SET ( - QUEUE_SIZE=1 + QUERY_MEMORY_LIMIT_PERCENT_PER_NODE=1 ); )", settings)); @@ -129,6 +141,116 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { DROP RESOURCE POOL )" << NResourcePool::DEFAULT_POOL_ID << ";" , settings)); } + + Y_UNIT_TEST(TestCpuLoadActor) { + const ui32 nodeCount = 5; + auto ydb = TYdbSetupSettings() + .NodeCount(nodeCount) + .Create(); + + auto response = FetchCpuInfo(ydb); + UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::NOT_FOUND, response->Get()->Issues.ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(response->Get()->Issues.ToString(), "Cpu info not found"); + + const double usage = 0.25; + const ui32 threads = 2; + for (size_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) { + ydb->UpdateNodeCpuInfo(usage, threads, nodeIndex); + } + + response = FetchCpuInfo(ydb); + 
UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); + UNIT_ASSERT_VALUES_EQUAL(response->Get()->CpuNumber, threads * nodeCount); + UNIT_ASSERT_DOUBLES_EQUAL(response->Get()->InstantLoad, usage, 0.01); + } +} + +Y_UNIT_TEST_SUITE(KqpWorkloadServiceSubscriptions) { + TActorId SubscribeOnPool(TIntrusivePtr ydb) { + const auto& settings = ydb->GetSettings(); + auto& runtime = *ydb->GetRuntime(); + const auto& edgeActor = runtime.AllocateEdgeActor(); + + runtime.Send(MakeKqpWorkloadServiceId(runtime.GetNodeId()), edgeActor, new TEvSubscribeOnPoolChanges(settings.DomainName_, settings.PoolId_)); + const auto& response = runtime.GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); + UNIT_ASSERT_C(response, "Subscription update not found"); + + const auto& config = response->Get()->Config; + UNIT_ASSERT_C(config, "Pool config not found"); + UNIT_ASSERT_C(*config == settings.GetDefaultPoolSettings(), "Unexpected pool config"); + + const auto& securityObject = response->Get()->SecurityObject; + UNIT_ASSERT_C(securityObject, "Security object not found"); + UNIT_ASSERT_VALUES_EQUAL_C(securityObject->GetOwnerSID(), BUILTIN_ACL_ROOT, "Unexpected owner user SID"); + + return edgeActor; + } + + Y_UNIT_TEST(TestResourcePoolSubscription) { + auto ydb = TYdbSetupSettings() + .QueueSize(10) + .ConcurrentQueryLimit(5) + .QueryCancelAfter(TDuration::Seconds(42)) + .QueryMemoryLimitPercentPerNode(55.0) + .DatabaseLoadCpuThreshold(30.0) + .Create(); + + SubscribeOnPool(ydb); + } + + Y_UNIT_TEST(TestResourcePoolSubscriptionAfterAlter) { + auto ydb = TYdbSetupSettings().Create(); + + const auto& subscriber = SubscribeOnPool(ydb); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + ALTER RESOURCE POOL )" << ydb->GetSettings().PoolId_ << R"( SET ( + CONCURRENT_QUERY_LIMIT=42 + ); + )"); + + const auto& response = ydb->GetRuntime()->GrabEdgeEvent(subscriber, FUTURE_WAIT_TIMEOUT); + UNIT_ASSERT_C(response, "Subscription update not 
found"); + + const auto& config = response->Get()->Config; + UNIT_ASSERT_C(config, "Pool config not found"); + UNIT_ASSERT_VALUES_EQUAL(config->ConcurrentQueryLimit, 42); + } + + Y_UNIT_TEST(TestResourcePoolSubscriptionAfterAclChange) { + auto ydb = TYdbSetupSettings().Create(); + + const auto& subscriber = SubscribeOnPool(ydb); + + const TString& userSID = "test@user"; + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + GRANT ALL ON `/Root/.metadata/workload_manager/pools/)" << ydb->GetSettings().PoolId_ << R"(` TO `)" << userSID << R"(`; + )"); + + const auto& response = ydb->GetRuntime()->GrabEdgeEvent(subscriber, FUTURE_WAIT_TIMEOUT); + UNIT_ASSERT_C(response, "Subscription update not found"); + + const auto& securityObject = response->Get()->SecurityObject; + UNIT_ASSERT_C(securityObject, "Security object not found"); + + NACLib::TUserToken token("", userSID, {}); + UNIT_ASSERT_C(securityObject->CheckAccess(NACLib::GenericFull, token), TStringBuilder() << "Unexpected pool access rights: " << securityObject->ToString()); + } + + Y_UNIT_TEST(TestResourcePoolSubscriptionAfterDrop) { + auto ydb = TYdbSetupSettings().Create(); + + const auto& subscriber = SubscribeOnPool(ydb); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + DROP RESOURCE POOL )" << ydb->GetSettings().PoolId_ << R"(; + )"); + + const auto& response = ydb->GetRuntime()->GrabEdgeEvent(subscriber, FUTURE_WAIT_TIMEOUT); + UNIT_ASSERT_C(response, "Subscription update not found"); + UNIT_ASSERT_C(!response->Get()->Config, "Unexpected pool config"); + UNIT_ASSERT_C(!response->Get()->SecurityObject, "Unexpected security object"); + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp index 288f3b72edeb..f5972e873f4e 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp @@ 
-27,7 +27,7 @@ void DelayRequest(TIntrusivePtr ydb, const TString& sessionId, TDurat auto runtime = ydb->GetRuntime(); const auto& edgeActor = runtime->AllocateEdgeActor(); - runtime->Register(CreateDelayRequestActor(edgeActor, settings.DomainName_, settings.PoolId_, sessionId, TInstant::Now(), Nothing(), leaseDuration, runtime->GetAppData().Counters)); + runtime->Register(CreateDelayRequestActor(edgeActor, CanonizePath(settings.DomainName_), settings.PoolId_, sessionId, TInstant::Now(), Nothing(), leaseDuration, runtime->GetAppData().Counters)); auto response = runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); UNIT_ASSERT_VALUES_EQUAL(response->Get()->SessionId, sessionId); @@ -38,7 +38,7 @@ void StartRequest(TIntrusivePtr ydb, const TString& sessionId, TDurat auto runtime = ydb->GetRuntime(); const auto& edgeActor = runtime->AllocateEdgeActor(); - runtime->Register(CreateStartRequestActor(edgeActor, settings.DomainName_, settings.PoolId_, sessionId, leaseDuration, runtime->GetAppData().Counters)); + runtime->Register(CreateStartRequestActor(edgeActor, CanonizePath(settings.DomainName_), settings.PoolId_, sessionId, leaseDuration, runtime->GetAppData().Counters)); auto response = runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); UNIT_ASSERT_VALUES_EQUAL(response->Get()->SessionId, sessionId); @@ -70,25 +70,24 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceTables) { CanonizePath({ydb->GetSettings().DomainName_, ".metadata/workload_manager"}) ).GetValue(FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(listResult.GetStatus(), NYdb::EStatus::SUCCESS, listResult.GetIssues().ToString()); - UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren().size(), 3); } 
Y_UNIT_TEST(TestTablesIsNotCreatingForUnlimitedPool) { auto ydb = TYdbSetupSettings() .ConcurrentQueryLimit(-1) - .QueueSize(10) + .QueryMemoryLimitPercentPerNode(50) .Create(); TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); // Check that there is no .metadata folder auto listResult = ydb->GetSchemeClient().ListDirectory( - CanonizePath(ydb->GetSettings().DomainName_) + CanonizePath({ydb->GetSettings().DomainName_, ".metadata", "workload_manager"}) ).GetValue(FUTURE_WAIT_TIMEOUT); UNIT_ASSERT_VALUES_EQUAL_C(listResult.GetStatus(), NYdb::EStatus::SUCCESS, listResult.GetIssues().ToString()); - UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren()[0].Name, ".resource_pools"); - UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren()[1].Name, ".sys"); + UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(listResult.GetChildren()[0].Name, "pools"); } Y_UNIT_TEST(TestPoolStateFetcherActor) { @@ -133,22 +132,26 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceTables) { Y_UNIT_TEST(TestLeaseExpiration) { auto ydb = TYdbSetupSettings() .ConcurrentQueryLimit(1) + .QueryCancelAfter(TDuration::Zero()) .Create(); // Create tables - TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); + auto hangingRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().HangUpDuringExecution(true)); + ydb->WaitQueryExecution(hangingRequest); - const TDuration leaseDuration = TDuration::Seconds(10); - StartRequest(ydb, "test_session", leaseDuration); - DelayRequest(ydb, "test_session", leaseDuration); - CheckPoolDescription(ydb, 1, 1, leaseDuration); + auto delayedRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + ydb->WaitPoolState({.DelayedRequests = 1, .RunningRequests = 1}); ydb->StopWorkloadService(); 
ydb->WaitPoolHandlersCount(0); // Check that lease expired - Sleep(leaseDuration + TDuration::Seconds(5)); - CheckPoolDescription(ydb, 0, 0); + IYdbSetup::WaitFor(TDuration::Seconds(60), "lease expiration", [ydb](TString& errorString) { + auto description = ydb->GetPoolDescription(TDuration::Zero()); + + errorString = TStringBuilder() << "delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests; + return description.AmountRequests() == 0; + }); } Y_UNIT_TEST(TestLeaseUpdates) { diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp index 553cb5b17933..a020bd32ba6e 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -47,6 +48,56 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId("another_pool_id"))); } + Y_UNIT_TEST(WorkloadServiceDisabledByFeatureFlagOnServerless) { + auto ydb = TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(false) + .Create(); + + const TString& poolId = "another_pool_id"; + auto settings = TQueryRunnerSettings().PoolId(poolId); + + // Dedicated, enabled + TSampleQueries::CheckNotFound(ydb->ExecuteQuery( + TSampleQueries::TSelect42::Query, + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1) + ), poolId); + + // Shared, enabled + TSampleQueries::CheckNotFound(ydb->ExecuteQuery( + TSampleQueries::TSelect42::Query, + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2) + ), poolId); + + // Serverless, disabled + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery( + TSampleQueries::TSelect42::Query, + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2) + )); + } + + 
Y_UNIT_TEST(WorkloadServiceDisabledByInvalidDatabasePath) { + auto ydb = TYdbSetupSettings().Create(); + + const TString& poolId = "another_pool_id"; + auto settings = TQueryRunnerSettings().PoolId(poolId); + + TSampleQueries::CheckNotFound(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings), poolId); + + const TString& tabmleName = "sub_path"; + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + CREATE TABLE )" << tabmleName << R"( ( + Key Int32, + PRIMARY KEY (Key) + ); + )"); + + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery( + TSampleQueries::TSelect42::Query, + settings.Database(TStringBuilder() << CanonizePath(ydb->GetSettings().DomainName_) << "/" << tabmleName) + )); + } + TQueryRunnerResultAsync StartQueueSizeCheckRequests(TIntrusivePtr ydb, const TQueryRunnerSettings& settings) { // One of these requests should be rejected by QueueSize auto firstRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, settings); @@ -58,7 +109,10 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { } UNIT_ASSERT_C(firstRequest.HasValue(), "One of two requests shoud be rejected"); UNIT_ASSERT_C(!secondRequest.HasValue(), "One of two requests shoud be placed in pool"); - TSampleQueries::CheckOverloaded(firstRequest.GetResult(), ydb->GetSettings().PoolId_); + + auto result = firstRequest.GetResult(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 2, number of global delayed/running requests is 1, sum of them is larger than allowed limit 1 (including concurrent query limit 1) for pool " << ydb->GetSettings().PoolId_); return secondRequest; } @@ -114,15 +168,43 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { auto hangingRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().HangUpDuringExecution(true)); 
ydb->WaitQueryExecution(hangingRequest); - TSampleQueries::CheckOverloaded( - ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)), - ydb->GetSettings().PoolId_ - ); + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 1, number of global delayed/running requests is 1, sum of them is larger than allowed limit 0 (including concurrent query limit 1) for pool " << ydb->GetSettings().PoolId_); ydb->ContinueQueryExecution(hangingRequest); TSampleQueries::TSelect42::CheckResult(hangingRequest.GetResult()); } + Y_UNIT_TEST(TestZeroQueueSizeManyQueries) { + const i32 inFlight = 10; + auto ydb = TYdbSetupSettings() + .ConcurrentQueryLimit(inFlight) + .QueueSize(0) + .QueryCancelAfter(FUTURE_WAIT_TIMEOUT * inFlight) + .Create(); + + auto settings = TQueryRunnerSettings().HangUpDuringExecution(true); + + std::vector asyncResults; + for (size_t i = 0; i < inFlight; ++i) { + asyncResults.emplace_back(ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, settings)); + } + + for (const auto& asyncResult : asyncResults) { + ydb->WaitQueryExecution(asyncResult); + } + + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 1, number of global delayed/running requests is " << inFlight << ", sum of them is larger than allowed limit 0 (including concurrent query limit " << inFlight << ") for pool " << 
ydb->GetSettings().PoolId_); + + for (const auto& asyncResult : asyncResults) { + ydb->ContinueQueryExecution(asyncResult); + TSampleQueries::TSelect42::CheckResult(asyncResult.GetResult()); + } + } + Y_UNIT_TEST(TestQueryCancelAfterUnlimitedPool) { auto ydb = TYdbSetupSettings() .QueryCancelAfter(TDuration::Seconds(10)) @@ -189,6 +271,39 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << ydb->GetSettings().PoolId_ << " was disabled due to zero concurrent query limit"); } + Y_UNIT_TEST(TestCpuLoadThreshold) { + auto ydb = TYdbSetupSettings() + .DatabaseLoadCpuThreshold(90) + .QueryCancelAfter(TDuration::Seconds(10)) + .Create(); + + // Simulate load + ydb->UpdateNodeCpuInfo(1.0, 1); + + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::CANCELLED, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was delayed during"); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << ", that is larger than delay deadline 10.000000s in pool " << ydb->GetSettings().PoolId_ << ", request was canceled"); + } + + Y_UNIT_TEST(TestCpuLoadThresholdRefresh) { + auto ydb = TYdbSetupSettings() + .DatabaseLoadCpuThreshold(90) + .Create(); + + // Simulate load + ydb->UpdateNodeCpuInfo(1.0, 1); + + // Delay request + auto result = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + ydb->WaitPoolState({.DelayedRequests = 1, .RunningRequests = 0}); + + // Free load + ydb->ContinueQueryExecution(result); + ydb->UpdateNodeCpuInfo(0.0, 1); + TSampleQueries::TSelect42::CheckResult(result.GetResult(TDuration::Seconds(5))); + } + Y_UNIT_TEST(TestHandlerActorCleanup) { auto ydb = TYdbSetupSettings() .ConcurrentQueryLimit(1) @@ 
-197,7 +312,12 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId(NResourcePool::DEFAULT_POOL_ID))); - ydb->WaitPoolHandlersCount(0, 2, TDuration::Seconds(35)); + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + DROP RESOURCE POOL )" << ydb->GetSettings().PoolId_ << R"(; + DROP RESOURCE POOL )" << NResourcePool::DEFAULT_POOL_ID << R"(; + )"); + + ydb->WaitPoolHandlersCount(0, std::nullopt, TDuration::Seconds(95)); } } @@ -222,7 +342,9 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceDistributed) { ydb->WaitPoolState({.DelayedRequests = 1, .RunningRequests = 1}); // Check distributed queue size - TSampleQueries::CheckOverloaded(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().NodeIndex(0)), ydb->GetSettings().PoolId_); + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().NodeIndex(0)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 1, number of global delayed/running requests is 2, sum of them is larger than allowed limit 1 (including concurrent query limit 1) for pool " << ydb->GetSettings().PoolId_); ydb->ContinueQueryExecution(delayedRequest); ydb->ContinueQueryExecution(hangingRequest); @@ -292,7 +414,59 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { ); ydb->WaitQueryExecution(hangingRequest); - TSampleQueries::CheckOverloaded(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId(poolId)), poolId); + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId(poolId)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, 
result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 1, number of global delayed/running requests is 1, sum of them is larger than allowed limit 0 (including concurrent query limit 1) for pool " << poolId); + + ydb->ContinueQueryExecution(hangingRequest); + TSampleQueries::TSelect42::CheckResult(hangingRequest.GetResult()); + } + + Y_UNIT_TEST(TestCreateResourcePoolOnServerless) { + auto ydb = TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(true) + .Create(); + + const auto& serverlessTenant = ydb->GetSettings().GetServerlessTenantName(); + auto settings = TQueryRunnerSettings() + .PoolId("") + .Database(serverlessTenant) + .NodeIndex(1); + + const TString& poolId = "my_pool"; + TSampleQueries::CheckSuccess(ydb->ExecuteQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL )" << poolId << R"( WITH ( + CONCURRENT_QUERY_LIMIT=1, + QUEUE_SIZE=0 + ); + )", settings)); + settings.PoolId(poolId); + + auto hangingRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, settings.HangUpDuringExecution(true)); + ydb->WaitQueryExecution(hangingRequest); + + settings.HangUpDuringExecution(false); + + { // Rejected result + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.PoolId(poolId)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local pending requests is 1, number of global delayed/running requests is 1, sum of them is larger than allowed limit 0 (including concurrent query limit 1) for pool " << poolId); + } + + { // Check tables + auto result = ydb->ExecuteQuery(R"( + SELECT * FROM `.metadata/workload_manager/running_requests` + )", 
settings.PoolId(NResourcePool::DEFAULT_POOL_ID).Database(ydb->GetSettings().GetSharedTenantName())); + TSampleQueries::CheckSuccess(result); + + NYdb::TResultSetParser resultSet(result.GetResultSet(0)); + UNIT_ASSERT_C(resultSet.TryNextRow(), "Unexpected row count"); + + const auto& databaseId = resultSet.ColumnParser("database").GetOptionalUtf8(); + UNIT_ASSERT_C(databaseId, "Unexpected database response"); + UNIT_ASSERT_VALUES_EQUAL_C(*databaseId, ydb->FetchDatabase(serverlessTenant)->Get()->DatabaseId, "Unexpected database id"); + } ydb->ContinueQueryExecution(hangingRequest); TSampleQueries::TSelect42::CheckResult(hangingRequest.GetResult()); @@ -334,7 +508,10 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { QUEUE_SIZE=0 ); )"); - TSampleQueries::CheckOverloaded(delayedRequest.GetResult(), ydb->GetSettings().PoolId_); + + auto result = delayedRequest.GetResult(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::OVERLOADED, result.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Request was rejected, number of local delayed requests is 1, that is larger than allowed limit 0 for pool " << ydb->GetSettings().PoolId_); ydb->ContinueQueryExecution(hangingRequest); TSampleQueries::TSelect42::CheckResult(hangingRequest.GetResult()); @@ -412,19 +589,16 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { DROP RESOURCE POOL )" << poolId << ";" ); - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { - if (ydb->Navigate(TStringBuilder() << ".resource_pools/" << poolId)->ResultSet.at(0).Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown) { - auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " not found"); - return; - } - - 
Cerr << "WaitPoolDrop " << TInstant::Now() - start << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool drop waiting timeout"); + IYdbSetup::WaitFor(FUTURE_WAIT_TIMEOUT, "pool drop", [ydb, poolId](TString& errorString) { + auto kind = ydb->Navigate(TStringBuilder() << ".metadata/workload_manager/pools/" << poolId)->ResultSet.at(0).Kind; + + errorString = TStringBuilder() << "kind = " << kind; + return kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown; + }); + + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " not found"); } Y_UNIT_TEST(TestResourcePoolAcl) { @@ -436,7 +610,7 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { CREATE RESOURCE POOL )" << poolId << R"( WITH ( CONCURRENT_QUERY_LIMIT=1 ); - GRANT DESCRIBE SCHEMA ON `/Root/.resource_pools/)" << poolId << "` TO `" << userSID << "`;" + GRANT DESCRIBE SCHEMA ON `/Root/.metadata/workload_manager/pools/)" << poolId << "` TO `" << userSID << "`;" ); ydb->WaitPoolAccess(userSID, NACLib::EAccessRights::DescribeSchema, poolId); @@ -446,11 +620,247 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "You don't have access permissions for resource pool " << poolId); ydb->ExecuteSchemeQuery(TStringBuilder() << R"( - GRANT SELECT ROW ON `/Root/.resource_pools/)" << poolId << "` TO `" << userSID << "`;" + GRANT SELECT ROW ON `/Root/.metadata/workload_manager/pools/)" << poolId << "` TO `" << userSID << "`;" ); ydb->WaitPoolAccess(userSID, NACLib::EAccessRights::SelectRow, poolId); TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings)); } } +Y_UNIT_TEST_SUITE(ResourcePoolClassifiersDdl) { + 
Y_UNIT_TEST(TestResourcePoolClassifiersPermissions) { + auto ydb = TYdbSetupSettings().Create(); + + const TString& userSID = "user@test"; + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + GRANT DESCRIBE SCHEMA ON `/Root` TO `)" << userSID << R"(`; + GRANT DESCRIBE SCHEMA, SELECT ROW ON `/Root/.metadata/workload_manager/pools/)" << ydb->GetSettings().PoolId_ << "` TO `" << userSID << "`;" + ); + ydb->WaitPoolAccess(userSID, NACLib::EAccessRights::DescribeSchema | NACLib::EAccessRights::SelectRow); + + auto settings = TQueryRunnerSettings().UserSID(userSID); + + ydb->WaitFor(TDuration::Seconds(5), "Database permissions", [ydb, settings](TString& errorString) { + auto result = ydb->ExecuteQuery("DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier", settings); + + errorString = result.GetIssues().ToOneLineString(); + return result.GetStatus() == EStatus::GENERIC_ERROR && errorString.Contains("You don't have access permissions for database Root"); + }); + + auto createResult = ydb->ExecuteQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RESOURCE_POOL=")" << NResourcePool::DEFAULT_POOL_ID << R"(", + RANK=20 + ); + )", settings); + UNIT_ASSERT_VALUES_EQUAL_C(createResult.GetStatus(), EStatus::GENERIC_ERROR, createResult.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(createResult.GetIssues().ToOneLineString(), "You don't have access permissions for database Root"); + + auto alterResult = ydb->ExecuteQuery(R"( + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier SET ( + RANK=20 + ); + )", settings); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::GENERIC_ERROR, alterResult.GetIssues().ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(alterResult.GetIssues().ToOneLineString(), "You don't have access permissions for database Root"); + } + + void CreateSampleResourcePoolClassifier(TIntrusivePtr ydb, const TString& classifierId, const TQueryRunnerSettings& settings, const TString& poolId) 
{ + TSampleQueries::CheckSuccess(ydb->ExecuteQuery(TStringBuilder() << R"( + GRANT ALL ON `)" << CanonizePath(settings.Database_ ? settings.Database_ : ydb->GetSettings().DomainName_) << R"(` TO `)" << settings.UserSID_ << R"(`; + CREATE RESOURCE POOL )" << poolId << R"( WITH ( + CONCURRENT_QUERY_LIMIT=0 + ); + CREATE RESOURCE POOL CLASSIFIER )" << classifierId << R"( WITH ( + RESOURCE_POOL=")" << poolId << R"(", + MEMBER_NAME=")" << settings.UserSID_ << R"(" + ); + )", TQueryRunnerSettings() + .UserSID(BUILTIN_ACL_METADATA) + .Database(settings.Database_) + .NodeIndex(settings.NodeIndex_) + .PoolId(NResourcePool::DEFAULT_POOL_ID) + )); + } + + TString CreateSampleResourcePoolClassifier(TIntrusivePtr ydb, const TQueryRunnerSettings& settings, const TString& poolId) { + const TString& classifierId = "my_pool_classifier"; + CreateSampleResourcePoolClassifier(ydb, classifierId, settings, poolId); + return classifierId; + } + + void WaitForFail(TIntrusivePtr ydb, const TQueryRunnerSettings& settings, const TString& poolId) { + ydb->WaitFor(TDuration::Seconds(10), "Resource pool classifier fail", [ydb, settings, poolId](TString& errorString) { + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); + + errorString = result.GetIssues().ToOneLineString(); + return result.GetStatus() == EStatus::PRECONDITION_FAILED && errorString.Contains(TStringBuilder() << "Resource pool " << poolId << " was disabled due to zero concurrent query limit"); + }); + } + + void WaitForSuccess(TIntrusivePtr ydb, const TQueryRunnerSettings& settings) { + ydb->WaitFor(TDuration::Seconds(10), "Resource pool classifier success", [ydb, settings](TString& errorString) { + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); + + errorString = result.GetIssues().ToOneLineString(); + return result.GetStatus() == EStatus::SUCCESS; + }); + } + + Y_UNIT_TEST(TestCreateResourcePoolClassifier) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = 
TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + } + + Y_UNIT_TEST(TestCreateResourcePoolClassifierOnServerless) { + auto ydb = TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableResourcePoolsOnServerless(true) + .Create(); + + auto settings = TQueryRunnerSettings() + .PoolId("") + .UserSID("test@user") + .Database(ydb->GetSettings().GetServerlessTenantName()) + .NodeIndex(1); + + const TString& poolId = "my_pool"; + CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + } + + Y_UNIT_TEST(TestAlterResourcePoolClassifier) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + const TString& classifierId = CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + ALTER RESOURCE POOL CLASSIFIER )" << classifierId << R"( SET ( + RESOURCE_POOL=")" << NResourcePool::DEFAULT_POOL_ID << R"(" + ); + )"); + + WaitForSuccess(ydb, settings); + } + + Y_UNIT_TEST(TestDropResourcePoolClassifier) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + const TString& classifierId = CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + DROP RESOURCE POOL CLASSIFIER )" << classifierId << R"(; + )"); + + WaitForSuccess(ydb, settings); + } + + Y_UNIT_TEST(TestDropResourcePool) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + CreateSampleResourcePoolClassifier(ydb, settings, poolId); + 
+ WaitForFail(ydb, settings, poolId); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + DROP RESOURCE POOL )" << poolId << R"(; + )"); + + WaitForSuccess(ydb, settings); + } + + Y_UNIT_TEST(TestResourcePoolClassifierRanks) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + + const TString& classifierId = "rank_classifier"; + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL CLASSIFIER )" << classifierId << R"( WITH ( + RANK="1", + RESOURCE_POOL=")" << NResourcePool::DEFAULT_POOL_ID << R"(", + MEMBER_NAME=")" << settings.UserSID_ << R"(" + ); + )"); + + WaitForSuccess(ydb, settings); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + ALTER RESOURCE POOL CLASSIFIER )" << classifierId << R"( RESET ( + RANK + ); + )"); + + WaitForFail(ydb, settings, poolId); + } + + Y_UNIT_TEST(TestExplicitPoolId) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId("").UserSID("test@user"); + const TString& poolId = "my_pool"; + CreateSampleResourcePoolClassifier(ydb, settings, poolId); + + WaitForFail(ydb, settings, poolId); + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId(NResourcePool::DEFAULT_POOL_ID))); + } + + Y_UNIT_TEST(TestMultiGroupClassification) { + auto ydb = TYdbSetupSettings().Create(); + + auto settings = TQueryRunnerSettings().PoolId(""); + + const TString& poolId = "my_pool"; + const TString& firstSID = "first@user"; + const TString& secondSID = "second@user"; + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + CREATE RESOURCE POOL )" << poolId << R"( WITH ( + CONCURRENT_QUERY_LIMIT=0 + ); + CREATE RESOURCE POOL CLASSIFIER first_classifier WITH ( + RESOURCE_POOL=")" << poolId << R"(", + MEMBER_NAME=")" << firstSID << 
R"(", + RANK=1 + ); + CREATE RESOURCE POOL CLASSIFIER second_classifier WITH ( + RESOURCE_POOL=")" << NResourcePool::DEFAULT_POOL_ID << R"(", + MEMBER_NAME=")" << secondSID << R"(", + RANK=2 + ); + )"); + + WaitForFail(ydb, settings.GroupSIDs({firstSID}), poolId); + WaitForSuccess(ydb, settings.GroupSIDs({secondSID})); + WaitForFail(ydb, settings.GroupSIDs({firstSID, secondSID}), poolId); + + ydb->ExecuteSchemeQuery(TStringBuilder() << R"( + ALTER RESOURCE POOL CLASSIFIER second_classifier SET ( + RANK=0 + ); + )"); + + WaitForSuccess(ydb, settings.GroupSIDs({firstSID, secondSID})); + } +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/workload_service/ya.make b/ydb/core/kqp/workload_service/ya.make index 40ee9196cdad..b8b5704044e2 100644 --- a/ydb/core/kqp/workload_service/ya.make +++ b/ydb/core/kqp/workload_service/ya.make @@ -7,7 +7,11 @@ SRCS( PEERDIR( ydb/core/cms/console + ydb/core/fq/libs/compute/common + ydb/core/kqp/workload_service/actors + + ydb/library/actors/interconnect ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/load_test/aggregated_result.cpp b/ydb/core/load_test/aggregated_result.cpp index 82ae3dee3e68..548a5628b442 100644 --- a/ydb/core/load_test/aggregated_result.cpp +++ b/ydb/core/load_test/aggregated_result.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace NKikimr { @@ -77,55 +78,35 @@ IOutputStream& operator<<(IOutputStream& output, const TAggregatedResult& result return output; } -using TColumnPositions = THashMap; - -TColumnPositions GetColumnPositionsInResponse(const NKikimrMiniKQL::TType& ttype) { - TColumnPositions columnPositions; - for (const NKikimrMiniKQL::TMember& member : ttype.GetStruct().GetMember()) { - if (member.GetName() == "Data") { - const auto& listStruct = member.GetType().GetList().GetItem().GetStruct(); - for (const NKikimrMiniKQL::TMember& listMember : listStruct.GetMember()) { - columnPositions.emplace(listMember.GetName(), columnPositions.size()); - } - break; - } - } - return columnPositions; -} 
- -NKikimrMiniKQL::TValue GetOptional(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return listItem.GetStruct(pos).GetOptional(); -} - template -T ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - Y_UNUSED(listItem, pos); +T ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + Y_UNUSED(parser, column); Y_ABORT("unimplemented"); } template<> -ui32 ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetUint32(); +ui32 ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalUint32().GetOrElse(0); } template<> -ui64 ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetUint64(); +ui64 ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalUint64().GetOrElse(0); } template<> -double ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetDouble(); +double ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalDouble().GetOrElse(static_cast(0)); } template<> -TString ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetBytes(); +TString ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalString().GetOrElse(""); } template<> -TInstant ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return TInstant::Seconds(GetOptional(listItem, pos).GetUint32()); +TInstant ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return TInstant::Seconds(parser.ColumnParser(column).GetOptionalUint32().GetOrElse(0)); } bool GetStatName(TStringBuf columnName, TStringBuf& statName, TStringBuf& suffix) { @@ -161,38 +142,40 @@ void SetInAggregatedField(TStringBuf suffix, T 
value, TAggregatedField& dst) } } -TAggregatedResult GetResultFromValueListItem(const NKikimrMiniKQL::TValue& listItem, const TColumnPositions& columnPositions) { +TAggregatedResult GetResultFromValueListItem(NYdb::TResultSetParser& parser, const NYdb::TResultSet& rs) { TAggregatedResult result; TStringBuf statName; TStringBuf suffix; TStringBuf levelSb; - for (const auto& [column, pos] : columnPositions) { + for (const auto& columnMeta : rs.GetColumnsMeta()) { + TString column = columnMeta.Name; + if (column == "id") { - result.Uuid = ExtractValue(listItem, pos); + result.Uuid = ExtractValue(parser, column); } else if (column == "start") { - result.Start = ExtractValue(listItem, pos); + result.Start = ExtractValue(parser, column); } else if (column == "finish") { - result.Finish = ExtractValue(listItem, pos); + result.Finish = ExtractValue(parser, column); } else if (column == "total_nodes") { - result.Stats.TotalNodes = ExtractValue(listItem, pos); + result.Stats.TotalNodes = ExtractValue(parser, column); } else if (column == "success_nodes") { - result.Stats.SuccessNodes = ExtractValue(listItem, pos); + result.Stats.SuccessNodes = ExtractValue(parser, column); } else if (column == "config") { - result.Config = ExtractValue(listItem, pos); + result.Config = ExtractValue(parser, column); } else if (GetStatName(column, statName, suffix)) { if (statName == "transactions") { if (suffix == "_avg") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Transactions); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.Transactions); } else { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Transactions); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.Transactions); } } else if (statName == "transactions_per_sec") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.TransactionsPerSecond); + SetInAggregatedField(suffix, ExtractValue(parser, column), 
result.Stats.TransactionsPerSecond); } else if (statName == "errors_per_sec") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.ErrorsPerSecond); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.ErrorsPerSecond); } else if (GetPercentileLevel(statName, levelSb)) { auto level = FromString(levelSb); - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Percentiles[level]); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.Percentiles[level]); } } } @@ -200,16 +183,16 @@ TAggregatedResult GetResultFromValueListItem(const NKikimrMiniKQL::TValue& listI } bool LoadResultFromResponseProto(const NKikimrKqp::TQueryResponse& response, TVector& results) { - const auto& ttype = response.GetResults(0).GetType(); - auto columnPositions = GetColumnPositionsInResponse(ttype); - if (columnPositions.empty()) { - return false; - } + Y_ABORT_UNLESS(response.GetYdbResults().size() > 0); + + NYdb::TResultSet rs(response.GetYdbResults(0)); + NYdb::TResultSetParser parser(response.GetYdbResults(0)); results.clear(); - for (const NKikimrMiniKQL::TValue& listItem : response.GetResults(0).GetValue().GetStruct().Get(0).GetList()) { - results.push_back(GetResultFromValueListItem(listItem, columnPositions)); + while(parser.TryNextRow()) { + results.push_back(GetResultFromValueListItem(parser, rs)); } + return true; } diff --git a/ydb/core/local_pgwire/local_pgwire.cpp b/ydb/core/local_pgwire/local_pgwire.cpp index f71cd38d07d5..7dc6f855ce00 100644 --- a/ydb/core/local_pgwire/local_pgwire.cpp +++ b/ydb/core/local_pgwire/local_pgwire.cpp @@ -18,33 +18,8 @@ class TPgYdbProxy : public TActor { using TBase = TActor; struct TSecurityState { - TString Ticket; - Ydb::Auth::LoginResult LoginResult; - TEvTicketParser::TError Error; - TIntrusiveConstPtr Token; TString SerializedToken; - }; - - struct TTokenState { - std::unordered_set Senders; - }; - - struct TEvPrivate { - enum EEv { - EvTokenReady = 
EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)"); - - struct TEvTokenReady : TEventLocal { - Ydb::Auth::LoginResult LoginResult; - TActorId Sender; - TString Database; - TString PeerName; - - TEvTokenReady() = default; - }; + TString Ticket; }; struct TConnectionState { @@ -54,7 +29,6 @@ class TPgYdbProxy : public TActor { std::unordered_map ConnectionState; std::unordered_map SecurityState; - std::unordered_map TokenState; uint32_t ConnectionNum = 0; public: @@ -63,85 +37,24 @@ class TPgYdbProxy : public TActor { { } - void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev) { - auto token = ev->Get()->Ticket; - auto itTokenState = TokenState.find(token); - if (itTokenState == TokenState.end()) { - BLOG_W("Couldn't find token in reply from TicketParser"); - return; - } - for (auto sender : itTokenState->second.Senders) { - auto& securityState(SecurityState[sender]); - securityState.Ticket = token; - securityState.Error = ev->Get()->Error; - securityState.Token = ev->Get()->Token; - securityState.SerializedToken = ev->Get()->SerializedToken; - auto authResponse = std::make_unique(); - if (ev->Get()->Error) { - authResponse->Error = ev->Get()->Error.Message; - } - Send(sender, authResponse.release()); - } - TokenState.erase(itTokenState); - } - - void Handle(TEvPrivate::TEvTokenReady::TPtr& ev) { - auto token = ev->Get()->LoginResult.token(); - auto itTokenState = TokenState.find(token); - if (itTokenState == TokenState.end()) { - itTokenState = TokenState.insert({token, {}}).first; - } - bool needSend = itTokenState->second.Senders.empty(); - itTokenState->second.Senders.insert(ev->Get()->Sender); - if (needSend) { - Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ - .Database = ev->Get()->Database, - .Ticket = token, - .PeerName = ev->Get()->PeerName, - })); - } - 
SecurityState[ev->Get()->Sender].LoginResult = std::move(ev->Get()->LoginResult); - } - void Handle(NPG::TEvPGEvents::TEvAuth::TPtr& ev) { - std::unordered_map clientParams = ev->Get()->InitialMessage->GetClientParams(); BLOG_D("TEvAuth " << ev->Get()->InitialMessage->Dump() << " cookie " << ev->Cookie); - Ydb::Auth::LoginRequest request; - request.set_user(clientParams["user"]); + std::unordered_map clientParams = ev->Get()->InitialMessage->GetClientParams(); + TPgWireAuthData pgWireAuthData; + pgWireAuthData.UserName = clientParams["user"]; if (ev->Get()->PasswordMessage) { - request.set_password(TString(ev->Get()->PasswordMessage->GetPassword())); + pgWireAuthData.Password = TString(ev->Get()->PasswordMessage->GetPassword()); } - TActorSystem* actorSystem = TActivationContext::ActorSystem(); - TActorId sender = ev->Sender; - TString database = clientParams["database"]; - if (database == "/postgres") { + pgWireAuthData.Sender = ev->Sender; + pgWireAuthData.DatabasePath = clientParams["database"]; + if (pgWireAuthData.DatabasePath == "/postgres") { auto authResponse = std::make_unique(); authResponse->Error = Ydb::StatusIds_StatusCode_Name(Ydb::StatusIds_StatusCode::StatusIds_StatusCode_BAD_REQUEST); - actorSystem->Send(sender, authResponse.release()); + Send(pgWireAuthData.Sender, authResponse.release()); } - TString peerName = TStringBuilder() << ev->Get()->Address; + pgWireAuthData.PeerName = TStringBuilder() << ev->Get()->Address; - using TRpcEv = NGRpcService::TGRpcRequestWrapperNoAuth; - auto rpcFuture = NRpcService::DoLocalRpc(std::move(request), database, {}, actorSystem); - rpcFuture.Subscribe([actorSystem, sender, database, peerName, selfId = SelfId()](const NThreading::TFuture& future) { - auto& response = future.GetValueSync(); - if (response.operation().status() == Ydb::StatusIds::SUCCESS) { - auto tokenReady = std::make_unique(); - response.operation().result().UnpackTo(&(tokenReady->LoginResult)); - tokenReady->Sender = sender; - 
tokenReady->Database = database; - tokenReady->PeerName = peerName; - actorSystem->Send(selfId, tokenReady.release()); - } else { - auto authResponse = std::make_unique(); - if (response.operation().issues_size() > 0) { - authResponse->Error = response.operation().issues(0).message(); - } else { - authResponse->Error = Ydb::StatusIds_StatusCode_Name(response.operation().status()); - } - actorSystem->Send(sender, authResponse.release()); - } - }); + Register(CreateLocalPgWireAuthActor(pgWireAuthData, SelfId())); } void Handle(NPG::TEvPGEvents::TEvConnectionOpened::TPtr& ev) { @@ -173,7 +86,6 @@ class TPgYdbProxy : public TActor { } SecurityState.erase(ev->Sender); ConnectionState.erase(itConnection); - // TODO: cleanup TokenState too } void Handle(NPG::TEvPGEvents::TEvQuery::TPtr& ev) { @@ -236,6 +148,18 @@ class TPgYdbProxy : public TActor { } } + void Handle(TEvEvents::TEvAuthResponse::TPtr& ev) { + auto& securityState = SecurityState[ev->Get()->Sender]; + auto authResponse = std::make_unique(); + if (!ev->Get()->ErrorMessage.empty()) { + authResponse->Error = ev->Get()->ErrorMessage; + } else { + securityState.SerializedToken = ev->Get()->SerializedToken; + securityState.Ticket = ev->Get()->Ticket; + } + Send(ev->Get()->Sender, authResponse.release()); + } + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(NPG::TEvPGEvents::TEvAuth, Handle); @@ -248,8 +172,7 @@ class TPgYdbProxy : public TActor { hFunc(NPG::TEvPGEvents::TEvExecute, Handle); hFunc(NPG::TEvPGEvents::TEvClose, Handle); hFunc(NPG::TEvPGEvents::TEvCancelRequest, Handle); - hFunc(TEvPrivate::TEvTokenReady, Handle); - hFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + hFunc(TEvEvents::TEvAuthResponse, Handle); } } }; diff --git a/ydb/core/local_pgwire/local_pgwire.h b/ydb/core/local_pgwire/local_pgwire.h index a5c9cc395793..b9d6588981b3 100644 --- a/ydb/core/local_pgwire/local_pgwire.h +++ b/ydb/core/local_pgwire/local_pgwire.h @@ -1,3 +1,6 @@ +#pragma once + +#include 
"local_pgwire_util.h" #include namespace NLocalPgWire { @@ -5,4 +8,6 @@ namespace NLocalPgWire { inline NActors::TActorId CreateLocalPgWireProxyId(uint32_t nodeId = 0) { return NActors::TActorId(nodeId, "localpgwire"); } NActors::IActor* CreateLocalPgWireProxy(); +NActors::IActor* CreateLocalPgWireAuthActor(const TPgWireAuthData& pgWireAuthData, const NActors::TActorId& pgYdbProxy); + } diff --git a/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp b/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp new file mode 100644 index 000000000000..896247f599b9 --- /dev/null +++ b/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp @@ -0,0 +1,192 @@ +#include "log_impl.h" +#include "local_pgwire.h" +#include "local_pgwire_util.h" + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace NLocalPgWire { + +using namespace NActors; +using namespace NKikimr; + +class TPgYdbAuthActor : public NActors::TActorBootstrapped { + using TBase = TActor; + + struct TEvPrivate { + enum EEv { + EvTokenReady = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), + EvAuthFailed, + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)"); + + struct TEvTokenReady : TEventLocal { + Ydb::Auth::LoginResult LoginResult; + + TEvTokenReady() = default; + }; + + struct TEvAuthFailed : NActors::TEventLocal { + TString ErrorMessage; + }; + }; + + TPgWireAuthData PgWireAuthData; + TActorId PgYdbProxy; + + TString DatabaseId; + TString FolderId; + TString SerializedToken; + TString Ticket; + +public: + TPgYdbAuthActor(const TPgWireAuthData& pgWireAuthData, const TActorId& pgYdbProxy) + : PgWireAuthData(pgWireAuthData) + , PgYdbProxy(pgYdbProxy) { + } + + void Bootstrap() { + if (PgWireAuthData.UserName == "__ydb_apikey") { + if (PgWireAuthData.Password.empty()) { + SendResponseAndDie("Invalid password"); + } + SendDescribeRequest(); + } else { + SendLoginRequest(); + } + + 
Become(&TPgYdbAuthActor::StateWork); + } + + void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev) { + if (ev->Get()->Error) { + SendResponseAndDie(ev->Get()->Error.Message); + return; + } + + SerializedToken = ev->Get()->SerializedToken; + Ticket = ev->Get()->Ticket; + + SendResponseAndDie(); + } + + void Handle(TEvPrivate::TEvTokenReady::TPtr& ev) { + Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = PgWireAuthData.DatabasePath, + .Ticket = ev->Get()->LoginResult.token(), + .PeerName = PgWireAuthData.PeerName, + })); + } + + void Handle(TEvPrivate::TEvAuthFailed::TPtr& ev) { + SendResponseAndDie(ev->Get()->ErrorMessage); + } + + void Handle(NKikimr::TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + const NKikimr::NSchemeCache::TSchemeCacheNavigate* navigate = ev->Get()->Request.Get(); + if (navigate->ErrorCount) { + SendResponseAndDie(TStringBuilder() << "Database with path '" << PgWireAuthData.DatabasePath << "' doesn't exists"); + return; + } + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + + const auto& entry = navigate->ResultSet.front(); + + for (const auto& attr : entry.Attributes) { + if (attr.first == "folder_id") FolderId = attr.second; + else if (attr.first == "database_id") DatabaseId = attr.second; + } + + SendApiKeyRequest(); + } + + STATEFN(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvTokenReady, Handle); + hFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + hFunc(TEvPrivate::TEvAuthFailed, Handle); + } + } +private: + void SendLoginRequest() { + Ydb::Auth::LoginRequest request; + request.set_user(PgWireAuthData.UserName); + if (!PgWireAuthData.Password.empty()) { + request.set_password(PgWireAuthData.Password); + } + + auto* actorSystem = TActivationContext::ActorSystem();; + + using TRpcEv = NGRpcService::TGRpcRequestWrapperNoAuth; + auto rpcFuture = 
NRpcService::DoLocalRpc(std::move(request), PgWireAuthData.DatabasePath, {}, actorSystem); + rpcFuture.Subscribe([actorSystem, selfId = SelfId()](const NThreading::TFuture& future) { + auto& response = future.GetValueSync(); + if (response.operation().status() == Ydb::StatusIds::SUCCESS) { + auto tokenReady = std::make_unique(); + response.operation().result().UnpackTo(&(tokenReady->LoginResult)); + actorSystem->Send(selfId, tokenReady.release()); + } else { + auto authFailedEvent = std::make_unique(); + if (response.operation().issues_size() > 0) { + authFailedEvent->ErrorMessage = response.operation().issues(0).message(); + } else { + authFailedEvent->ErrorMessage = Ydb::StatusIds_StatusCode_Name(response.operation().status()); + } + actorSystem->Send(selfId, authFailedEvent.release()); + } + }); + } + + void SendApiKeyRequest() { + auto entries = NKikimr::NGRpcProxy::V1::GetTicketParserEntries(DatabaseId, FolderId); + + Send(NKikimr::MakeTicketParserID(), new NKikimr::TEvTicketParser::TEvAuthorizeTicket({ + .Database = PgWireAuthData.DatabasePath, + .Ticket = "ApiKey " + PgWireAuthData.Password, + .PeerName = PgWireAuthData.PeerName, + .Entries = entries + })); + } + + void SendDescribeRequest() { + auto schemeCacheRequest = std::make_unique(); + NKikimr::NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = NKikimr::SplitPath(PgWireAuthData.DatabasePath); + entry.Operation = NKikimr::NSchemeCache::TSchemeCacheNavigate::OpPath; + entry.SyncVersion = false; + schemeCacheRequest->ResultSet.emplace_back(entry); + Send(NKikimr::MakeSchemeCacheID(), MakeHolder(schemeCacheRequest.release())); + } + + void SendResponseAndDie(const TString& errorMessage = "") { + std::unique_ptr authResponse; + if (!errorMessage.empty()) { + authResponse = std::make_unique(errorMessage, PgWireAuthData.Sender); + } else { + authResponse = std::make_unique(SerializedToken, Ticket, PgWireAuthData.Sender); + } + + Send(PgYdbProxy, authResponse.release()); + + PassAway(); + } +}; 
+ + +NActors::IActor* CreateLocalPgWireAuthActor(const TPgWireAuthData& pgWireAuthData, const TActorId& pgYdbProxy) { + return new TPgYdbAuthActor(pgWireAuthData, pgYdbProxy); +} + +} diff --git a/ydb/core/local_pgwire/local_pgwire_util.h b/ydb/core/local_pgwire/local_pgwire_util.h index 21ecf6dd88db..0ef16d84b27e 100644 --- a/ydb/core/local_pgwire/local_pgwire_util.h +++ b/ydb/core/local_pgwire/local_pgwire_util.h @@ -30,6 +30,14 @@ struct TConnectionState { uint32_t ConnectionNum = 0; }; +struct TPgWireAuthData { + TActorId Sender; + TString UserName; + TString DatabasePath; + TString Password; + TString PeerName; +}; + struct TParsedStatement { NPG::TPGParse::TQueryData QueryData; std::vector ParameterTypes; @@ -56,6 +64,7 @@ struct TEvEvents { EvUpdateStatement, EvSingleQuery, EvCancelRequest, + EvAuthResponse, EvEnd }; @@ -98,6 +107,24 @@ struct TEvEvents { struct TEvCancelRequest : NActors::TEventLocal { TEvCancelRequest() = default; }; + + struct TEvAuthResponse : NActors::TEventLocal { + TString SerializedToken; + TString Ticket; + TString ErrorMessage; + TActorId Sender; + + TEvAuthResponse(const TString& serializedToken, const TString& ticket, const TActorId& sender) + : SerializedToken(serializedToken) + , Ticket(ticket) + , Sender(sender) + {} + + TEvAuthResponse(const TString& errorMessage, const TActorId& sender) + : ErrorMessage(errorMessage) + , Sender(sender) + {} + }; }; TString ColumnPrimitiveValueToString(NYdb::TValueParser& valueParser); diff --git a/ydb/core/local_pgwire/ya.make b/ydb/core/local_pgwire/ya.make index d63b67b22f5e..71b533976682 100644 --- a/ydb/core/local_pgwire/ya.make +++ b/ydb/core/local_pgwire/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + local_pgwire_auth_actor.cpp local_pgwire_connection.cpp local_pgwire.cpp local_pgwire.h @@ -18,6 +19,7 @@ PEERDIR( ydb/core/kqp/common/events ydb/core/kqp/common/simple ydb/core/kqp/executer_actor + ydb/core/base ydb/core/grpc_services ydb/core/grpc_services/local_rpc ydb/core/protos @@ -25,6 
+27,7 @@ PEERDIR( ydb/core/ydb_convert ydb/public/api/grpc ydb/public/lib/operation_id/protos + ydb/services/persqueue_v1/actors ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/log_backend/log_backend.cpp b/ydb/core/log_backend/log_backend.cpp index 249f1b6a77d1..ad56e773bef8 100644 --- a/ydb/core/log_backend/log_backend.cpp +++ b/ydb/core/log_backend/log_backend.cpp @@ -15,7 +15,14 @@ TAutoPtr CreateLogBackendWithUnifiedAgent( const auto& uaClientConfig = logConfig.GetUAClientConfig(); auto uaCounters = GetServiceCounters(counters, "utils")->GetSubgroup("subsystem", "ua_client"); auto logName = uaClientConfig.GetLogName(); - TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? "static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); logBackend = logBackend ? NActors::CreateCompositeLogBackend({logBackend, uaLogBackend}) : uaLogBackend; } if (logBackend) { @@ -52,7 +59,14 @@ TAutoPtr CreateMeteringLogBackendWithUnifiedAgent( auto logName = meteringConfig.HasLogName() ? meteringConfig.GetLogName() : uaClientConfig.GetLogName(); - TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? "static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); logBackend = logBackend ? NActors::CreateCompositeLogBackend({logBackend, uaLogBackend}) : uaLogBackend; } @@ -100,7 +114,14 @@ TAutoPtr CreateAuditLogUnifiedAgentBackend( auto logName = runConfig.AppConfig.GetAuditConfig().GetUnifiedAgentBackend().HasLogName() ? 
runConfig.AppConfig.GetAuditConfig().GetUnifiedAgentBackend().GetLogName() : uaClientConfig.GetLogName(); - logBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + logBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? "static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); } return logBackend; diff --git a/ydb/core/log_backend/log_backend_build.cpp b/ydb/core/log_backend/log_backend_build.cpp index 1261f2dde53d..db31ce3722a7 100644 --- a/ydb/core/log_backend/log_backend_build.cpp +++ b/ydb/core/log_backend/log_backend_build.cpp @@ -15,7 +15,11 @@ TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromLogConfig(cons return logBackend; } -TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(const NKikimrConfig::TUAClientConfig& uaClientConfig, NMonitoring::TDynamicCounterPtr uaCounters, const TString& logName) { +TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(const NKikimrConfig::TUAClientConfig& uaClientConfig, + NMonitoring::TDynamicCounterPtr uaCounters, + const TString& logName, const TString& nodeType = "static", + const TString& tenant = "", + const TString& clusterName = "") { auto parameters = NUnifiedAgent::TClientParameters(uaClientConfig.GetUri()) .SetCounters(uaCounters) .SetMaxInflightBytes(uaClientConfig.GetMaxInflightBytes()); @@ -44,6 +48,18 @@ TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig (*sessionParameters.Meta)["_log_name"] = logName; } + if (nodeType) { + (*sessionParameters.Meta)["node_type"] = nodeType; + } + + if (tenant) { + (*sessionParameters.Meta)["database"] = tenant; + } + + if (clusterName) { + (*sessionParameters.Meta)["cluster"] = clusterName; + } + TAutoPtr uaLogBackend = MakeLogBackend(parameters, sessionParameters).Release(); return uaLogBackend; } diff --git a/ydb/core/log_backend/log_backend_build.h 
b/ydb/core/log_backend/log_backend_build.h index ab672bb81961..0577a01004bc 100644 --- a/ydb/core/log_backend/log_backend_build.h +++ b/ydb/core/log_backend/log_backend_build.h @@ -16,6 +16,9 @@ class TLogBackendBuildHelper { static TAutoPtr CreateLogBackendFromUAClientConfig( const NKikimrConfig::TUAClientConfig& uaClientConfig, NMonitoring::TDynamicCounterPtr uaCounters, - const TString& logName); + const TString& logName, + const TString& nodeType, + const TString& tenant, + const TString& clusterName); }; } diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 9ce2d1ca2700..b56b6dfe11f4 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -131,6 +131,8 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) { auto prevStaticVSlots = std::exchange(StaticVSlots, {}); StaticVDiskMap.clear(); + const TMonotonic mono = TActivationContext::Monotonic(); + if (StorageConfig.HasBlobStorageConfig()) { if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) { const auto& ss = bsConfig.GetServiceSet(); @@ -143,7 +145,7 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) { const auto& location = vslot.GetVDiskLocation(); const TPDiskId pdiskId(location.GetNodeID(), location.GetPDiskID()); const TVSlotId vslotId(pdiskId, location.GetVDiskSlotID()); - StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots); + StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots, mono); const TVDiskID& vdiskId = VDiskIDFromVDiskID(vslot.GetVDiskID()); StaticVDiskMap.emplace(vdiskId, vslotId); StaticVDiskMap.emplace(TVDiskID(vdiskId.GroupID, 0, vdiskId), vslotId); @@ -270,6 +272,13 @@ void TBlobStorageController::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) { const bool initial = !HostRecords; HostRecords = std::make_shared(ev->Get()); if (initial) { + if (auto *appData = AppData()) { + if (appData->Icb) { + 
EnableSelfHealWithDegraded = std::make_shared(0, 0, 1); + appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded, + "BlobStorageControllerControls.EnableSelfHealWithDegraded"); + } + } SelfHealId = Register(CreateSelfHealActor()); PushStaticGroupsToSelfHeal(); if (StorageConfigObtained) { @@ -337,7 +346,7 @@ void TBlobStorageController::ValidateInternalState() { Y_ABORT_UNLESS(donor->GetShortVDiskId() == vslot->GetShortVDiskId()); } if (vslot->Group) { - if (vslot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (vslot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { Y_DEBUG_ABORT_UNLESS(vslot->IsReady || vslot->IsInVSlotReadyTimestampQ()); } else { Y_DEBUG_ABORT_UNLESS(!vslot->IsReady && !vslot->IsInVSlotReadyTimestampQ()); @@ -401,7 +410,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) { const auto& record = msg->Record; for (const auto& item : record.GetVDiskStatus()) { const TVSlotId vslotId(item.GetNodeId(), item.GetPDiskId(), item.GetVSlotId()); - if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->Status > item.GetStatus()) { + if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->GetStatus() > item.GetStatus()) { return 1; } else if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskStatus > item.GetStatus()) { return 1; diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index e1e821c5c643..4b7f53c6beb0 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -213,7 +213,7 @@ namespace NKikimr::NBsController { for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) { if (slot->Group) { auto *m = VSlots.FindForUpdate(slot->VSlotId); - m->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + m->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; m->IsReady = false; TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID); GroupFailureModelChanged.insert(slot->Group->ID); diff 
--git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 516ab802d6ad..01a09938030b 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -557,7 +557,7 @@ namespace NKikimr::NBsController { x->MutableVDiskMetrics()->CopyFrom(*vslot.VDiskMetrics); x->MutableVDiskMetrics()->ClearVDiskId(); } - x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus)); + x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR))); x->SetReady(vslot.ReadySince <= mono); } if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) { @@ -698,7 +698,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = TMood::Wipe; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); @@ -744,7 +744,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = targetMood; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index 4ed5a2cffc5b..b63680720525 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -486,9 +486,9 @@ namespace NKikimr::NBsController { if (!overlay->second || !overlay->second->Group) { // deleted one (overlay->second ? 
overlay->second : base->second)->DropFromVSlotReadyTimestampQ(); NotReadyVSlotIds.erase(overlay->first); - } else if (overlay->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (overlay->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->DropFromVSlotReadyTimestampQ(); - } else if (!base || base->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (!base || base->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->PutInVSlotReadyTimestampQ(now); } else { Y_DEBUG_ABORT_UNLESS(overlay->second->IsReady || overlay->second->IsInVSlotReadyTimestampQ()); @@ -998,7 +998,7 @@ namespace NKikimr::NBsController { pb->SetAllocatedSize(vslot.Metrics.GetAllocatedSize()); pb->MutableVDiskMetrics()->CopyFrom(vslot.Metrics); pb->MutableVDiskMetrics()->ClearVDiskId(); - pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.Status)); + pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.GetStatus())); for (const TVSlotId& vslotId : vslot.Donors) { auto *item = pb->AddDonors(); Serialize(item->MutableVSlotId(), vslotId); diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 7352658bc107..8c1bb321e5ec 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -101,6 +101,7 @@ namespace NKikimr { // when the config cmd received const TInstant Timestamp; + const TMonotonic Mono; // various settings from controller const bool DonorMode; @@ -124,7 +125,8 @@ namespace NKikimr { bool PushStaticGroupsToSelfHeal = false; public: - TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp) + TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp, + TMonotonic mono) : Self(controller) , HostConfigs(&controller.HostConfigs) , Boxes(&controller.Boxes) @@ -142,6 +144,7 @@ namespace NKikimr { , 
NextStoragePoolId(&controller.NextStoragePoolId) , HostRecords(hostRecords) , Timestamp(timestamp) + , Mono(mono) , DonorMode(controller.DonorMode) , DefaultMaxSlots(controller.DefaultMaxSlots) , StaticVSlots(controller.StaticVSlots) diff --git a/ydb/core/mind/bscontroller/config_cmd.cpp b/ydb/core/mind/bscontroller/config_cmd.cpp index 88fe17f61f34..ca5e9f91da25 100644 --- a/ydb/core/mind/bscontroller/config_cmd.cpp +++ b/ydb/core/mind/bscontroller/config_cmd.cpp @@ -180,7 +180,7 @@ namespace NKikimr::NBsController { Response->MutableStatus()->RemoveLast(); } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); TString m; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index e72184045d26..e3f1f199de01 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -524,7 +524,7 @@ namespace NKikimr { // also we have to find replicating VSlots on this PDisk and assume they consume up to // max(vslotSize for every slot in group), not their actual AllocatedSize for (const auto& [id, slot] : info.VSlotsOnPDisk) { - if (slot->Group && slot->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->Group && slot->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { ui64 maxGroupSlotSize = 0; for (const TVSlotInfo *peer : slot->Group->VDisksInGroup) { maxGroupSlotSize = Max(maxGroupSlotSize, peer->Metrics.GetAllocatedSize()); @@ -607,6 +607,7 @@ namespace NKikimr { groupInfo->ID, 0, groupInfo->Generation, StoragePool.VDiskKind, failRealmIdx, failDomainIdx, vdiskIdx, TMood::Normal, groupInfo, &VSlotReadyTimestampQ, TInstant::Zero(), TDuration::Zero()); + vslotInfo->VDiskStatusTimestamp = State.Mono; // mark as uncommitted State.UncommittedVSlots.insert(vslotId); diff --git 
a/ydb/core/mind/bscontroller/drop_donor.cpp b/ydb/core/mind/bscontroller/drop_donor.cpp index 55558a8322ee..b9b0b9cdd944 100644 --- a/ydb/core/mind/bscontroller/drop_donor.cpp +++ b/ydb/core/mind/bscontroller/drop_donor.cpp @@ -18,7 +18,7 @@ class TBlobStorageController::TTxDropDonor TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DROP_DONOR; } bool Execute(TTransactionContext &txc, const TActorContext&) override { - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); for (const TVSlotId& vslotId : VSlotIds) { if (const TVSlotInfo *vslot = State->VSlots.Find(vslotId); vslot && !vslot->IsBeingDeleted()) { diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index d55e57b34d63..c8e24f664880 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -124,16 +124,17 @@ class TBlobStorageController : public TActor, public TTa TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter; public: - NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + TMonotonic VDiskStatusTimestamp; bool IsReady = false; bool OnlyPhantomsRemain = false; public: void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now, TInstant instant, bool onlyPhantomsRemain) { - if (status != Status) { + if (status != VDiskStatus) { if (status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // became "replicating" LastGotReplicating = instant; - } else if (Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" + } else if (VDiskStatus == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" Y_DEBUG_ABORT_UNLESS(LastGotReplicating != TInstant::Zero()); ReplicationTime += instant - LastGotReplicating; LastGotReplicating = {}; @@ -145,7 +146,7 @@ class 
TBlobStorageController : public TActor, public TTa LastSeenReady = instant; } - Status = status; + VDiskStatus = status; IsReady = false; if (status == NKikimrBlobStorage::EVDiskStatus::READY) { PutInVSlotReadyTimestampQ(now); @@ -159,6 +160,10 @@ class TBlobStorageController : public TActor, public TTa } } + NKikimrBlobStorage::EVDiskStatus GetStatus() const { + return VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR); + } + void PutInVSlotReadyTimestampQ(TMonotonic now) { const TMonotonic readyAfter = now + ReadyStablePeriod; // vdisk will be treated as READY one shortly, but not now Y_ABORT_UNLESS(VSlotReadyTimestampIter == TVSlotReadyTimestampQ::iterator()); @@ -291,15 +296,16 @@ class TBlobStorageController : public TActor, public TTa TString GetStatusString() const { TStringStream s; - s << NKikimrBlobStorage::EVDiskStatus_Name(Status); - if (Status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { + const auto status = GetStatus(); + s << NKikimrBlobStorage::EVDiskStatus_Name(status); + if (status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { s << "/p"; } return s.Str(); } bool IsOperational() const { - return Status >= NKikimrBlobStorage::REPLICATING; + return GetStatus() >= NKikimrBlobStorage::REPLICATING; } void OnCommit(); @@ -1510,6 +1516,7 @@ class TBlobStorageController : public TActor, public TTa bool AllowMultipleRealmsOccupation = true; bool StorageConfigObtained = false; bool Loaded = false; + std::shared_ptr EnableSelfHealWithDegraded; std::set> GroupToNode; @@ -2276,7 +2283,7 @@ class TBlobStorageController : public TActor, public TTa histo.IncrementFor(passed.Seconds()); TDuration timeBeingReplicating = slot->ReplicationTime; - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { timeBeingReplicating += now - slot->LastGotReplicating; } @@ -2301,11 +2308,12 @@ class TBlobStorageController : public TActor, public TTa 
const NKikimrBlobStorage::TVDiskKind::EVDiskKind VDiskKind; std::optional VDiskMetrics; - NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + TMonotonic VDiskStatusTimestamp; TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk, - std::map& prev) + std::map& prev, TMonotonic mono) : VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID())) , VDiskKind(vdisk.GetVDiskKind()) { @@ -2315,7 +2323,10 @@ class TBlobStorageController : public TActor, public TTa TStaticVSlotInfo& item = it->second; VDiskMetrics = std::move(item.VDiskMetrics); VDiskStatus = item.VDiskStatus; + VDiskStatusTimestamp = item.VDiskStatusTimestamp; ReadySince = item.ReadySince; + } else { + VDiskStatusTimestamp = mono; } } }; diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp index b8eb7d59a5e2..a6efc5a39457 100644 --- a/ydb/core/mind/bscontroller/load_everything.cpp +++ b/ydb/core/mind/bscontroller/load_everything.cpp @@ -352,6 +352,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBaseVSlots.clear(); { using T = Schema::VSlot; @@ -374,6 +375,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBaseNotReadyVSlotIds.insert(x.VSlotId); } + x.VDiskStatusTimestamp = mono; if (!slot.Next()) { return false; diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index 9f6fa7597e6d..374955bc70f7 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -1296,7 +1296,7 @@ void TBlobStorageController::RenderVSlotRow(IOutputStream& out, const TVSlotInfo } TABLED() { TDuration time = vslot.ReplicationTime; - if (vslot.Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (vslot.GetStatus() == 
NKikimrBlobStorage::EVDiskStatus::REPLICATING) { time += TActivationContext::Now() - vslot.LastGotReplicating; } out << time; diff --git a/ydb/core/mind/bscontroller/node_report.cpp b/ydb/core/mind/bscontroller/node_report.cpp index 868d8fb98822..ed1b116f1ca9 100644 --- a/ydb/core/mind/bscontroller/node_report.cpp +++ b/ydb/core/mind/bscontroller/node_report.cpp @@ -26,7 +26,7 @@ class TBlobStorageController::TTxNodeReport return true; } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); NIceDb::TNiceDb db(txc.DB); diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 69d05dcdfd8d..d0952a8bdf0b 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -160,7 +160,7 @@ class TBlobStorageController::TTxUpdateNodeDrives bool Execute(TTransactionContext& txc, const TActorContext&) override { const TNodeId nodeId = Record.GetNodeId(); - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); auto updateIsSuccessful = true; @@ -547,7 +547,7 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId, TActorId serve updates.push_back({ .VDiskId = it->second->GetVDiskId(), .IsReady = it->second->IsReady, - .VDiskStatus = it->second->Status, + .VDiskStatus = it->second->GetStatus(), }); ScrubState.UpdateVDiskState(&*it->second); SysViewChangedVSlots.insert(it->second->VSlotId); diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index 2d13a1d86cfd..d2ff2877b55b 100644 --- a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -43,6 +43,7 @@ namespace NKikimr::NBsController { std::shared_ptr 
Topology; TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks; const bool IsSelfHealReasonDecommit; + const bool IgnoreDegradedGroupsChecks; const bool DonorMode; THashSet PendingVDisks; THashMap ActorToDiskMap; @@ -51,7 +52,7 @@ namespace NKikimr::NBsController { public: TReassignerActor(TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group, std::optional vdiskToReplace, std::shared_ptr topology, - bool isSelfHealReasonDecommit, bool donorMode) + bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode) : ControllerId(controllerId) , GroupId(groupId) , Group(std::move(group)) @@ -59,6 +60,7 @@ namespace NKikimr::NBsController { , Topology(std::move(topology)) , FailedGroupDisks(Topology.get()) , IsSelfHealReasonDecommit(isSelfHealReasonDecommit) + , IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks) , DonorMode(donorMode) {} @@ -166,6 +168,9 @@ namespace NKikimr::NBsController { request->SetIgnoreGroupReserve(true); request->SetSettleOnlyOnOperationalDisks(true); request->SetIsSelfHealReasonDecommit(IsSelfHealReasonDecommit); + if (IgnoreDegradedGroupsChecks) { + request->SetIgnoreDegradedGroupsChecks(IgnoreDegradedGroupsChecks); + } request->SetAllowUnusableDisks(true); if (VDiskToReplace) { ev->SelfHeal = true; @@ -278,6 +283,7 @@ namespace NKikimr::NBsController { bool AllowMultipleRealmsOccupation; bool DonorMode; THostRecordMap HostRecords; + std::shared_ptr EnableSelfHealWithDegraded; using TTopologyDescr = std::tuple; THashMap> Topologies; @@ -289,13 +295,15 @@ namespace NKikimr::NBsController { public: TSelfHealActor(ui64 tabletId, std::shared_ptr unreassignableGroups, THostRecordMap hostRecords, - bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode) + bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode, + std::shared_ptr enableSelfHealWithDegraded) : TabletId(tabletId) , UnreassignableGroups(std::move(unreassignableGroups)) 
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled) , AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation) , DonorMode(donorMode) , HostRecords(std::move(hostRecords)) + , EnableSelfHealWithDegraded(std::move(enableSelfHealWithDegraded)) {} void Bootstrap(const TActorId& parentId) { @@ -427,11 +435,35 @@ namespace NKikimr::NBsController { // check if it is possible to move anything out bool isSelfHealReasonDecommit; - if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit)) { + bool ignoreDegradedGroupsChecks; + if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit, + &ignoreDegradedGroupsChecks)) { group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content, - *v, group.Topology, isSelfHealReasonDecommit, DonorMode)); + *v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode)); } else { ++counter; // this group can't be reassigned right now + + auto log = [&]() { + TStringStream ss; + ss << "["; + bool first = true; + for (const auto& [vdiskId, vdisk] : group.Content.VDisks) { + if (!std::exchange(first, false)) { + ss << ","; + } + ss << "{"; + ss << vdiskId; + ss << (IsReady(vdisk, now) ? " Ready" : " NotReady"); + ss << (vdisk.Faulty ? " Faulty" : ""); + ss << (vdisk.Bad ? " IsBad" : ""); + ss << (vdisk.Decommitted ? 
" Decommitted" : ""); + ss << "}"; + } + ss << "]"; + return ss.Str(); + }; + + STLOG(PRI_INFO, BS_SELFHEAL, BSSH11, "group can't be reassigned right now " << log(), (GroupId, group.GroupId)); } } @@ -462,7 +494,8 @@ namespace NKikimr::NBsController { ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog, "Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation); group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content, - std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, DonorMode)); + std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, + false /*ignoreDegradedGroupsChecks*/, DonorMode)); } } } @@ -512,7 +545,8 @@ namespace NKikimr::NBsController { } std::optional FindVDiskToReplace(const TEvControllerUpdateSelfHealInfo::TGroupContent& content, - TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) { + TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit, + bool *ignoreDegradedGroupsChecks) { // main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more // than one disk missing, but we should not move next faulty disk until previous one is replicated, at least // partially (meaning only phantoms left) @@ -531,7 +565,7 @@ namespace NKikimr::NBsController { } [[fallthrough]]; case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING: - return std::nullopt; // don't touch group with replicating disks + return std::nullopt; // don't touch group with replicating or starting disks default: break; @@ -557,6 +591,7 @@ namespace NKikimr::NBsController { continue; // this group will become degraded when applying self-heal logic, skip disk } *isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit; + *ignoreDegradedGroupsChecks = checker.IsDegraded(failedByReadiness) && *EnableSelfHealWithDegraded; return vdiskId; } } @@ -864,7 
+899,7 @@ namespace NKikimr::NBsController { IActor *TBlobStorageController::CreateSelfHealActor() { Y_ABORT_UNLESS(HostRecords); return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled, - AllowMultipleRealmsOccupation, DonorMode); + AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded); } void TBlobStorageController::InitializeSelfHealState() { @@ -913,7 +948,7 @@ namespace NKikimr::NBsController { slot->OnlyPhantomsRemain, slot->IsReady, TMonotonic::Zero(), - slot->Status, + slot->GetStatus(), }; } } @@ -960,7 +995,7 @@ namespace NKikimr::NBsController { false, /* OnlyPhantomsRemain */ true, /* IsReady; decision is based on ReadySince */ info.ReadySince, - info.VDiskStatus, + info.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR), }; } } @@ -987,7 +1022,7 @@ namespace NKikimr::NBsController { const bool was = slot->IsOperational(); if (const TGroupInfo *group = slot->Group) { const bool wasReady = slot->IsReady; - if (slot->Status != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { + if (slot->GetStatus() != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { slot->SetStatus(m.GetStatus(), mono, now, m.GetOnlyPhantomsRemain()); if (slot->IsReady != wasReady) { ScrubState.UpdateVDiskState(slot); @@ -1001,14 +1036,14 @@ namespace NKikimr::NBsController { .VDiskId = vdiskId, .OnlyPhantomsRemain = slot->OnlyPhantomsRemain, .IsReady = slot->IsReady, - .VDiskStatus = slot->Status, + .VDiskStatus = slot->GetStatus(), }); if (!was && slot->IsOperational() && !group->SeenOperational) { groups.insert(const_cast(group)); } SysViewChangedVSlots.insert(vslotId); } - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { // we can release donor slots without further notice then the VDisk is completely replicated; we // intentionally use GetStatus() here instead of IsReady() 
to prevent waiting for (const TVSlotId& donorVSlotId : slot->Donors) { diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index 6526402a825c..b9fed5ab6d1d 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -325,7 +325,8 @@ void CopyInfo(NKikimrSysView::TPDiskInfo* info, const THolder status, NKikimrBlobStorage::TVDiskKind::EVDiskKind kind, + bool isBeingDeleted) { pb->SetGroupId(vdiskId.GroupID.GetRawId()); pb->SetGroupGeneration(vdiskId.GroupGeneration); pb->SetFailRealm(vdiskId.FailRealm); @@ -337,7 +338,9 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, if (m.HasAvailableSize()) { pb->SetAvailableSize(m.GetAvailableSize()); } - pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(status)); + if (status) { + pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(*status)); + } pb->SetKind(NKikimrBlobStorage::TVDiskKind::EVDiskKind_Name(kind)); if (isBeingDeleted) { pb->SetIsBeingDeleted(true); @@ -345,8 +348,8 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, } void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder& vSlotInfo) { - SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->Status, vSlotInfo->Kind, - vSlotInfo->IsBeingDeleted()); + SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->VDiskStatus, + vSlotInfo->Kind, vSlotInfo->IsBeingDeleted()); } void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder& groupInfo) { @@ -422,6 +425,21 @@ void TBlobStorageController::UpdateSystemViews() { return; } + const TMonotonic now = TActivationContext::Monotonic(); + const TDuration expiration = TDuration::Seconds(15); + for (auto& [key, value] : VSlots) { + if (!value->VDiskStatus && value->VDiskStatusTimestamp + expiration <= now) { + value->VDiskStatus = NKikimrBlobStorage::ERROR; + SysViewChangedVSlots.insert(key); + } + } + for 
(auto& [key, value] : StaticVSlots) { + if (!value.VDiskStatus && value.VDiskStatusTimestamp + expiration <= now) { + value.VDiskStatus = NKikimrBlobStorage::ERROR; + SysViewChangedVSlots.insert(key); + } + } + if (!SysViewChangedPDisks.empty() || !SysViewChangedVSlots.empty() || !SysViewChangedGroups.empty() || !SysViewChangedStoragePools.empty() || SysViewChangedSettings) { auto update = MakeHolder(); diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp index 66f3056e06ab..ee3b31fb2ac2 100644 --- a/ydb/core/mind/bscontroller/virtual_group.cpp +++ b/ydb/core/mind/bscontroller/virtual_group.cpp @@ -248,7 +248,7 @@ namespace NKikimr::NBsController { if (const TGroupInfo *group = Self->FindGroup(GroupId); !group || group->VirtualGroupSetupMachineId != MachineId) { return true; // another machine is already running } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); TGroupInfo *group = State->Groups.FindForUpdate(GroupId); Y_ABORT_UNLESS(group); if (!Callback(*group, *State)) { @@ -294,7 +294,7 @@ namespace NKikimr::NBsController { if (Token.expired()) { return true; // actor is already dead } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); const size_t n = State->BlobDepotDeleteQueue.Unshare().erase(GroupId); Y_ABORT_UNLESS(n == 1); TString error; @@ -897,7 +897,7 @@ namespace NKikimr::NBsController { TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DECOMMIT_GROUP; } bool Execute(TTransactionContext& txc, const TActorContext&) override { - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); Action(*State); TString error; if 
(State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) { diff --git a/ydb/core/mind/dynamic_nameserver.cpp b/ydb/core/mind/dynamic_nameserver.cpp index 9f31734f3c67..d6d135286404 100644 --- a/ydb/core/mind/dynamic_nameserver.cpp +++ b/ydb/core/mind/dynamic_nameserver.cpp @@ -286,6 +286,7 @@ void TDynamicNameserver::UpdateState(const NKikimrNodeBroker::TNodesInfo &rec, ctx.Schedule(config->Epoch.End - ctx.Now(), new TEvPrivate::TEvUpdateEpoch(domain, config->Epoch.Id + 1)); } else { + // Note: this update may be optimized to only include new nodes for (auto &node : rec.GetNodes()) { auto nodeId = node.GetNodeId(); if (!config->DynamicNodes.contains(nodeId)) diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index 637a15f9753b..620f502eead6 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -62,17 +62,17 @@ void BalanceNodes } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); // weighted random shuffle std::vector weights; - weights.reserve(tablets.size()); - for (auto it = tablets.begin(); it != tablets.end(); ++it) { + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { weights.emplace_back((*it)->GetWeight(resourceToBalance)); } - auto itT = tablets.begin(); + auto itT = first; auto itW = weights.begin(); - while (itT != tablets.end() && itW != weights.end()) { + while (itT != last && itW != weights.end()) { auto idx = std::discrete_distribution(itW, weights.end())(randGen); if (idx != 0) { std::iter_swap(itT, std::next(itT, idx)); @@ -84,32 +84,32 @@ void BalanceTablets -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { - std::sort(tablets.begin(), tablets.end(), [resourceToBalance](const TTabletInfo* a, 
const TTabletInfo* b) -> bool { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { + std::sort(first, last, [resourceToBalance](const TTabletInfo* a, const TTabletInfo* b) -> bool { return a->GetWeight(resourceToBalance) > b->GetWeight(resourceToBalance); }); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); - std::shuffle(tablets.begin(), tablets.end(), randGen); + std::shuffle(first, last, randGen); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::vector> weights; - weights.reserve(tablets.size()); - for (TTabletInfo* tablet : tablets) { - double weight = tablet->GetWeight(resourceToBalance); - weights.emplace_back(weight * randGen(), tablet); + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { + double weight = (*it)->GetWeight(resourceToBalance); + weights.emplace_back(weight * randGen(), *it); } std::sort(weights.begin(), weights.end(), [](const auto& a, const auto& b) -> bool { return a.first > b.first; }); for (size_t n = 0; n < weights.size(); ++n) { - tablets[n] = weights[n].second; + first[n] = weights[n].second; } } @@ -252,18 +252,31 @@ class THiveBalancer : public NActors::TActorBootstrapped, public } BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets are suitable for balancing"); if (!tablets.empty()) { + // avoid moving system tablets if possible + std::vector::iterator partitionIt; + if (Hive->GetLessSystemTabletsMoves()) { + partitionIt = std::partition(tablets.begin(), tablets.end(), [](TTabletInfo* tablet) { 
+ return !THive::IsSystemTablet(tablet->GetTabletType()); + }); + } else { + partitionIt = tablets.end(); + } switch (Hive->GetTabletBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; } Tablets.clear(); diff --git a/ydb/core/mind/hive/balancer.h b/ydb/core/mind/hive/balancer.h index 62289c4f2401..e5ee1a02820d 100644 --- a/ydb/core/mind/hive/balancer.h +++ b/ydb/core/mind/hive/balancer.h @@ -10,7 +10,7 @@ template& nodes, EResourceToBalance resourceTobalance); template -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance); +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance); template void BalanceChannels(std::vector& channels, NKikimrConfig::THiveConfig::EHiveStorageBalanceStrategy metricToBalance); diff --git a/ydb/core/mind/hive/hive_impl.cpp 
b/ydb/core/mind/hive/hive_impl.cpp index 8807f3626302..55dada0352af 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -95,11 +96,13 @@ void THive::RestartPipeTx(ui64 tabletId) { } bool THive::TryToDeleteNode(TNodeInfo* node) { - if (node->CanBeDeleted()) { + if (node->CanBeDeleted(TActivationContext::Now())) { + BLOG_I("TryToDeleteNode(" << node->Id << "): deleting"); DeleteNode(node->Id); return true; } if (!node->DeletionScheduled) { + BLOG_D("TryToDeleteNode(" << node->Id << "): waiting " << GetNodeDeletePeriod()); Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id)); node->DeletionScheduled = true; } @@ -117,12 +120,15 @@ void THive::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev) { void THive::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev) { if (ev->Get()->TabletId == TabletID()) { BLOG_TRACE("Handle TEvTabletPipe::TEvServerDisconnected(" << ev->Get()->ClientId << ") " << ev->Get()->ServerId); - TNodeInfo* node = FindNode(ev->Get()->ClientId.NodeId()); + auto nodeId = ev->Get()->ClientId.NodeId(); + TNodeInfo* node = FindNode(nodeId); if (node != nullptr) { Erase(node->PipeServers, ev->Get()->ServerId); if (node->PipeServers.empty() && node->IsUnknown()) { ObjectDistributions.RemoveNode(*node); - TryToDeleteNode(node); + if (TryToDeleteNode(node)) { + Execute(CreateDeleteNode(nodeId)); + } } } } @@ -233,6 +239,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec if (tablet == nullptr) { continue; } + tablet->InWaitQueue = false; if (tablet->IsAlive()) { BLOG_D("tablet " << record.TabletId << " already alive, skipping"); continue; @@ -253,9 +260,10 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay")); } tablet->ActorsToNotifyOnRestart.clear(); 
+ tablet->InWaitQueue = true; if (tablet->IsFollower()) { TLeaderTabletInfo& leader = tablet->GetLeader(); - UpdateTabletFollowersNumber(leader, db, sideEffects); + UpdateTabletFollowersNumber(leader, db, sideEffects); // this may delete tablet } BootQueue.AddToWaitQueue(record); // waiting for new node continue; @@ -495,6 +503,7 @@ void THive::Handle(TEvPrivate::TEvBootTablets::TPtr&) { for (auto* node : unimportantNodes) { node->Ping(); } + ProcessNodePingQueue(); TVector tabletsToReleaseFromParent; TSideEffects sideEffects; sideEffects.Reset(SelfId()); @@ -685,11 +694,13 @@ void THive::Cleanup() { void THive::Handle(TEvLocal::TEvStatus::TPtr& ev) { BLOG_D("Handle TEvLocal::TEvStatus for Node " << ev->Sender.NodeId() << ": " << ev->Get()->Record.ShortDebugString()); + RemoveFromPingInProgress(ev->Sender.NodeId()); Execute(CreateStatus(ev->Sender, ev->Get()->Record)); } void THive::Handle(TEvLocal::TEvSyncTablets::TPtr& ev) { BLOG_D("THive::Handle::TEvSyncTablets"); + RemoveFromPingInProgress(ev->Sender.NodeId()); Execute(CreateSyncTablets(ev->Sender, ev->Get()->Record)); } @@ -743,6 +754,7 @@ void THive::Handle(TEvInterconnect::TEvNodeConnected::TPtr &ev) { void THive::Handle(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { TNodeId nodeId = ev->Get()->NodeId; BLOG_W("Handle TEvInterconnect::TEvNodeDisconnected, NodeId " << nodeId); + RemoveFromPingInProgress(nodeId); if (ConnectedNodes.erase(nodeId)) { UpdateCounterNodesConnected(-1); } @@ -915,6 +927,7 @@ void THive::Handle(TEvents::TEvUndelivered::TPtr &ev) { case TEvLocal::EvPing: { TNodeId nodeId = ev->Cookie; TNodeInfo* node = FindNode(nodeId); + NodePingsInProgress.erase(nodeId); if (node != nullptr && ev->Sender == node->Local) { if (node->IsDisconnecting()) { // ping continiousily until we fully disconnected from the node @@ -923,6 +936,7 @@ void THive::Handle(TEvents::TEvUndelivered::TPtr &ev) { KillNode(node->Id, node->Local); } } + ProcessNodePingQueue(); break; } }; @@ -981,8 +995,9 @@ void 
THive::OnActivateExecutor(const TActorContext&) { BuildLocalConfig(); ClusterConfig = AppData()->HiveConfig; SpreadNeighbours = ClusterConfig.GetSpreadNeighbours(); + NodeBrokerEpoch = TDuration::MicroSeconds(NKikimrNodeBroker::TConfig().GetEpochDuration()); Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), - new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest(NKikimrConsole::TConfigItem::HiveConfigItem)); + new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HiveConfigItem, NKikimrConsole::TConfigItem::NodeBrokerConfigItem})); Execute(CreateInitScheme()); if (!ResponsivenessPinger) { ResponsivenessPinger = new TTabletResponsivenessPinger(TabletCounters->Simple()[NHive::COUNTER_RESPONSE_TIME_USEC], TDuration::Seconds(1)); @@ -1686,6 +1701,21 @@ void THive::UpdateCounterNodesConnected(i64 nodesConnectedDiff) { } } +void THive::UpdateCounterPingQueueSize() { + if (TabletCounters != nullptr) { + auto& counter = TabletCounters->Simple()[NHive::COUNTER_PINGQUEUE_SIZE]; + counter.Set(NodePingQueue.size()); + } +} + +void THive::UpdateCounterTabletsStarting(i64 tabletsStartingDiff) { + if (TabletCounters != nullptr) { + auto& counter = TabletCounters->Simple()[NHive::COUNTER_TABLETS_STARTING]; + auto newValue = counter.Get() + tabletsStartingDiff; + counter.Set(newValue); + } +} + void THive::RecordTabletMove(const TTabletMoveInfo& moveInfo) { TabletMoveHistory.PushBack(moveInfo); TabletCounters->Cumulative()[NHive::COUNTER_TABLETS_MOVED].Increment(1); @@ -1837,6 +1867,9 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl if (req.GetReturnMetrics()) { tabletInfo.MutableMetrics()->CopyFrom(info->GetResourceValues()); } + if (info->InWaitQueue) { + tabletInfo.SetInWaitQueue(true); + } if (req.GetReturnChannelHistory()) { for (const auto& channel : info->TabletStorageInfo->Channels) { auto& tabletChannel = *tabletInfo.AddTabletChannels(); @@ -2195,7 +2228,9 @@ void 
THive::Handle(TEvHive::TEvInitiateTabletExternalBoot::TPtr& ev) { void THive::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { const NKikimrConsole::TConfigNotificationRequest& record = ev->Get()->Record; ClusterConfig = record.GetConfig().GetHiveConfig(); - BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString()); + NodeBrokerEpoch = TDuration::MicroSeconds(record.GetConfig().GetNodeBrokerConfig().GetEpochDuration()); + BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString() + << "; " << record.GetConfig().GetNodeBrokerConfig().ShortDebugString()); BuildCurrentConfig(); Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie); } @@ -2327,7 +2362,8 @@ void THive::Handle(TEvPrivate::TEvProcessTabletBalancer::TPtr&) { nodeUsageHistogram.IncrementFor(record.Usage * 100); } - if (stats.MaxUsage >= GetMaxNodeUsageToKick()) { + double minUsageToKick = GetMaxNodeUsageToKick() - GetNodeUsageRangeToKick(); + if (stats.MaxUsage >= GetMaxNodeUsageToKick() && stats.MinUsage < minUsageToKick) { std::vector overloadedNodes; for (const auto& [nodeId, nodeInfo] : Nodes) { if (nodeInfo.IsAlive() && !nodeInfo.Down && nodeInfo.IsOverloaded()) { @@ -2661,6 +2697,25 @@ void THive::ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui Execute(CreateStartTablet(tabletId, local, cookie, external)); } +void THive::QueuePing(const TActorId& local) { + NodePingQueue.push(local); +} + +void THive::ProcessNodePingQueue() { + while (!NodePingQueue.empty() && NodePingsInProgress.size() < GetMaxPingsInFlight()) { + TActorId local = NodePingQueue.front(); + TNodeId node = local.NodeId(); + NodePingQueue.pop(); + NodePingsInProgress.insert(node); + SendPing(local, node); + } +} + +void THive::RemoveFromPingInProgress(TNodeId node) { + NodePingsInProgress.erase(node); + 
ProcessNodePingQueue(); +} + void THive::SendPing(const TActorId& local, TNodeId id) { Send(local, new TEvLocal::TEvPing(HiveId, @@ -2747,6 +2802,7 @@ void THive::UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNice db.Table().Key(tablet.Id, follower.Id).Delete(); follower.InitiateStop(sideEffects); tablet.Followers.erase(std::prev(itFollower.base())); + UpdateCounterTabletsTotal(-1); --followerCount; } } @@ -3339,13 +3395,16 @@ void THive::Handle(TEvPrivate::TEvLogTabletMoves::TPtr&) { } void THive::Handle(TEvPrivate::TEvDeleteNode::TPtr& ev) { - auto node = FindNode(ev->Get()->NodeId); + auto nodeId = ev->Get()->NodeId; + auto node = FindNode(nodeId); if (node == nullptr) { return; } node->DeletionScheduled = false; if (!node->IsAlive()) { - TryToDeleteNode(node); + if (TryToDeleteNode(node)) { + Execute(CreateDeleteNode(nodeId)); + } } } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 57a0a3dce3d3..4cadc764ac56 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -301,6 +301,7 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar ITransaction* CreateRequestTabletOwners(TEvHive::TEvRequestTabletOwners::TPtr event); ITransaction* CreateUpdateTabletsObject(TEvHive::TEvUpdateTabletsObject::TPtr event); ITransaction* CreateUpdateDomain(TSubDomainKey subdomainKey, TEvHive::TEvUpdateDomain::TPtr event = {}); + ITransaction* CreateDeleteNode(TNodeId nodeId); public: TDomainsView DomainsView; @@ -415,6 +416,8 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar TEventPriorityQueue EventQueue{*this}; ui64 OperationsLogIndex = 0; std::vector ActorsWaitingToMoveTablets; + std::queue NodePingQueue; + std::unordered_set NodePingsInProgress; struct TPendingCreateTablet { NKikimrHive::TEvCreateTablet CreateTablet; @@ -443,6 +446,7 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar NKikimrConfig::THiveConfig ClusterConfig; 
NKikimrConfig::THiveConfig DatabaseConfig; + TDuration NodeBrokerEpoch; std::unordered_map TabletLimit; // built from CurrentConfig std::unordered_map DefaultDataCentersPreference; std::unordered_map> RegisteredDataCenterNodes; @@ -649,6 +653,8 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId void UpdateCounterBootQueueSize(ui64 bootQueueSize); void UpdateCounterEventQueueSize(i64 eventQueueSizeDiff); void UpdateCounterNodesConnected(i64 nodesConnectedDiff); + void UpdateCounterPingQueueSize(); + void UpdateCounterTabletsStarting(i64 tabletsStartingDiff); void RecordTabletMove(const TTabletMoveInfo& info); bool DomainHasNodes(const TSubDomainKey &domainKey) const; void ProcessBootQueue(); @@ -677,7 +683,10 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId void UpdateRegisteredDataCenters(); void AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); void RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); + void QueuePing(const TActorId& local); void SendPing(const TActorId& local, TNodeId id); + void RemoveFromPingInProgress(TNodeId node); + void ProcessNodePingQueue(); void SendReconnect(const TActorId& local); static THolder BuildGroupParametersForChannel(const TLeaderTabletInfo& tablet, ui32 channelId); void KickTablet(const TTabletInfo& tablet); @@ -733,7 +742,11 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId } TDuration GetNodeDeletePeriod() const { - return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod()); + if (CurrentConfig.HasNodeDeletePeriod()) { + return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod()); + } else { + return NodeBrokerEpoch; + } } ui64 GetDrainInflight() const { @@ -934,6 +947,19 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId return CurrentConfig.GetStorageBalancerInflight(); } + double GetNodeUsageRangeToKick() const { + return 
CurrentConfig.GetNodeUsageRangeToKick(); + } + + ui64 GetMaxPingsInFlight() const { + return CurrentConfig.GetMaxPingsInFlight(); + } + + bool GetLessSystemTabletsMoves() const { + return CurrentConfig.GetLessSystemTabletsMoves(); + + } + static void ActualizeRestartStatistics(google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static bool IsSystemTablet(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_impl_ut.cpp b/ydb/core/mind/hive/hive_impl_ut.cpp index 0f71b8d31503..bf540540a7e0 100644 --- a/ydb/core/mind/hive/hive_impl_ut.cpp +++ b/ydb/core/mind/hive/hive_impl_ut.cpp @@ -109,7 +109,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { auto CheckSpeedAndDistribution = []( std::unordered_map& allTablets, - std::function&, EResourceToBalance)> func, + std::function::iterator, std::vector::iterator, EResourceToBalance)> func, EResourceToBalance resource) -> void { std::vector tablets; @@ -119,7 +119,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { TProfileTimer timer; - func(tablets, resource); + func(tablets.begin(), tablets.end(), resource); double passed = timer.Get().SecondsFloat(); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 1d89e73632ee..4bfa4031dee3 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -1069,6 +1069,119 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT(!isNodeEmpty(nodeId)); } + Y_UNIT_TEST(DrainWithHiveRestart) { + // 1. Drain a node + // 2. Kill it & wait for hive to delete it + // 3. Start the node again + // 4. Restart hive + // 5. 
Ensure node is not down (by creating tablets) + const int NUM_NODES = 3; + const int NUM_TABLETS = 10; + TTestBasicRuntime runtime(NUM_NODES, false); + Setup(runtime, true, 2, [](TAppPrepare& app) { + app.HiveConfig.SetNodeDeletePeriod(1); + }); + const ui64 hiveTablet = MakeDefaultHiveID(); + const ui64 testerTablet = MakeTabletID(false, 1); + const TActorId hiveActor = CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + runtime.EnableScheduleForActor(hiveActor); + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + runtime.DispatchEvents(options); + } + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + std::unordered_set tablets; + TActorId senderA = runtime.AllocateEdgeActor(0); + auto createTablets = [&] { + for (int i = 0; i < NUM_TABLETS; ++i) { + THolder ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + tablets.size(), tabletType, BINDED_CHANNELS)); + runtime.SendToPipe(hiveTablet, senderA, ev.Release(), 0, GetPipeConfigWithRetries()); + TAutoPtr handle; + auto createTabletReply = runtime.GrabEdgeEventRethrow(handle); + ui64 tabletId = createTabletReply->Record.GetTabletID(); + tablets.insert(tabletId); + } + NTabletPipe::TClientConfig pipeConfig; + pipeConfig.RetryPolicy = NTabletPipe::TClientRetryPolicy::WithRetries(); + for (TTabletId tabletId : tablets) { + MakeSureTabletIsUp(runtime, tabletId, 0, &pipeConfig); + } + }; + + createTablets(); + + ui32 nodeIdx = 0; + ui32 nodeId = runtime.GetNodeId(nodeIdx); + { + Ctest << "1. Drain a node\n"; + + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvDrainNode(nodeId)); + + Ctest << "2. 
Kill it & wait for hive to delete it\n"; + + SendKillLocal(runtime, nodeIdx); + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvDeleteNode); + runtime.DispatchEvents(options); + runtime.AdvanceCurrentTime(TDuration::Seconds(2)); + runtime.DispatchEvents(options); + } + } + + auto isNodeEmpty = [&](ui32 nodeId) -> bool { + bool empty = true; + TAutoPtr handle; + TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId); + runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest())); + NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse* wbResponse = runtime.GrabEdgeEventRethrow(handle); + for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : wbResponse->Record.GetTabletStateInfo()) { + if (tablets.contains(tabletInfo.GetTabletId()) && tabletInfo.GetState() != NKikimrWhiteboard::TTabletStateInfo::Dead) { + Ctest << "Tablet " << tabletInfo.GetTabletId() << "." << tabletInfo.GetFollowerId() + << " is not dead yet (" << NKikimrWhiteboard::TTabletStateInfo::ETabletState_Name(tabletInfo.GetState()) << ")" << Endl; + empty = false; + } + } + return empty; + }; + + Ctest << "3. Start the node again\n"; + CreateLocal(runtime, nodeIdx); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus); + runtime.DispatchEvents(options); + } + + Ctest << "4. 
Restart hive\n"; + + runtime.Register(CreateTabletKiller(hiveTablet)); + { + TDispatchOptions options; + std::unordered_set nodesConnected; + auto observer = runtime.AddObserver([&](auto&& ev) { nodesConnected.insert(ev->Sender.NodeId()); }); + auto waitFor = [&](const auto& condition, const TString& description) { + while (!condition()) { + Ctest << "waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + } + }; + waitFor([&](){return nodesConnected.size() == NUM_NODES; }, "nodes to connect"); + } + + Ctest << "5. Ensure node is not down (by creating tablets)\n"; + + createTablets(); + + UNIT_ASSERT(!isNodeEmpty(nodeId)); + } + Y_UNIT_TEST(TestCreateSubHiveCreateTablet) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); @@ -3961,6 +4074,7 @@ Y_UNIT_TEST_SUITE(THiveTest) { // this value of MaxNodeUsageToKick is selected specifically to make test scenario work // in link with number of tablets and values of network usage metrics used below app.HiveConfig.SetMaxNodeUsageToKick(0.01); + app.HiveConfig.SetNodeUsageRangeToKick(0); app.HiveConfig.SetEmergencyBalancerInflight(1); // to ensure fair distribution }); @@ -4776,6 +4890,84 @@ Y_UNIT_TEST_SUITE(THiveTest) { } } + Y_UNIT_TEST(TestHiveBalancerUselessNeighbourMoves) { + // 7 tablets of same object, 3 nodes, one of nodes cannot run them + // distribution should be (4, 3, 0) + // this should trigger balancer, but not lead to any moves + static constexpr ui64 NUM_NODES = 3; + static constexpr ui64 NUM_TABLETS = 7; + TTestBasicRuntime runtime(NUM_NODES, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + app.HiveConfig.SetMetricsWindowSize(1); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(); 
+ const ui64 testerTablet = MakeTabletID(false, 1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array, NUM_NODES> { + std::array, NUM_NODES> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + runtime.DispatchEvents(options); + } + + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + std::vector tablets; + tablets.reserve(NUM_TABLETS); + for (size_t i = 0; i < NUM_TABLETS; ++i) { + THolder ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(1); + ev->Record.AddAllowedNodeIDs(nodeBase); + ev->Record.AddAllowedNodeIDs(nodeBase + 1); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + tablets.push_back(tabletId); + } + + auto initialDistribution = getDistribution(); + + for (auto tablet : tablets) { + THolder metrics = MakeHolder(); + NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics(); + metric->SetTabletID(tablet); + metric->MutableResourceUsage()->SetCPU(0); + metric->MutableResourceUsage()->SetMemory(0); + + runtime.SendToPipe(hiveTablet, senderA, metrics.Release()); + } + + { + TDispatchOptions options; + 
options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that balancer moved no tablets + auto newDistribution = getDistribution(); + + UNIT_ASSERT_EQUAL(initialDistribution, newDistribution); + } + Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) { static constexpr ui64 TABLETS_PER_NODE = 10; TTestBasicRuntime runtime(3, false); @@ -4855,6 +5047,75 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_VALUES_EQUAL(newDistribution[1].size(), TABLETS_PER_NODE - 1); } + Y_UNIT_TEST(TestHiveBalancerHighUsage) { + static constexpr ui64 NUM_NODES = 2; + TTestBasicRuntime runtime(2, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(); + const ui64 testerTablet = MakeTabletID(false, 1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array, NUM_NODES> { + std::array, NUM_NODES> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + runtime.DispatchEvents(options); + } + + 
TTabletTypes::EType tabletType = TTabletTypes::Dummy; + for (size_t i = 0; i < 2; ++i) { + THolder ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(i); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + } + + auto initialDistribution = getDistribution(); + + std::array usages = {.89, .91}; + for (ui32 i = 0; i < 2; ++i) { + for (ui32 node = 0; node < NUM_NODES; ++node) { + TActorId sender = runtime.AllocateEdgeActor(node); + THolder metrics = MakeHolder(); + metrics->Record.SetTotalNodeUsage(usages[node]); + + runtime.SendToPipe(hiveTablet, sender, metrics.Release(), node); + } + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that balancer moved no tablets + auto newDistribution = getDistribution(); + + UNIT_ASSERT_EQUAL(initialDistribution, newDistribution); + } + Y_UNIT_TEST(TestUpdateTabletsObjectUpdatesMetrics) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index a1be16436e5d..f442726a910f 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -246,7 +246,7 @@ class TTxMonEvent_MemStateTablets : public TTransactionBase { if (WaitingOnly) { tabletIdIndex.reserve(Self->BootQueue.WaitQueue.size()); for (const TBootQueue::TBootQueueRecord& rec : Self->BootQueue.WaitQueue) { - TTabletInfo* tablet = Self->FindTablet(rec.TabletId); + TTabletInfo* tablet = Self->FindTablet(rec.TabletId, rec.FollowerId); if (tablet != nullptr) { tabletIdIndex.push_back({tabletIndexFunction(*tablet), tablet}); } @@ -794,6 +794,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr UpdateConfig(db, "MinNetworkScatterToBalance", configUpdates); 
UpdateConfig(db, "MinCounterScatterToBalance", configUpdates); UpdateConfig(db, "MaxNodeUsageToKick", configUpdates, TSchemeIds::State::MaxNodeUsageToKick); + UpdateConfig(db, "NodeUsageRangeToKick", configUpdates); UpdateConfig(db, "ResourceChangeReactionPeriod", configUpdates, TSchemeIds::State::ResourceChangeReactionPeriod); UpdateConfig(db, "TabletKickCooldownPeriod", configUpdates, TSchemeIds::State::TabletKickCooldownPeriod); UpdateConfig(db, "SpreadNeighbours", configUpdates, TSchemeIds::State::SpreadNeighbours); @@ -835,6 +836,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr UpdateConfig(db, "MinStorageScatterToBalance", configUpdates); UpdateConfig(db, "MinGroupUsageToBalance", configUpdates); UpdateConfig(db, "StorageBalancerInflight", configUpdates); + UpdateConfig(db, "LessSystemTabletsMoves", configUpdates); if (params.contains("BalancerIgnoreTabletTypes")) { auto value = params.Get("BalancerIgnoreTabletTypes"); @@ -1140,6 +1142,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr ShowConfig(out, "MinCounterScatterToBalance"); ShowConfig(out, "MinNodeUsageToBalance"); ShowConfig(out, "MaxNodeUsageToKick"); + ShowConfig(out, "NodeUsageRangeToKick"); ShowConfig(out, "ResourceChangeReactionPeriod"); ShowConfig(out, "TabletKickCooldownPeriod"); ShowConfig(out, "NodeSelectStrategy"); @@ -1180,6 +1183,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr ShowConfig(out, "MinStorageScatterToBalance"); ShowConfig(out, "MinGroupUsageToBalance"); ShowConfig(out, "StorageBalancerInflight"); + ShowConfig(out, "LessSystemTabletsMoves"); ShowConfigForBalancerIgnoreTabletTypes(out); out << "
"; diff --git a/ydb/core/mind/hive/node_info.cpp b/ydb/core/mind/hive/node_info.cpp index 21c92abdce94..341e309b94ef 100644 --- a/ydb/core/mind/hive/node_info.cpp +++ b/ydb/core/mind/hive/node_info.cpp @@ -356,7 +356,7 @@ void TNodeInfo::DeregisterInDomains() { void TNodeInfo::Ping() { Y_ABORT_UNLESS((bool)Local); BLOG_D("Node(" << Id << ") Ping(" << Local << ")"); - Hive.SendPing(Local, Id); + Hive.QueuePing(Local); } void TNodeInfo::SendReconnect(const TActorId& local) { @@ -467,7 +467,7 @@ TResourceRawValues TNodeInfo::GetStDevResourceValues() { return GetStDev(values); } -bool TNodeInfo::CanBeDeleted() const { +bool TNodeInfo::CanBeDeleted(TInstant now) const { TInstant lastAlive(TInstant::MilliSeconds(Statistics.GetLastAliveTimestamp())); if (lastAlive) { return (IsDisconnected() || IsUnknown()) @@ -475,7 +475,7 @@ bool TNodeInfo::CanBeDeleted() const { && GetTabletsTotal() == 0 && LockedTablets.empty() && !Freeze - && (lastAlive + Hive.GetNodeDeletePeriod() < TInstant::Now()); + && (lastAlive + Hive.GetNodeDeletePeriod() < now); } else { return (IsDisconnected() || IsUnknown()) && !Local && GetTabletsTotal() == 0 && LockedTablets.empty() && !Freeze; } diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h index 961116babce5..957ff626abf6 100644 --- a/ydb/core/mind/hive/node_info.h +++ b/ydb/core/mind/hive/node_info.h @@ -132,7 +132,11 @@ struct TNodeInfo { ui32 GetTabletNeighboursCount(const TTabletInfo& tablet) const { auto it = TabletsOfObject.find(tablet.GetObjectId()); if (it != TabletsOfObject.end()) { - return it->second.size(); + auto count = it->second.size(); + if (tablet.IsAliveOnLocal(Local)) { + --count; + } + return count; } else { return 0; } @@ -227,7 +231,7 @@ struct TNodeInfo { } } - bool CanBeDeleted() const; + bool CanBeDeleted(TInstant now) const; void RegisterInDomains(); void DeregisterInDomains(); void Ping(); diff --git a/ydb/core/mind/hive/tablet_info.h b/ydb/core/mind/hive/tablet_info.h index 
35920dd1748c..5d754ddd55d7 100644 --- a/ydb/core/mind/hive/tablet_info.h +++ b/ydb/core/mind/hive/tablet_info.h @@ -162,6 +162,8 @@ struct TTabletInfo { TInstant PostponedStart; EBalancerPolicy BalancerPolicy; TNodeId FailedNodeId = 0; // last time we tried to start the tablet, we failed on this node + bool InWaitQueue = false; + TInstant BootTime; TTabletInfo(ETabletRole role, THive& hive); TTabletInfo(const TTabletInfo&) = delete; diff --git a/ydb/core/mind/hive/tx__delete_node.cpp b/ydb/core/mind/hive/tx__delete_node.cpp new file mode 100644 index 000000000000..fa3b039826de --- /dev/null +++ b/ydb/core/mind/hive/tx__delete_node.cpp @@ -0,0 +1,38 @@ +#include "hive_impl.h" +#include "hive_log.h" + +namespace NKikimr { +namespace NHive { + +class TTxDeleteNode : public TTransactionBase { +protected: + TNodeId NodeId; +public: + TTxDeleteNode(TNodeId nodeId, THive *hive) + : TBase(hive) + , NodeId(nodeId) + {} + + bool Execute(TTransactionContext &txc, const TActorContext&) override { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(NodeId).Delete(); + auto restrictionsRowset = db.Table().Range(NodeId).Select(); + while (!restrictionsRowset.EndOfSet()) { + db.Table().Key(restrictionsRowset.GetKey()).Delete(); + if (!restrictionsRowset.Next()) { + return false; + } + } + return true; + } + + void Complete(const TActorContext&) override { + } + }; + + ITransaction* THive::CreateDeleteNode(TNodeId nodeId) { + return new TTxDeleteNode(nodeId, this); + } + +} // NHive +} // NKikimr diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp index 88967b806b4b..770dea99cad3 100644 --- a/ydb/core/mind/hive/tx__load_everything.cpp +++ b/ydb/core/mind/hive/tx__load_everything.cpp @@ -318,6 +318,11 @@ class TTxLoadEverything : public TTransactionBase { node.Statistics = nodeRowset.GetValueOrDefault(); node.Name = nodeRowset.GetValueOrDefault(); node.BecomeUpOnRestart = nodeRowset.GetValueOrDefault(false); + if (node.BecomeUpOnRestart) { 
+ // If a node must become up on restart, it must have been down + // That was not persisted to avoid issues with downgrades + node.Down = true; + } if (nodeRowset.HaveValue()) { auto location = nodeRowset.GetValue(); if (location.HasDataCenter()) { @@ -333,9 +338,9 @@ class TTxLoadEverything : public TTransactionBase { // it's safe to call here, because there is no any tablets in the node yet node.BecomeDisconnected(); } - if (node.CanBeDeleted()) { + if (Self->TryToDeleteNode(&node)) { + // node is deleted from hashmap db.Table().Key(nodeId).Delete(); - Self->Nodes.erase(nodeId); } else if (node.IsUnknown() && node.LocationAcquired) { Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id); } @@ -683,8 +688,9 @@ class TTxLoadEverything : public TTransactionBase { size_t numDeletedNodes = 0; size_t numDeletedRestrictions = 0; + TInstant now = TActivationContext::Now(); for (auto itNode = Self->Nodes.begin(); itNode != Self->Nodes.end();) { - if (itNode->second.CanBeDeleted()) { + if (itNode->second.CanBeDeleted(now)) { ++numDeletedNodes; auto restrictionsRowset = db.Table().Range(itNode->first).Select(); while (!restrictionsRowset.EndOfSet()) { diff --git a/ydb/core/mind/hive/tx__register_node.cpp b/ydb/core/mind/hive/tx__register_node.cpp index e03431ef4789..7c40397313e4 100644 --- a/ydb/core/mind/hive/tx__register_node.cpp +++ b/ydb/core/mind/hive/tx__register_node.cpp @@ -23,7 +23,7 @@ class TTxRegisterNode : public TTransactionBase { TNodeId nodeId = Local.NodeId(); TNodeInfo& node = Self->GetNode(nodeId); if (node.Local != Local) { - TInstant now = TInstant::Now(); + TInstant now = TActivationContext::Now(); node.Statistics.AddRestartTimestamp(now.MilliSeconds()); node.ActualizeNodeStatistics(now); for (const auto& t : node.Tablets) { @@ -57,6 +57,7 @@ class TTxRegisterNode : public TTransactionBase { db.Table().Key(nodeId).Update(false, false); } if (node.BecomeUpOnRestart) { + BLOG_TRACE("THive::TTxRegisterNode(" << Local.NodeId() << 
")::Execute - node became up on restart"); node.SetDown(false); node.BecomeUpOnRestart = false; db.Table().Key(nodeId).Update(false, false); @@ -87,7 +88,9 @@ class TTxRegisterNode : public TTransactionBase { BLOG_D("THive::TTxRegisterNode(" << Local.NodeId() << ")::Complete"); TNodeInfo* node = Self->FindNode(Local.NodeId()); if (node != nullptr && node->Local) { // we send ping on every RegisterNode because we want to re-sync tablets upon every reconnection + Self->NodePingsInProgress.erase(node->Id); node->Ping(); + Self->ProcessNodePingQueue(); } } }; diff --git a/ydb/core/mind/hive/tx__start_tablet.cpp b/ydb/core/mind/hive/tx__start_tablet.cpp index 068f9915432b..034fd6a124bb 100644 --- a/ydb/core/mind/hive/tx__start_tablet.cpp +++ b/ydb/core/mind/hive/tx__start_tablet.cpp @@ -10,6 +10,7 @@ class TTxStartTablet : public TTransactionBase { ui64 Cookie; bool External; TSideEffects SideEffects; + bool Success; public: TTxStartTablet(TFullTabletId tabletId, const TActorId& local, ui64 cookie, bool external, THive *hive) @@ -23,10 +24,12 @@ class TTxStartTablet : public TTransactionBase { TTxType GetTxType() const override { return NHive::TXTYPE_START_TABLET; } bool Execute(TTransactionContext& txc, const TActorContext&) override { + Success = false; SideEffects.Reset(Self->SelfId()); BLOG_D("THive::TTxStartTablet::Execute Tablet " << TabletId); TTabletInfo* tablet = Self->FindTablet(TabletId); if (tablet != nullptr) { + tablet->BootTime = TActivationContext::Now(); // finish fast-move operation if (tablet->LastNodeId != 0 && tablet->LastNodeId != Local.NodeId()) { TNodeInfo* lastNode = Self->FindNode(tablet->LastNodeId); @@ -65,6 +68,7 @@ class TTxStartTablet : public TTransactionBase { new TEvLocal::TEvBootTablet(*leader.TabletStorageInfo, promotableFollowerId, leader.KnownGeneration), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, Cookie); + Success = true; return true; } else { BLOG_W("THive::TTxStartTablet::Execute, ignoring 
TEvBootTablet(" << leader.ToString() << ") - wrong state or node"); @@ -79,6 +83,7 @@ class TTxStartTablet : public TTransactionBase { new TEvLocal::TEvBootTablet(*follower.LeaderTablet.TabletStorageInfo, follower.Id), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, Cookie); + Success = true; return true; } else { BLOG_W("THive::TTxStartTablet::Execute, ignoring TEvBootTablet(" << follower.ToString() << ") - wrong state or node"); @@ -108,6 +113,9 @@ class TTxStartTablet : public TTransactionBase { void Complete(const TActorContext& ctx) override { BLOG_D("THive::TTxStartTablet::Complete Tablet " << TabletId << " SideEffects: " << SideEffects); SideEffects.Complete(ctx); + if (Success) { + Self->UpdateCounterTabletsStarting(+1); + } } }; diff --git a/ydb/core/mind/hive/tx__switch_drain.cpp b/ydb/core/mind/hive/tx__switch_drain.cpp index 1e87a70b1e49..e665b58e2d95 100644 --- a/ydb/core/mind/hive/tx__switch_drain.cpp +++ b/ydb/core/mind/hive/tx__switch_drain.cpp @@ -38,7 +38,10 @@ class TTxSwitchDrainOn : public TTransactionBase { } node->SetDown(true); if (Settings.Persist) { - db.Table().Key(NodeId).Update(true, node->BecomeUpOnRestart); + db.Table().Key(NodeId).Update(node->BecomeUpOnRestart); + if (Settings.DownPolicy == NKikimrHive::DRAIN_POLICY_KEEP_DOWN) { + db.Table().Key(NodeId).Update(true); + } } } Self->StartHiveDrain(NodeId, std::move(Settings)); diff --git a/ydb/core/mind/hive/tx__update_tablet_metrics.cpp b/ydb/core/mind/hive/tx__update_tablet_metrics.cpp index 7e6150ec9a83..ed7d689456cc 100644 --- a/ydb/core/mind/hive/tx__update_tablet_metrics.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_metrics.cpp @@ -54,6 +54,7 @@ class TTxUpdateTabletMetrics : public TTransactionBase { } TNodeInfo* node = Self->FindNode(nodeId); if (node != nullptr) { + node->UpdateResourceMaximum(record.GetResourceMaximum()); node->UpdateResourceTotalUsage(record); node->Statistics.SetLastAliveTimestamp(now.MilliSeconds()); 
node->ActualizeNodeStatistics(now); diff --git a/ydb/core/mind/hive/tx__update_tablet_status.cpp b/ydb/core/mind/hive/tx__update_tablet_status.cpp index 621978d56892..c4efbbb9b24f 100644 --- a/ydb/core/mind/hive/tx__update_tablet_status.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_status.cpp @@ -80,6 +80,14 @@ class TTxUpdateTabletStatus : public TTransactionBase { if (Status == TEvLocal::TEvTabletStatus::StatusOk) { tablet->Statistics.AddRestartTimestamp(now.MilliSeconds()); tablet->ActualizeTabletStatistics(now); + if (tablet->BootTime != TInstant()) { + TDuration startTime = now - tablet->BootTime; + if (startTime > TDuration::Seconds(30)) { + BLOG_W("Tablet " << tablet->GetFullTabletId() << " was starting for " << startTime.Seconds() << " seconds"); + } + Self->TabletCounters->Percentile()[NHive::COUNTER_TABLETS_START_TIME].IncrementFor(startTime.MilliSeconds()); + Self->UpdateCounterTabletsStarting(-1); + } TNodeInfo* node = Self->FindNode(Local.NodeId()); if (node == nullptr) { // event from IC about disconnection of the node could overtake events from the node itself because of Pipe Server diff --git a/ydb/core/mind/hive/ya.make b/ydb/core/mind/hive/ya.make index 5d4132950cd9..4666bb93cc34 100644 --- a/ydb/core/mind/hive/ya.make +++ b/ydb/core/mind/hive/ya.make @@ -47,6 +47,7 @@ SRCS( tx__configure_subdomain.cpp tx__create_tablet.cpp tx__cut_tablet_history.cpp + tx__delete_node.cpp tx__delete_tablet.cpp tx__delete_tablet_result.cpp tx__disconnect_node.cpp diff --git a/ydb/core/mind/local.cpp b/ydb/core/mind/local.cpp index ea60b85275a8..ebd564891b0a 100644 --- a/ydb/core/mind/local.cpp +++ b/ydb/core/mind/local.cpp @@ -109,6 +109,7 @@ class TLocalNodeRegistrar : public TActorBootstrapped { ui64 UserPoolUsage = 0; // (usage uS x threads) / sec ui64 MemUsage = 0; ui64 MemLimit = 0; + ui64 CpuLimit = 0; // PotentialMaxThreadCount of UserPool double NodeUsage = 0; bool SentDrainNode = false; @@ -272,28 +273,28 @@ class TLocalNodeRegistrar : public 
TActorBootstrapped { HandlePipeDestroyed(ctx); } - void SendStatusOk(const TActorContext &ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar SendStatusOk"); - TAutoPtr eventStatus = new TEvLocal::TEvStatus(TEvLocal::TEvStatus::StatusOk); - auto& record = eventStatus->Record; - record.SetStartTime(StartTime.GetValue()); - record.MutableResourceMaximum()->CopyFrom(ResourceLimit); - if (!record.GetResourceMaximum().HasCPU()) { - TExecutorPoolStats poolStats; - TVector statsCopy; - TVector sharedStatsCopy; - ctx.ExecutorThread.ActorSystem->GetPoolStats(AppData()->UserPoolId, poolStats, statsCopy, sharedStatsCopy); - if (!statsCopy.empty()) { - record.MutableResourceMaximum()->SetCPU(poolStats.CurrentThreadCount * 1000000); + void FillResourceMaximum(NKikimrTabletBase::TMetrics* record) { + record->CopyFrom(ResourceLimit); + if (!record->HasCPU()) { + if (CpuLimit != 0) { + record->SetCPU(CpuLimit); } } - if (!record.GetResourceMaximum().HasMemory()) { + if (!record->HasMemory()) { if (MemLimit != 0) { - record.MutableResourceMaximum()->SetMemory(MemLimit); + record->SetMemory(MemLimit); } else { - record.MutableResourceMaximum()->SetMemory(NSystemInfo::TotalMemorySize()); + record->SetMemory(NSystemInfo::TotalMemorySize()); } } + } + + void SendStatusOk(const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar SendStatusOk"); + TAutoPtr eventStatus = new TEvLocal::TEvStatus(TEvLocal::TEvStatus::StatusOk); + auto& record = eventStatus->Record; + record.SetStartTime(StartTime.GetValue()); + FillResourceMaximum(record.MutableResourceMaximum()); NTabletPipe::SendData(ctx, HivePipeClient, eventStatus.Release()); } @@ -587,6 +588,7 @@ class TLocalNodeRegistrar : public TActorBootstrapped { record.MutableTotalResourceUsage()->SetMemory(MemUsage); } record.SetTotalNodeUsage(NodeUsage); + FillResourceMaximum(record.MutableResourceMaximum()); NTabletPipe::SendData(ctx, HivePipeClient, event.Release()); SendTabletMetricsTime = 
ctx.Now(); } else { @@ -649,7 +651,8 @@ class TLocalNodeRegistrar : public TActorBootstrapped { const NKikimrWhiteboard::TSystemStateInfo& info = record.GetSystemStateInfo(0); if (static_cast(info.PoolStatsSize()) > AppData()->UserPoolId) { const auto& poolStats(info.GetPoolStats(AppData()->UserPoolId)); - UserPoolUsage = poolStats.usage() * poolStats.threads() * 1000000; // uS + CpuLimit = poolStats.limit() * 1'000'000; // microseconds + UserPoolUsage = poolStats.usage() * CpuLimit; // microseconds } // Note: we use allocated memory because MemoryUsed(AnonRSS) has lag diff --git a/ydb/core/mind/node_broker.cpp b/ydb/core/mind/node_broker.cpp index 27f96e66ca40..feb19c94e374 100644 --- a/ydb/core/mind/node_broker.cpp +++ b/ydb/core/mind/node_broker.cpp @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include #include #include @@ -66,6 +68,7 @@ void TNodeBroker::OnActivateExecutor(const TActorContext &ctx) EnableStableNodeNames = appData->FeatureFlags.GetEnableStableNodeNames(); + Executor()->RegisterExternalTabletCounters(TabletCountersPtr); ClearState(); ProcessTx(CreateTxInitScheme(), ctx); @@ -287,15 +290,47 @@ void TNodeBroker::AddDelayedListNodesRequest(ui64 epoch, void TNodeBroker::ProcessListNodesRequest(TEvNodeBroker::TEvListNodes::TPtr &ev) { - ui64 version = ev->Get()->Record.GetCachedVersion(); + auto *msg = ev->Get(); NKikimrNodeBroker::TNodesInfo info; Epoch.Serialize(*info.MutableEpoch()); info.SetDomain(AppData()->DomainsInfo->GetDomain()->DomainUid); TAutoPtr resp = new TEvNodeBroker::TEvNodesInfo(info); - if (version != Epoch.Version) + + bool optimized = false; + + if (msg->Record.HasCachedVersion()) { + if (msg->Record.GetCachedVersion() == Epoch.Version) { + // Client has an up-to-date list already + optimized = true; + } else { + // We may be able to only send added nodes in the same epoch when + // all deltas are cached up to the current epoch inclusive. 
+ ui64 neededFirstVersion = msg->Record.GetCachedVersion() + 1; + if (!EpochDeltasVersions.empty() && + EpochDeltasVersions.front() <= neededFirstVersion && + EpochDeltasVersions.back() == Epoch.Version && + neededFirstVersion <= Epoch.Version) + { + ui64 firstIndex = neededFirstVersion - EpochDeltasVersions.front(); + if (firstIndex > 0) { + // Note: usually there is a small number of nodes added + // between subsequent requests, so this substr should be + // very cheap. + resp->PreSerializedData = EpochDeltasCache.substr(EpochDeltasEndOffsets[firstIndex - 1]); + } else { + resp->PreSerializedData = EpochDeltasCache; + } + optimized = true; + } + } + } + + if (!optimized) { resp->PreSerializedData = EpochCache; + } + TabletCounters->Percentile()[COUNTER_LIST_NODES_BYTES].IncrementFor(resp->GetCachedByteSize()); LOG_TRACE_S(TActorContext::AsActorContext(), NKikimrServices::NODE_BROKER, "Send TEvNodesInfo for epoch " << Epoch.ToString()); @@ -304,12 +339,16 @@ void TNodeBroker::ProcessListNodesRequest(TEvNodeBroker::TEvListNodes::TPtr &ev) void TNodeBroker::ProcessDelayedListNodesRequests() { + THashSet processed; while (!DelayedListNodesRequests.empty()) { auto it = DelayedListNodesRequests.begin(); if (it->first > Epoch.Id) break; - ProcessListNodesRequest(it->second); + // Avoid processing more than one request from the same sender + if (processed.insert(it->second->Sender).second) { + ProcessListNodesRequest(it->second); + } DelayedListNodesRequests.erase(it); } } @@ -427,6 +466,12 @@ void TNodeBroker::PrepareEpochCache() FillNodeInfo(entry.second, *info.AddExpiredNodes()); Y_PROTOBUF_SUPPRESS_NODISCARD info.SerializeToString(&EpochCache); + TabletCounters->Simple()[COUNTER_EPOCH_SIZE_BYTES].Set(EpochCache.Size()); + + EpochDeltasCache.clear(); + EpochDeltasVersions.clear(); + EpochDeltasEndOffsets.clear(); + TabletCounters->Simple()[COUNTER_EPOCH_DELTAS_SIZE_BYTES].Set(EpochDeltasCache.size()); } void TNodeBroker::AddNodeToEpochCache(const TNodeInfo &node) @@ 
-441,6 +486,18 @@ void TNodeBroker::AddNodeToEpochCache(const TNodeInfo &node) Y_PROTOBUF_SUPPRESS_NODISCARD info.SerializeToString(&delta); EpochCache += delta; + TabletCounters->Simple()[COUNTER_EPOCH_SIZE_BYTES].Set(EpochCache.Size()); + + if (!EpochDeltasVersions.empty() && EpochDeltasVersions.back() + 1 != Epoch.Version) { + EpochDeltasCache.clear(); + EpochDeltasVersions.clear(); + EpochDeltasEndOffsets.clear(); + } + + EpochDeltasCache += delta; + EpochDeltasVersions.push_back(Epoch.Version); + EpochDeltasEndOffsets.push_back(EpochDeltasCache.size()); + TabletCounters->Simple()[COUNTER_EPOCH_DELTAS_SIZE_BYTES].Set(EpochDeltasCache.size()); } void TNodeBroker::SubscribeForConfigUpdates(const TActorContext &ctx) @@ -822,6 +879,7 @@ void TNodeBroker::Handle(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr void TNodeBroker::Handle(TEvNodeBroker::TEvListNodes::TPtr &ev, const TActorContext &) { + TabletCounters->Cumulative()[COUNTER_LIST_NODES_REQUESTS].Increment(1); auto &rec = ev->Get()->Record; ui64 epoch = rec.GetMinEpoch(); @@ -836,6 +894,7 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvListNodes::TPtr &ev, void TNodeBroker::Handle(TEvNodeBroker::TEvResolveNode::TPtr &ev, const TActorContext &ctx) { + TabletCounters->Cumulative()[COUNTER_RESOLVE_NODE_REQUESTS].Increment(1); ui32 nodeId = ev->Get()->Record.GetNodeId(); TAutoPtr resp = new TEvNodeBroker::TEvResolvedNode; @@ -859,10 +918,11 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, { LOG_TRACE_S(ctx, NKikimrServices::NODE_BROKER, "Handle TEvNodeBroker::TEvRegistrationRequest" << ": request# " << ev->Get()->Record.ShortDebugString()); + TabletCounters->Cumulative()[COUNTER_REGISTRATION_REQUESTS].Increment(1); - class TRegisterNodeActor : public TActorBootstrapped { + class TResolveTenantActor : public TActorBootstrapped { TEvNodeBroker::TEvRegistrationRequest::TPtr Ev; - TNodeBroker *Self; + TActorId ReplyTo; NActors::TScopeId ScopeId; TSubDomainKey ServicedSubDomain; @@ 
-871,9 +931,9 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, return NKikimrServices::TActivity::NODE_BROKER_ACTOR; } - TRegisterNodeActor(TEvNodeBroker::TEvRegistrationRequest::TPtr& ev, TNodeBroker *self) + TResolveTenantActor(TEvNodeBroker::TEvRegistrationRequest::TPtr& ev, TActorId replyTo) : Ev(ev) - , Self(self) + , ReplyTo(replyTo) {} void Bootstrap(const TActorContext& ctx) { @@ -930,7 +990,7 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, << ": scope id# " << ScopeIdToString(ScopeId) << ": serviced subdomain# " << ServicedSubDomain); - Self->ProcessTx(Self->CreateTxRegisterNode(Ev, ScopeId, ServicedSubDomain), ctx); + Send(ReplyTo, new TEvPrivate::TEvResolvedRegistrationRequest(Ev, ScopeId, ServicedSubDomain)); Die(ctx); } @@ -939,12 +999,13 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, CFunc(TEvents::TSystem::Undelivered, HandleUndelivered) }) }; - ctx.RegisterWithSameMailbox(new TRegisterNodeActor(ev, this)); + ctx.RegisterWithSameMailbox(new TResolveTenantActor(ev, SelfId())); } void TNodeBroker::Handle(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev, const TActorContext &ctx) { + TabletCounters->Cumulative()[COUNTER_EXTEND_LEASE_REQUESTS].Increment(1); ui32 nodeId = ev->Get()->Record.GetNodeId(); ProcessTx(nodeId, CreateTxExtendLease(ev), ctx); } @@ -989,6 +1050,29 @@ void TNodeBroker::Handle(TEvPrivate::TEvUpdateEpoch::TPtr &ev, ProcessTx(CreateTxUpdateEpoch(), ctx); } +void TNodeBroker::Handle(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev, + const TActorContext &ctx) +{ + ProcessTx(CreateTxRegisterNode(ev), ctx); +} + +TNodeBroker::TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) + : TActor(&TThis::StateInit) + , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) + , EpochDuration(TDuration::Hours(1)) + , ConfigSubscriptionId(0) + , StableNodeNamePrefix("slot-") + , TxProcessor(new TTxProcessor(*this, "root", 
NKikimrServices::NODE_BROKER)) +{ + TabletCountersPtr.Reset(new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor + >()); + TabletCounters = TabletCountersPtr.Get(); +} + IActor *CreateNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) { diff --git a/ydb/core/mind/node_broker__extend_lease.cpp b/ydb/core/mind/node_broker__extend_lease.cpp index a9845d7ceff0..7788593e0257 100644 --- a/ydb/core/mind/node_broker__extend_lease.cpp +++ b/ydb/core/mind/node_broker__extend_lease.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -17,6 +18,8 @@ class TNodeBroker::TTxExtendLease : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_EXTEND_LEASE; } + bool Error(TStatus::ECode code, const TString &reason, const TActorContext &ctx) diff --git a/ydb/core/mind/node_broker__init_scheme.cpp b/ydb/core/mind/node_broker__init_scheme.cpp index bc0c011c34ad..8e44825ce559 100644 --- a/ydb/core/mind/node_broker__init_scheme.cpp +++ b/ydb/core/mind/node_broker__init_scheme.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -11,6 +13,8 @@ class TNodeBroker::TTxInitScheme : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_INIT_SCHEME; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::NODE_BROKER, "TTxInitScheme Execute"); diff --git a/ydb/core/mind/node_broker__load_state.cpp b/ydb/core/mind/node_broker__load_state.cpp index 7cca5bea2c8d..8a58c91c6c99 100644 --- a/ydb/core/mind/node_broker__load_state.cpp +++ b/ydb/core/mind/node_broker__load_state.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -13,6 +14,8 @@ class 
TNodeBroker::TTxLoadState : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_LOAD_STATE; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::NODE_BROKER, "TTxLoadState Execute"); diff --git a/ydb/core/mind/node_broker__register_node.cpp b/ydb/core/mind/node_broker__register_node.cpp index 1b4ea33e47b0..e10237f4e614 100644 --- a/ydb/core/mind/node_broker__register_node.cpp +++ b/ydb/core/mind/node_broker__register_node.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -10,18 +11,19 @@ using namespace NKikimrNodeBroker; class TNodeBroker::TTxRegisterNode : public TTransactionBase { public: - TTxRegisterNode(TNodeBroker *self, TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, const TSubDomainKey& servicedSubDomain) + TTxRegisterNode(TNodeBroker *self, TEvPrivate::TEvResolvedRegistrationRequest::TPtr &resolvedEv) : TBase(self) - , Event(ev) - , ScopeId(scopeId) - , ServicedSubDomain(servicedSubDomain) + , Event(resolvedEv->Get()->Request) + , ScopeId(resolvedEv->Get()->ScopeId) + , ServicedSubDomain(resolvedEv->Get()->ServicedSubDomain) , NodeId(0) , ExtendLease(false) , FixNodeId(false) { } + TTxType GetTxType() const override { return TXTYPE_REGISTER_NODE; } + bool Error(TStatus::ECode code, const TString &reason, const TActorContext &ctx) @@ -186,11 +188,9 @@ class TNodeBroker::TTxRegisterNode : public TTransactionBase { bool FixNodeId; }; -ITransaction *TNodeBroker::CreateTxRegisterNode(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, - const TSubDomainKey& servicedSubDomain) +ITransaction *TNodeBroker::CreateTxRegisterNode(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev) { - return new TTxRegisterNode(this, ev, scopeId, servicedSubDomain); + return new TTxRegisterNode(this, ev); } } // NNodeBroker diff --git 
a/ydb/core/mind/node_broker__update_config.cpp b/ydb/core/mind/node_broker__update_config.cpp index 2211b447ac1e..c5a891f83b03 100644 --- a/ydb/core/mind/node_broker__update_config.cpp +++ b/ydb/core/mind/node_broker__update_config.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -24,6 +26,8 @@ class TNodeBroker::TTxUpdateConfig : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_UPDATE_CONFIG; } + bool ProcessNotification(const TActorContext &ctx) { auto &rec = Notification->Get()->Record; diff --git a/ydb/core/mind/node_broker__update_config_subscription.cpp b/ydb/core/mind/node_broker__update_config_subscription.cpp index 8fa376ec85b8..dec51fc71f14 100644 --- a/ydb/core/mind/node_broker__update_config_subscription.cpp +++ b/ydb/core/mind/node_broker__update_config_subscription.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -14,6 +16,8 @@ class TNodeBroker::TTxUpdateConfigSubscription : public TTransactionBase + namespace NKikimr { namespace NNodeBroker { @@ -11,6 +13,8 @@ class TNodeBroker::TTxUpdateEpoch : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_UPDATE_EPOCH; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::NODE_BROKER, "TTxUpdateEpoch Execute"); diff --git a/ydb/core/mind/node_broker_impl.h b/ydb/core/mind/node_broker_impl.h index 9efe587947b2..6e522e8b8792 100644 --- a/ydb/core/mind/node_broker_impl.h +++ b/ydb/core/mind/node_broker_impl.h @@ -46,6 +46,7 @@ class TNodeBroker : public TActor struct TEvPrivate { enum EEv { EvUpdateEpoch = EventSpaceBegin(TEvents::ES_PRIVATE), + EvResolvedRegistrationRequest, EvEnd }; @@ -53,6 +54,22 @@ class TNodeBroker : public TActor static_assert(EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE), 
"expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE)"); struct TEvUpdateEpoch : public TEventLocal {}; + + struct TEvResolvedRegistrationRequest : public TEventLocal { + + TEvResolvedRegistrationRequest( + TEvNodeBroker::TEvRegistrationRequest::TPtr request, + NActors::TScopeId scopeId, + TSubDomainKey servicedSubDomain) + : Request(request) + , ScopeId(scopeId) + , ServicedSubDomain(servicedSubDomain) + {} + + TEvNodeBroker::TEvRegistrationRequest::TPtr Request; + NActors::TScopeId ScopeId; + TSubDomainKey ServicedSubDomain; + }; }; private: @@ -138,9 +155,7 @@ class TNodeBroker : public TActor ITransaction *CreateTxExtendLease(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev); ITransaction *CreateTxInitScheme(); ITransaction *CreateTxLoadState(); - ITransaction *CreateTxRegisterNode(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, - const TSubDomainKey& servicedSubDomain); + ITransaction *CreateTxRegisterNode(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvNodeBroker::TEvSetConfigRequest::TPtr &ev); ITransaction *CreateTxUpdateConfigSubscription(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr &ev); @@ -192,6 +207,7 @@ class TNodeBroker : public TActor HFuncTraced(TEvNodeBroker::TEvGetConfigRequest, Handle); HFuncTraced(TEvNodeBroker::TEvSetConfigRequest, Handle); HFuncTraced(TEvPrivate::TEvUpdateEpoch, Handle); + HFuncTraced(TEvPrivate::TEvResolvedRegistrationRequest, Handle); IgnoreFunc(TEvTabletPipe::TEvServerConnected); IgnoreFunc(TEvTabletPipe::TEvServerDisconnected); IgnoreFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse); @@ -293,6 +309,8 @@ class TNodeBroker : public TActor const TActorContext &ctx); void Handle(TEvPrivate::TEvUpdateEpoch::TPtr &ev, const TActorContext &ctx); + void Handle(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev, + const 
TActorContext &ctx); // All registered dynamic nodes. THashMap Nodes; @@ -323,16 +341,15 @@ class TNodeBroker : public TActor TSchedulerCookieHolder EpochTimerCookieHolder; TString EpochCache; + TString EpochDeltasCache; + TVector EpochDeltasVersions; + TVector EpochDeltasEndOffsets; + + TTabletCountersBase* TabletCounters; + TAutoPtr TabletCountersPtr; + public: - TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) - : TActor(&TThis::StateInit) - , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) - , EpochDuration(TDuration::Hours(1)) - , ConfigSubscriptionId(0) - , StableNodeNamePrefix("slot-") - , TxProcessor(new TTxProcessor(*this, "root", NKikimrServices::NODE_BROKER)) - { - } + TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info); static constexpr NKikimrServices::TActivity::EType ActorActivityType() { diff --git a/ydb/core/mind/node_broker_ut.cpp b/ydb/core/mind/node_broker_ut.cpp index 33156cd3af8a..55404659b077 100644 --- a/ydb/core/mind/node_broker_ut.cpp +++ b/ydb/core/mind/node_broker_ut.cpp @@ -858,6 +858,48 @@ Y_UNIT_TEST_SUITE(TNodeBrokerTest) { UNIT_ASSERT_VALUES_EQUAL(epoch1.GetId(), epoch.GetId() + 5); } + Y_UNIT_TEST(TestListNodesEpochDeltas) + { + TTestBasicRuntime runtime(8, false); + Setup(runtime, 10); + TActorId sender = runtime.AllocateEdgeActor(); + + WaitForEpochUpdate(runtime, sender); + WaitForEpochUpdate(runtime, sender); + + auto epoch0 = GetEpoch(runtime, sender); + CheckRegistration(runtime, sender, "host1", 1001, "host1.yandex.net", "1.2.3.4", + 1, 2, 3, 4, TStatus::OK, NODE1, epoch0.GetNextEnd()); + auto epoch1 = CheckFilteredNodesList(runtime, sender, {NODE1}, {}, 0, epoch0.GetVersion()); + CheckRegistration(runtime, sender, "host2", 1001, "host2.yandex.net", "1.2.3.5", + 1, 2, 3, 5, TStatus::OK, NODE2, epoch1.GetNextEnd()); + auto epoch2 = CheckFilteredNodesList(runtime, sender, {NODE2}, {}, 0, epoch1.GetVersion()); + CheckRegistration(runtime, sender, "host3", 1001, "host3.yandex.net", 
"1.2.3.6", + 1, 2, 3, 6, TStatus::OK, NODE3, epoch2.GetNextEnd()); + auto epoch3 = CheckFilteredNodesList(runtime, sender, {NODE3}, {}, 0, epoch2.GetVersion()); + + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3}, {}, 0, epoch0.GetVersion()); + CheckFilteredNodesList(runtime, sender, {NODE2, NODE3}, {}, 0, epoch1.GetVersion()); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch3.GetVersion()); + + RebootTablet(runtime, MakeNodeBrokerID(), sender); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch3.GetVersion()); + + CheckRegistration(runtime, sender, "host4", 1001, "host4.yandex.net", "1.2.3.7", + 1, 2, 3, 7, TStatus::OK, NODE4, epoch3.GetNextEnd()); + auto epoch4 = CheckFilteredNodesList(runtime, sender, {NODE4}, {}, 0, epoch3.GetVersion()); + + // NodeBroker doesn't have enough history in memory and replies with the full node list + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3, NODE4}, {}, 0, epoch2.GetVersion()); + + WaitForEpochUpdate(runtime, sender); + auto epoch5 = GetEpoch(runtime, sender); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch5.GetVersion()); + + // New epoch may remove nodes, so deltas are not returned on epoch change + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3, NODE4}, {}, 0, epoch3.GetVersion()); + } + Y_UNIT_TEST(TestRandomActions) { TTestBasicRuntime runtime(8, false); diff --git a/ydb/core/mon/async_http_mon.cpp b/ydb/core/mon/async_http_mon.cpp index 7eabb9f8409b..8df05b03fbc9 100644 --- a/ydb/core/mon/async_http_mon.cpp +++ b/ydb/core/mon/async_http_mon.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -145,6 +146,11 @@ class THttpMonRequest : public NMonitoring::IMonHttpRequest { return {}; } + bool AcceptsJsonResponse() { + TStringBuf acceptHeader = GetHeader("Accept"); + return acceptHeader.find(TStringBuf("application/json")) != TStringBuf::npos; + } + virtual TStringBuf GetCookie(TStringBuf name) const override { NHttp::TCookies 
cookies(GetHeader("Cookie")); return cookies.Get(name); @@ -213,7 +219,8 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request->URL.Before('?')); TString status(response->Status); - NMonitoring::THistogramPtr ResponseTimeHgram = NKikimr::GetServiceCounters(NKikimr::AppData()->Counters, "utils") + NMonitoring::THistogramPtr ResponseTimeHgram = NKikimr::GetServiceCounters(NKikimr::AppData()->Counters, + ActorMonPage->MonServiceName) ->GetSubgroup("subsystem", "mon") ->GetSubgroup("url", url) ->GetSubgroup("status", status) @@ -239,18 +246,24 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedCreateResponseString(response)); PassAway(); } + bool CredentialsProvided() { + return Container.GetCookie("ydb_session_id") || Container.GetHeader("Authorization"); + } + TString YdbToHttpError(Ydb::StatusIds::StatusCode status) { switch (status) { case Ydb::StatusIds::UNAUTHORIZED: - return "401 Unauthorized"; + // YDB status UNAUTHORIZED is used for both access denied case and if no credentials were provided. + return CredentialsProvided() ? "403 Forbidden" : "401 Unauthorized"; case Ydb::StatusIds::INTERNAL_ERROR: return "500 Internal Server Error"; case Ydb::StatusIds::UNAVAILABLE: @@ -267,26 +280,45 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request; - NHttp::THeaders headers(request->Headers); TStringBuilder response; TStringBuilder body; - const TString httpError = YdbToHttpError(result.Status); - body << "

" << httpError << "

"; - if (result.Issues) { - body << "

" << result.Issues.ToString() << "

"; - } - body << ""; - TString origin = TString(headers["Origin"]); - if (origin.empty()) { - origin = "*"; + TStringBuf contentType; + const TString httpError = YdbToHttpError(status); + + if (Container.AcceptsJsonResponse()) { + contentType = "application/json"; + NJson::TJsonValue json; + TString message; + MakeJsonErrorReply(json, message, issues, NYdb::EStatus(status)); + NJson::WriteJson(&body.Out, &json); + } else { + contentType = "text/html"; + body << "

" << httpError << "

"; + if (issues) { + body << "

" << issues.ToString() << "

"; + } + body << ""; } + response << "HTTP/1.1 " << httpError << "\r\n"; - response << "Access-Control-Allow-Origin: " << origin << "\r\n"; - response << "Access-Control-Allow-Credentials: true\r\n"; - response << "Access-Control-Allow-Headers: Content-Type,Authorization,Origin,Accept\r\n"; - response << "Access-Control-Allow-Methods: OPTIONS, GET, POST, PUT, DELETE\r\n"; - response << "Content-Type: text/html\r\n"; + if (addAccessControlHeaders) { + NHttp::THeaders headers(request->Headers); + TString origin = TString(headers["Origin"]); + if (origin.empty()) { + origin = "*"; + } + response << "Access-Control-Allow-Origin: " << origin << "\r\n"; + response << "Access-Control-Allow-Credentials: true\r\n"; + response << "Access-Control-Allow-Headers: Content-Type,Authorization,Origin,Accept\r\n"; + response << "Access-Control-Allow-Methods: OPTIONS, GET, POST, PUT, DELETE\r\n"; + } + + response << "Content-Type: " << contentType << "\r\n"; response << "Content-Length: " << body.Size() << "\r\n"; response << "\r\n"; response << body; @@ -295,21 +327,9 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request; - TStringBuilder response; - TStringBuilder body; - body << "

403 Forbidden

"; - if (!error.empty()) { - body << "

" << error << "

"; - } - body << ""; - response << "HTTP/1.1 403 Forbidden\r\n"; - response << "Content-Type: text/html\r\n"; - response << "Content-Length: " << body.Size() << "\r\n"; - response << "\r\n"; - response << body; - ReplyWith(request->CreateResponseString(response)); - PassAway(); + NYql::TIssues issues; + issues.AddIssue(error); + ReplyErrorAndPassAway(Ydb::StatusIds::UNAUTHORIZED, issues, false); } void SendRequest(const NKikimr::NGRpcService::TEvRequestAuthAndCheckResult* result = nullptr) { @@ -355,13 +375,15 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedAllowedSIDs) { - if (result.UserToken->IsExist(sid)) { - found = true; - break; + if (result.UserToken) { + for (const TString& sid : ActorMonPage->AllowedSIDs) { + if (result.UserToken->IsExist(sid)) { + found = true; + break; + } } } - if (found || ActorMonPage->AllowedSIDs.empty()) { + if (found || ActorMonPage->AllowedSIDs.empty() || !result.UserToken) { SendRequest(&result); } else { return ReplyForbiddenAndPassAway("SID is not allowed"); @@ -533,10 +555,26 @@ class THttpMonServiceNodeRequest : public TActorBootstrapped parser(response); + + NHttp::THeadersBuilder headers(parser.Headers); + headers.Set("X-Forwarded-From-Node", TStringBuilder() << Event->Sender.NodeId()); + + NHttp::THttpRenderer renderer; + renderer.InitRequest(parser.Method, parser.URL, parser.Protocol, parser.Version); + renderer.Set(headers); + if (parser.HaveBody()) { + renderer.SetBody(parser.Body); // it shouldn't be here, 30x with a body is a bad idea + } + renderer.Finish(); + return renderer.AsString(); + } + void Bootstrap() { NHttp::THttpConfig::SocketAddressType address; FromProto(address, Event->Get()->Record.GetAddress()); - NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(Event->Get()->Record.GetHttpRequest(), Endpoint, address); + NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(RewriteWithForwardedFromNode(Event->Get()->Record.GetHttpRequest()), Endpoint, 
address); TStringBuilder prefix; prefix << "/node/" << TActivationContext::ActorSystem()->NodeId; if (request->URL.SkipPrefix(prefix)) { @@ -551,9 +589,36 @@ class THttpMonServiceNodeRequest : public TActorBootstrapped parser(response); + + NHttp::THeadersBuilder headers(parser.Headers); + headers.Set("Location", TStringBuilder() << "/node/" << TActivationContext::ActorSystem()->NodeId << headers["Location"]); + + NHttp::THttpRenderer renderer; + renderer.InitResponse(parser.Protocol, parser.Version, parser.Status, parser.Message); + renderer.Set(headers); + if (parser.HaveBody()) { + renderer.SetBody(parser.Body); // it shouldn't be here, 30x with a body is a bad idea + } + renderer.Finish(); + return renderer.AsString(); + } + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr& ev) { + TString httpResponse = ev->Get()->Response->AsString(); + switch (FromStringWithDefault(ev->Get()->Response->Status)) { + case 301: + case 303: + case 307: + case 308: + if (!NHttp::THeaders(ev->Get()->Response->Headers).Get("Location").starts_with("/node/")) { + httpResponse = RewriteLocationWithNode(httpResponse); + } + break; + } auto response = std::make_unique(); - response->Record.SetHttpResponse(ev->Get()->Response->AsString()); + response->Record.SetHttpResponse(httpResponse); Send(Event->Sender, response.release(), 0, Event->Cookie); PassAway(); } @@ -823,7 +888,8 @@ NMonitoring::IMonPage* TAsyncHttpMon::RegisterActorPage(TRegisterActorPageFields fields.ActorSystem, fields.ActorId, fields.AllowedSIDs ? fields.AllowedSIDs : Config.AllowedSIDs, - fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); + fields.UseAuth ? 
Config.Authorizer : TRequestAuthorizer(), + fields.MonServiceName); if (fields.Index) { fields.Index->Register(page); if (fields.SortPages) { diff --git a/ydb/core/mon/mon.cpp b/ydb/core/mon/mon.cpp index 4d7b8c797e2c..311132a04fda 100644 --- a/ydb/core/mon/mon.cpp +++ b/ydb/core/mon/mon.cpp @@ -8,6 +8,7 @@ #include #include +#include #include @@ -88,6 +89,48 @@ NActors::IEventHandle* GetAuthorizeTicketResult(const NActors::TActorId& owner) } } +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYdb::TStatus& status) { + MakeJsonErrorReply(jsonResponse, message, status.GetIssues(), status.GetStatus()); +} + +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYql::TIssues& issues, NYdb::EStatus status) { + google::protobuf::RepeatedPtrField protoIssues; + NYql::IssuesToMessage(issues, &protoIssues); + + message.clear(); + + NJson::TJsonValue& jsonIssues = jsonResponse["issues"]; + for (const auto& queryIssue : protoIssues) { + NJson::TJsonValue& issue = jsonIssues.AppendValue({}); + NProtobufJson::Proto2Json(queryIssue, issue); + } + + TString textStatus = TStringBuilder() << status; + jsonResponse["status"] = textStatus; + + // find first deepest error + std::stable_sort(protoIssues.begin(), protoIssues.end(), [](const Ydb::Issue::IssueMessage& a, const Ydb::Issue::IssueMessage& b) -> bool { + return a.severity() < b.severity(); + }); + + const google::protobuf::RepeatedPtrField* protoIssuesPtr = &protoIssues; + while (protoIssuesPtr->size() > 0 && protoIssuesPtr->at(0).issuesSize() > 0) { + protoIssuesPtr = &protoIssuesPtr->at(0).issues(); + } + + if (protoIssuesPtr->size() > 0) { + const Ydb::Issue::IssueMessage& issue = protoIssuesPtr->at(0); + NProtobufJson::Proto2Json(issue, jsonResponse["error"]); + message = issue.message(); + } else { + jsonResponse["error"]["message"] = textStatus; + } + + if (message.empty()) { + message = textStatus; + } +} + IMonPage* TMon::RegisterActorPage(TIndexMonPage* 
index, const TString& relPath, const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth, bool sortPages) { return RegisterActorPage({ diff --git a/ydb/core/mon/mon.h b/ydb/core/mon/mon.h index 69373811a836..bdf30bc354ba 100644 --- a/ydb/core/mon/mon.h +++ b/ydb/core/mon/mon.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,12 +11,17 @@ #include #include +#include +#include namespace NActors { IEventHandle* SelectAuthorizationScheme(const NActors::TActorId& owner, NMonitoring::IMonHttpRequest& request); IEventHandle* GetAuthorizeTicketResult(const NActors::TActorId& owner); +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYql::TIssues& issues, NYdb::EStatus status); +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYdb::TStatus& status); + class TActorSystem; struct TActorId; @@ -54,6 +60,7 @@ class TMon { bool UseAuth = true; TVector AllowedSIDs; bool SortPages = true; + TString MonServiceName = "utils"; }; virtual NMonitoring::IMonPage* RegisterActorPage(TRegisterActorPageFields fields) = 0; diff --git a/ydb/core/mon/mon_impl.h b/ydb/core/mon/mon_impl.h index 135b95c69ef9..99ea18cba1d7 100644 --- a/ydb/core/mon/mon_impl.h +++ b/ydb/core/mon/mon_impl.h @@ -360,7 +360,7 @@ class TActorMonPage: public IMonPage { public: TActorMonPage(const TString &path, const TString &title, const TString &host, bool preTag, TActorSystem *actorSystem, const TActorId &actorId, const TVector &sids, - TMon::TRequestAuthorizer authorizer) + TMon::TRequestAuthorizer authorizer, TString monServiceName = "utils") : IMonPage(path, title) , Host(host) , PreTag(preTag) @@ -368,6 +368,7 @@ class TActorMonPage: public IMonPage { , TargetActorId(actorId) , AllowedSIDs(sids) , Authorizer(std::move(authorizer)) + , MonServiceName(monServiceName) { } @@ -405,6 +406,7 @@ class TActorMonPage: public IMonPage { TActorId TargetActorId; const TVector AllowedSIDs; 
TMon::TRequestAuthorizer Authorizer; + TString MonServiceName; }; inline TString GetPageFullPath(const NMonitoring::IMonPage* page) { diff --git a/ydb/core/mon/sync_http_mon.cpp b/ydb/core/mon/sync_http_mon.cpp index 822cd7e3389e..4fc334def9d8 100644 --- a/ydb/core/mon/sync_http_mon.cpp +++ b/ydb/core/mon/sync_http_mon.cpp @@ -81,7 +81,8 @@ namespace NActors { fields.ActorSystem, fields.ActorId, fields.AllowedSIDs ? fields.AllowedSIDs : Config.AllowedSIDs, - fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); + fields.UseAuth ? Config.Authorizer : TRequestAuthorizer(), + fields.MonServiceName); if (fields.Index) { fields.Index->Register(page); if (fields.SortPages) { diff --git a/ydb/core/mon/ya.make b/ydb/core/mon/ya.make index 86f82b217bf8..25a2e6b3c283 100644 --- a/ydb/core/mon/ya.make +++ b/ydb/core/mon/ya.make @@ -14,6 +14,7 @@ SRCS( PEERDIR( library/cpp/json library/cpp/lwtrace/mon + library/cpp/protobuf/json library/cpp/string_utils/url ydb/core/base ydb/core/grpc_services/base @@ -21,6 +22,8 @@ PEERDIR( ydb/library/aclib ydb/library/actors/core ydb/library/actors/http + ydb/library/yql/public/issue + ydb/public/sdk/cpp/client/ydb_types/status ) END() diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h index 2b697e1a81ca..ba0d5e999850 100644 --- a/ydb/core/node_whiteboard/node_whiteboard.h +++ b/ydb/core/node_whiteboard/node_whiteboard.h @@ -361,23 +361,18 @@ struct TEvWhiteboard{ } } - TEvSystemStateUpdate(const TVector>& poolStats) { + TEvSystemStateUpdate(const TVector>& poolStats) { for (const auto& row : poolStats) { auto& pb = *Record.AddPoolStats(); pb.SetName(std::get<0>(row)); pb.SetUsage(std::get<1>(row)); pb.SetThreads(std::get<2>(row)); + pb.SetLimit(std::get<3>(row)); } } TEvSystemStateUpdate(const TNodeLocation& systemLocation) { systemLocation.Serialize(Record.MutableLocation(), false); - const auto& x = systemLocation.GetLegacyValue(); - auto *pb = Record.MutableSystemLocation(); - 
pb->SetDataCenter(x.DataCenter); - pb->SetRoom(x.Room); - pb->SetRack(x.Rack); - pb->SetBody(x.Body); } TEvSystemStateUpdate(const NKikimrWhiteboard::TSystemStateInfo& systemStateInfo) { @@ -506,5 +501,41 @@ inline TActorId MakeNodeWhiteboardServiceId(ui32 node) { IActor* CreateNodeWhiteboardService(); -} // NTabletState +template +struct WhiteboardResponse {}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvTabletStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvPDiskStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvVDiskStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvSystemStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvBSGroupStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvNodeStateResponse; +}; + +template +::google::protobuf::RepeatedField GetDefaultWhiteboardFields(); + +} // NNodeWhiteboard } // NKikimr diff --git a/ydb/core/persqueue/blob.cpp b/ydb/core/persqueue/blob.cpp index 80027c17577b..5564d7cd03e2 100644 --- a/ydb/core/persqueue/blob.cpp +++ b/ydb/core/persqueue/blob.cpp @@ -422,7 +422,7 @@ void TBatch::Unpack() { PackedData.Clear(); } -void TBatch::UnpackTo(TVector *blobs) +void TBatch::UnpackTo(TVector *blobs) const { Y_ABORT_UNLESS(PackedData.size()); auto type = Header.GetFormat(); @@ -446,7 +446,7 @@ NScheme::TDataRef GetChunk(const char*& data, const char *end) return NScheme::TDataRef(data - size, size); } -void TBatch::UnpackToType1(TVector *blobs) { +void TBatch::UnpackToType1(TVector *blobs) const { Y_ABORT_UNLESS(Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); Y_ABORT_UNLESS(PackedData.size()); ui32 totalBlobs = Header.GetCount() + Header.GetInternalPartsCount(); @@ -606,7 +606,7 @@ void TBatch::UnpackToType1(TVector *blobs) { } } -void TBatch::UnpackToType0(TVector 
*blobs) { +void TBatch::UnpackToType0(TVector *blobs) const { Y_ABORT_UNLESS(Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); Y_ABORT_UNLESS(PackedData.size()); ui32 shift = 0; @@ -640,7 +640,7 @@ ui32 TBatch::FindPos(const ui64 offset, const ui16 partNo) const { void THead::Clear() { Offset = PartNo = PackedSize = 0; - Batches.clear(); + ClearBatches(); } ui64 THead::GetNextOffset() const @@ -650,11 +650,7 @@ ui64 THead::GetNextOffset() const ui16 THead::GetInternalPartsCount() const { - ui16 res = 0; - for (auto& b : Batches) { - res += b.GetInternalPartsCount(); - } - return res; + return InternalPartsCount; } ui32 THead::GetCount() const @@ -675,15 +671,73 @@ IOutputStream& operator <<(IOutputStream& out, const THead& value) } ui32 THead::FindPos(const ui64 offset, const ui16 partNo) const { - ui32 i = 0; - for (; i < Batches.size(); ++i) { - //this batch contains blobs with position bigger than requested - if (Batches[i].GetOffset() > offset || Batches[i].GetOffset() == offset && Batches[i].GetPartNo() > partNo) - break; - } - if (i == 0) + if (Batches.empty()) { return Max(); - return i - 1; + } + + ui32 i = Batches.size() - 1; + while (i > 0 && Batches[i].IsGreaterThan(offset, partNo)) { + --i; + } + + if (i == 0) { + if (Batches[i].IsGreaterThan(offset, partNo)) { + return Max(); + } else { + return 0; + } + } + + return i; +} + +void THead::AddBatch(const TBatch& batch) { + auto& b = Batches.emplace_back(batch); + InternalPartsCount += b.GetInternalPartsCount(); +} + +void THead::ClearBatches() { + Batches.clear(); + InternalPartsCount = 0; +} + +const std::deque& THead::GetBatches() const { + return Batches; +} + +const TBatch& THead::GetBatch(ui32 idx) const { + return Batches.at(idx); +} + +const TBatch& THead::GetLastBatch() const { + Y_ABORT_UNLESS(!Batches.empty()); + return Batches.back(); +} + +TBatch THead::ExtractFirstBatch() { + Y_ABORT_UNLESS(!Batches.empty()); + auto batch = std::move(Batches.front()); + InternalPartsCount -= 
batch.GetInternalPartsCount(); + Batches.pop_front(); + return batch; +} + +THead::TBatchAccessor THead::MutableBatch(ui32 idx) { + Y_ABORT_UNLESS(idx < Batches.size()); + return TBatchAccessor(Batches[idx]); +} + +THead::TBatchAccessor THead::MutableLastBatch() { + Y_ABORT_UNLESS(!Batches.empty()); + return TBatchAccessor(Batches.back()); +} + +void THead::AddBlob(const TClientBlob& blob) { + Y_ABORT_UNLESS(!Batches.empty()); + auto& batch = Batches.back(); + InternalPartsCount -= batch.GetInternalPartsCount(); + batch.AddBlob(blob); + InternalPartsCount += batch.GetInternalPartsCount(); } TPartitionedBlob::TRenameFormedBlobInfo::TRenameFormedBlobInfo(const TKey& oldKey, const TKey& newKey, ui32 size) : @@ -832,7 +886,7 @@ auto TPartitionedBlob::CreateFormedBlob(ui32 size, bool useRename) -> std::optio GlueHead = GlueNewHead = false; if (!Blobs.empty()) { - TBatch batch{Offset, Blobs.front().GetPartNo(), std::move(Blobs)}; + auto batch = TBatch::FromBlobs(Offset, std::move(Blobs)); Blobs.clear(); batch.Pack(); Y_ABORT_UNLESS(batch.Packed); diff --git a/ydb/core/persqueue/blob.h b/ydb/core/persqueue/blob.h index 24aa479a2eaa..6ad52f28e42c 100644 --- a/ydb/core/persqueue/blob.h +++ b/ydb/core/persqueue/blob.h @@ -121,38 +121,30 @@ struct TBatch { TVector InternalPartsPos; NKikimrPQ::TBatchHeader Header; TBuffer PackedData; + TBatch() : Packed(false) { PackedData.Reserve(8_MB); } - TBatch(const ui64 offset, const ui16 partNo, const TVector& blobs) - : Packed(false) + TBatch(const ui64 offset, const ui16 partNo) + : TBatch() { - PackedData.Reserve(8_MB); Header.SetOffset(offset); Header.SetPartNo(partNo); Header.SetUnpackedSize(0); Header.SetCount(0); Header.SetInternalPartsCount(0); - for (auto& b : blobs) { - AddBlob(b); - } } - TBatch(const ui64 offset, const ui16 partNo, const std::deque& blobs) - : Packed(false) - { - PackedData.Reserve(8_MB); - Header.SetOffset(offset); - Header.SetPartNo(partNo); - Header.SetUnpackedSize(0); - Header.SetCount(0); - 
Header.SetInternalPartsCount(0); + static TBatch FromBlobs(const ui64 offset, std::deque&& blobs) { + Y_ABORT_UNLESS(!blobs.empty()); + TBatch batch(offset, blobs.front().GetPartNo()); for (auto& b : blobs) { - AddBlob(b); + batch.AddBlob(b); } + return batch; } void AddBlob(const TClientBlob &b) { @@ -187,6 +179,9 @@ struct TBatch { ui16 GetInternalPartsCount() const { return Header.GetInternalPartsCount(); } + bool IsGreaterThan(ui64 offset, ui16 partNo) const { + return GetOffset() > offset || GetOffset() == offset && GetPartNo() > partNo; + } TBatch(const NKikimrPQ::TBatchHeader &header, const char* data) : Packed(true) @@ -198,9 +193,9 @@ struct TBatch { ui32 GetPackedSize() const { Y_ABORT_UNLESS(Packed); return sizeof(ui16) + PackedData.size() + Header.ByteSize(); } void Pack(); void Unpack(); - void UnpackTo(TVector *result); - void UnpackToType0(TVector *result); - void UnpackToType1(TVector *result); + void UnpackTo(TVector *result) const; + void UnpackToType0(TVector *result) const; + void UnpackToType1(TVector *result) const; void SerializeTo(TString& res) const; @@ -232,14 +227,39 @@ class TBlobIterator { ui16 InternalPartsCount; }; +class TPartitionedBlob; + //THead represents bathes, stored in head(at most 8 Mb) struct THead { - std::deque Batches; //all batches except last must be packed // BlobsSize <= 512Kb // size of Blobs after packing must be <= BlobsSize //otherwise head will be compacted not in total, some blobs will still remain in head //PackedSize + BlobsSize must be <= 8Mb +private: + std::deque Batches; + ui16 InternalPartsCount = 0; + + friend class TPartitionedBlob; + + class TBatchAccessor { + TBatch& Batch; + + public: + explicit TBatchAccessor(TBatch& batch) + : Batch(batch) + {} + + void Pack() { + Batch.Pack(); + } + + void Unpack() { + Batch.Unpack(); + } + }; + +public: ui64 Offset; ui16 PartNo; ui32 PackedSize; @@ -261,6 +281,18 @@ struct THead { //return Max if not such pos in head //returns batch with such position ui32 
FindPos(const ui64 offset, const ui16 partNo) const; + + void AddBatch(const TBatch& batch); + void ClearBatches(); + const std::deque& GetBatches() const; + const TBatch& GetBatch(ui32 idx) const; + const TBatch& GetLastBatch() const; + TBatchAccessor MutableBatch(ui32 idx); + TBatchAccessor MutableLastBatch(); + TBatch ExtractFirstBatch(); + void AddBlob(const TClientBlob& blob); + + friend IOutputStream& operator <<(IOutputStream& out, const THead& value); }; IOutputStream& operator <<(IOutputStream& out, const THead& value); diff --git a/ydb/core/persqueue/cluster_tracker.cpp b/ydb/core/persqueue/cluster_tracker.cpp index 4b1d757b1fca..2b0d15f693e5 100644 --- a/ydb/core/persqueue/cluster_tracker.cpp +++ b/ydb/core/persqueue/cluster_tracker.cpp @@ -13,6 +13,8 @@ #include #include +#include + namespace NKikimr::NPQ::NClusterTracker { inline auto& Ctx() { @@ -132,6 +134,7 @@ class TClusterTracker: public TActorBootstrapped { req->Record.MutableRequest()->SetKeepSession(false); req->Record.MutableRequest()->SetQuery(MakeListClustersQuery()); req->Record.MutableRequest()->SetDatabase(GetDatabase()); + req->Record.MutableRequest()->SetUsePublicResponseDataFormat(true); // useless without explicit session // req->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); req->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); @@ -144,45 +147,52 @@ class TClusterTracker: public TActorBootstrapped { LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse"); const auto& record = ev->Get()->Record.GetRef(); - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS && record.GetResponse().GetResults(0).GetValue().GetStruct(0).ListSize()) { - LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse UpdateClustersList"); - UpdateClustersList(record); + if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { + 
NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + if (parser.RowsCount()) { + LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse UpdateClustersList"); + UpdateClustersList(parser); - Y_ABORT_UNLESS(ClustersList); - Y_ABORT_UNLESS(ClustersList->Clusters.size()); - Y_ABORT_UNLESS(ClustersListUpdateTimestamp && *ClustersListUpdateTimestamp); + Y_ABORT_UNLESS(ClustersList); + Y_ABORT_UNLESS(ClustersList->Clusters.size()); + Y_ABORT_UNLESS(ClustersListUpdateTimestamp && *ClustersListUpdateTimestamp); - BroadcastClustersUpdate(); + BroadcastClustersUpdate(); - Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutSec()), new TEvents::TEvWakeup); - } else { - LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); + Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutSec()), new TEvents::TEvWakeup); + return; + } + } - ClustersList = nullptr; + LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); - Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutOnErrorSec()), new TEvents::TEvWakeup); - } + ClustersList = nullptr; + Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutOnErrorSec()), new TEvents::TEvWakeup); } template - void UpdateClustersList(const TProtoRecord& record) { + void UpdateClustersList(TProtoRecord& parser) { auto clustersList = MakeIntrusive(); - auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - clustersList->Clusters.resize(t.ListSize()); + clustersList->Clusters.resize(parser.RowsCount()); - for (size_t i = 0; i < t.ListSize(); ++i) { + bool firstRow = parser.TryNextRow(); + YQL_ENSURE(firstRow); + clustersList->Version = *parser.ColumnParser(5).GetOptionalInt64(); + size_t i = 0; + + do { auto& cluster = clustersList->Clusters[i]; - cluster.Name = t.GetList(i).GetStruct(0).GetOptional().GetText(); + cluster.Name = 
*parser.ColumnParser(0).GetOptionalUtf8(); cluster.Datacenter = cluster.Name; - cluster.Balancer = t.GetList(i).GetStruct(1).GetOptional().GetText(); + cluster.Balancer = *parser.ColumnParser(1).GetOptionalUtf8(); - cluster.IsLocal = t.GetList(i).GetStruct(2).GetOptional().GetBool(); - cluster.IsEnabled = t.GetList(i).GetStruct(3).GetOptional().GetBool(); - cluster.Weight = t.GetList(i).GetStruct(4).GetOptional().GetUint64(); - } + cluster.IsLocal = *parser.ColumnParser(2).GetOptionalBool(); + cluster.IsEnabled = *parser.ColumnParser(3).GetOptionalBool(); + cluster.Weight = *parser.ColumnParser(4).GetOptionalUint64(); - clustersList->Version = t.GetList(0).GetStruct(5).GetOptional().GetInt64(); + ++i; + } while (parser.TryNextRow()); ClustersList = std::move(clustersList); ClustersListUpdateTimestamp = Ctx().Now(); diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.cpp b/ydb/core/persqueue/dread_cache_service/caching_service.cpp index 6f8951df4aa0..7181e7161385 100644 --- a/ydb/core/persqueue/dread_cache_service/caching_service.cpp +++ b/ydb/core/persqueue/dread_cache_service/caching_service.cpp @@ -475,6 +475,10 @@ class TPQDirectReadCacheService : public TActorBootstrapped { TEvUpdateConfig() {} }; + struct TEvUpdateConfigBuilder: public TEvUpdateConfig { + using TBase::Record; + }; + struct TEvUpdateBalancerConfig: public TEventPB { TEvUpdateBalancerConfig() {} @@ -245,7 +249,11 @@ struct TEvPersQueue { {} }; - struct TEvProposeTransaction : public TEventPB { + struct TEvProposeTransaction : public TEventPreSerializedPB { + }; + + struct TEvProposeTransactionBuilder: public TEvProposeTransaction { + using TBase::Record; }; struct TEvProposeTransactionResult : public TEventPB { diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h index b588ba9c32b8..99c05a742c31 100644 --- a/ydb/core/persqueue/events/internal.h +++ b/ydb/core/persqueue/events/internal.h @@ -576,13 +576,15 @@ struct TEvPQ { }; struct 
TEvChangePartitionConfig : public TEventLocal { - TEvChangePartitionConfig(const NPersQueue::TTopicConverterPtr& topicConverter, const NKikimrPQ::TPQTabletConfig& config) + TEvChangePartitionConfig(const NPersQueue::TTopicConverterPtr& topicConverter, const NKikimrPQ::TPQTabletConfig& config, const NKikimrPQ::TBootstrapConfig& bootstrapConfig) : TopicConverter(topicConverter) , Config(config) + , BootstrapConfig(bootstrapConfig) {} NPersQueue::TTopicConverterPtr TopicConverter; NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::TBootstrapConfig BootstrapConfig; }; struct TEvPartitionConfigChanged : public TEventLocal { @@ -820,7 +822,7 @@ struct TEvPQ { }; struct TEvTxCalcPredicateResult : public TEventLocal { - TEvTxCalcPredicateResult(ui64 step, ui64 txId, const NPQ::TPartitionId& partition, bool predicate) : + TEvTxCalcPredicateResult(ui64 step, ui64 txId, const NPQ::TPartitionId& partition, TMaybe predicate) : Step(step), TxId(txId), Partition(partition), @@ -831,7 +833,7 @@ struct TEvPQ { ui64 Step; ui64 TxId; NPQ::TPartitionId Partition; - bool Predicate = false; + TMaybe Predicate; }; struct TEvProposePartitionConfig : public TEventLocal { @@ -845,6 +847,7 @@ struct TEvPQ { ui64 TxId; NPersQueue::TTopicConverterPtr TopicConverter; NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::TBootstrapConfig BootstrapConfig; }; struct TEvProposePartitionConfigResult : public TEventLocal { diff --git a/ydb/core/persqueue/fetch_request_actor.cpp b/ydb/core/persqueue/fetch_request_actor.cpp index a5d259a6a2aa..42533fc525ef 100644 --- a/ydb/core/persqueue/fetch_request_actor.cpp +++ b/ydb/core/persqueue/fetch_request_actor.cpp @@ -93,6 +93,9 @@ struct TEvPrivate { TActorId RequesterId; ui64 PendingQuotaAmount; + std::unordered_map PrivateTopicPathToCdcPath; + std::unordered_map CdcPathToPrivateTopicPath; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PQ_FETCH_REQUEST; @@ -176,16 +179,24 @@ struct TEvPrivate { 
schemeCacheRequest->DatabaseName = Settings.Database; THashSet topicsRequested; - for (const auto& part : Settings.Partitions) { - auto ins = topicsRequested.insert(part.Topic).second; - if (!ins) - continue; - auto split = NKikimr::SplitPath(part.Topic); + + if (PrivateTopicPathToCdcPath.empty()) { + for (const auto& part : Settings.Partitions) { + topicsRequested.insert(part.Topic); + } + } else { + for (const auto& [key, value] : PrivateTopicPathToCdcPath) { + topicsRequested.insert(key); + } + } + + for (const auto& topicName : topicsRequested) { + auto split = NKikimr::SplitPath(topicName); TSchemeCacheNavigate::TEntry entry; entry.Path.insert(entry.Path.end(), split.begin(), split.end()); entry.SyncVersion = true; - entry.ShowPrivatePath = false; + entry.ShowPrivatePath = true; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; schemeCacheRequest->ResultSet.emplace_back(std::move(entry)); @@ -197,6 +208,7 @@ struct TEvPrivate { void HandleSchemeCacheResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_FETCH_REQUEST, "Handle SchemeCache response"); auto& result = ev->Get()->Request; + bool anyCdcTopicInRequest = false; for (const auto& entry : result->ResultSet) { auto path = CanonizePath(NKikimr::JoinPath(entry.Path)); switch (entry.Status) { @@ -219,6 +231,16 @@ struct TEvPrivate { ), ctx ); } + if (entry.Kind == NSchemeCache::TSchemeCacheNavigate::KindCdcStream) { + anyCdcTopicInRequest = true; + Y_ABORT_UNLESS(entry.ListNodeEntry->Children.size() == 1); + auto privateTopicPath = CanonizePath(JoinPath(ChildPath(NKikimr::SplitPath(path), entry.ListNodeEntry->Children.at(0).Name))); + PrivateTopicPathToCdcPath[privateTopicPath] = path; + CdcPathToPrivateTopicPath[path] = privateTopicPath; + TopicInfo[privateTopicPath] = TopicInfo[path]; + TopicInfo.erase(path); + continue; + } if (entry.Kind != TSchemeCacheNavigate::EKind::KindTopic) { return SendReplyAndDie( 
CreateErrorReply( @@ -256,6 +278,12 @@ struct TEvPrivate { topicInfo.BalancerTabletId = description.GetBalancerTabletID(); topicInfo.PQInfo = entry.PQGroupInfo; } + + if (anyCdcTopicInRequest) { + SendSchemeCacheRequest(ctx); + return; + } + for (auto& p: TopicInfo) { ProcessMetadata(p.first, p.second, ctx); } @@ -393,8 +421,15 @@ struct TEvPrivate { return SendReplyAndDie(std::move(Response), ctx); } Y_ABORT_UNLESS(FetchRequestReadsDone < Settings.Partitions.size()); - const auto& req = Settings.Partitions[FetchRequestReadsDone]; - const auto& topic = req.Topic; + auto& req = Settings.Partitions[FetchRequestReadsDone]; + + auto& topic = req.Topic; + + auto cdcToPrivateIt = CdcPathToPrivateTopicPath.find(req.Topic); + if (cdcToPrivateIt != CdcPathToPrivateTopicPath.end()) { + topic = cdcToPrivateIt->second; + } + const auto& offset = req.Offset; const auto& part = req.Partition; const auto& maxBytes = req.MaxBytes; @@ -462,7 +497,13 @@ struct TEvPrivate { const auto& topic = req.Topic; const auto& part = req.Partition; - res->SetTopic(topic); + auto privateTopicToCdcIt = PrivateTopicPathToCdcPath.find(topic); + if (privateTopicToCdcIt == PrivateTopicPathToCdcPath.end()) { + res->SetTopic(topic); + } else { + res->SetTopic(PrivateTopicPathToCdcPath[topic]); + } + res->SetPartition(part); auto read = res->MutableReadResult(); if (record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()) diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index 8a16e3f7ec04..c7b40b2c8f84 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -55,8 +55,8 @@ auto GetStepAndTxId(const E& event) return GetStepAndTxId(event.Step, event.TxId); } -bool TPartition::LastOffsetHasBeenCommited(const TUserInfo& userInfo) const { - return !IsActive() && static_cast(std::max(userInfo.Offset, 0)) == EndOffset; +bool TPartition::LastOffsetHasBeenCommited(const TUserInfoBase& userInfo) const { + return !IsActive() 
&& (static_cast(std::max(userInfo.Offset, 0)) == EndOffset || StartOffset == EndOffset); } struct TMirrorerInfo { @@ -82,7 +82,7 @@ TString TPartition::LogPrefix() const { } else { state = "Unknown"; } - return TStringBuilder() << "[Partition:" << Partition << ", State:" << state << "] "; + return TStringBuilder() << "[PQ: " << TabletID << ", Partition:" << Partition << ", State:" << state << "] "; } bool TPartition::IsActive() const { @@ -245,7 +245,7 @@ void TPartition::EmplaceResponse(TMessage&& message, const TActorContext& ctx) { ); } -ui64 TPartition::MeteringDataSize() const { +ui64 TPartition::UserDataSize() const { if (DataKeysBody.size() <= 1) { // tiny optimization - we do not meter very small queues up to 16MB return 0; @@ -260,25 +260,39 @@ ui64 TPartition::MeteringDataSize() const { return size >= lastBlobSize ? size - lastBlobSize : 0; } +ui64 TPartition::MeteringDataSize(TInstant now) const { + if (IsActive() || NKikimrPQ::TPQTabletConfig::METERING_MODE_REQUEST_UNITS == Config.GetMeteringMode()) { + return UserDataSize(); + } else { + // We only add the amount of data that is blocked by an important consumer. + ui64 size = 0; + auto expirationTimestamp = now - TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) - WAKE_TIMEOUT; + for (size_t i = 1; i < DataKeysBody.size() && DataKeysBody[i].Timestamp < expirationTimestamp; ++i) { + size += DataKeysBody[i].Size; + } + return size; + } +} + ui64 TPartition::ReserveSize() const { - return TopicPartitionReserveSize(Config); + return IsActive() ? 
TopicPartitionReserveSize(Config) : 0; } ui64 TPartition::StorageSize(const TActorContext&) const { - return std::max(MeteringDataSize(), ReserveSize()); + return std::max(UserDataSize(), ReserveSize()); } ui64 TPartition::UsedReserveSize(const TActorContext&) const { - return std::min(MeteringDataSize(), ReserveSize()); + return std::min(UserDataSize(), ReserveSize()); } ui64 TPartition::GetUsedStorage(const TInstant& now) { const auto duration = now - LastUsedStorageMeterTimestamp; LastUsedStorageMeterTimestamp = now; - auto dataSize = MeteringDataSize(); + auto dataSize = MeteringDataSize(now); auto reservedSize = ReserveSize(); - ui64 size = dataSize > reservedSize ? dataSize - reservedSize : 0; + auto size = dataSize > reservedSize ? dataSize - reservedSize : 0; return size * duration.MilliSeconds() / 1000 / 1_MB; // mb*seconds } @@ -493,8 +507,8 @@ void TPartition::DestroyActor(const TActorContext& ctx) UsersInfoStorage->Clear(ctx); } + Send(ReadQuotaTrackerActor, new TEvents::TEvPoisonPill()); if (!IsSupportive()) { - Send(ReadQuotaTrackerActor, new TEvents::TEvPoisonPill()); Send(WriteQuotaTrackerActor, new TEvents::TEvPoisonPill()); } @@ -749,7 +763,7 @@ void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext result.SetReadBytesQuota(maxQuota); - result.SetPartitionSize(MeteringDataSize()); + result.SetPartitionSize(UserDataSize()); result.SetUsedReserveSize(UsedReserveSize(ctx)); result.SetLastWriteTimestampMs(WriteTimestamp.MilliSeconds()); @@ -885,7 +899,7 @@ void TPartition::Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorCo void TPartition::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx) { - const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->Record; + const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->GetRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(event.HasData()); const NKikimrPQ::TDataTransaction& 
txBody = event.GetData(); @@ -913,6 +927,10 @@ void TPartition::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TAc void TPartition::Handle(TEvPQ::TEvProposePartitionConfig::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvProposePartitionConfig" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PushBackDistrTx(ev->Release()); ProcessTxsAndUserActs(ctx); @@ -925,6 +943,10 @@ void TPartition::HandleOnInit(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorC void TPartition::HandleOnInit(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvTxCommit" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PendingEvents.emplace_back(ev->ReleaseBase().Release()); } @@ -935,11 +957,30 @@ void TPartition::HandleOnInit(TEvPQ::TEvTxRollback::TPtr& ev, const TActorContex void TPartition::HandleOnInit(TEvPQ::TEvProposePartitionConfig::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvProposePartitionConfig" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PendingEvents.emplace_back(ev->ReleaseBase().Release()); } void TPartition::Handle(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvTxCalcPredicate" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + + if (PlanStep.Defined() && TxId.Defined()) { + if (GetStepAndTxId(*ev->Get()) < GetStepAndTxId(*PlanStep, *TxId)) { + Send(Tablet, + MakeHolder(ev->Get()->Step, + ev->Get()->TxId, + Partition, + Nothing()).Release()); + return; + } + } + PushBackDistrTx(ev->Release()); ProcessTxsAndUserActs(ctx); @@ -947,8 +988,15 @@ void TPartition::Handle(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorContext void TPartition::Handle(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvTxCommit" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + if (PlanStep.Defined() && TxId.Defined()) 
{ if (GetStepAndTxId(*ev->Get()) < GetStepAndTxId(*PlanStep, *TxId)) { + PQ_LOG_D("Send TEvTxCommitDone" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); ctx.Send(Tablet, MakeCommitDone(ev->Get()->Step, ev->Get()->TxId).Release()); return; } @@ -956,11 +1004,20 @@ void TPartition::Handle(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext& ctx) auto txIter = TransactionsInflight.begin(); if (ChangeConfig) { - Y_ABORT_UNLESS(TransactionsInflight.size() == 1); + Y_ABORT_UNLESS(TransactionsInflight.size() == 1, + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); } else { - Y_ABORT_UNLESS(!TransactionsInflight.empty()); + Y_ABORT_UNLESS(!TransactionsInflight.empty(), + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); txIter = TransactionsInflight.find(ev->Get()->TxId); - Y_ABORT_UNLESS(!txIter.IsEnd()); + Y_ABORT_UNLESS(!txIter.IsEnd(), + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); } Y_ABORT_UNLESS(txIter->second->State == ECommitState::Pending); @@ -973,18 +1030,29 @@ void TPartition::Handle(TEvPQ::TEvTxRollback::TPtr& ev, const TActorContext& ctx auto* event = ev->Get(); if (PlanStep.Defined() && TxId.Defined()) { if (GetStepAndTxId(*event) < GetStepAndTxId(*PlanStep, *TxId)) { + PQ_LOG_D("Rollback for" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); return; } } auto txIter = TransactionsInflight.begin(); if (ChangeConfig) { - Y_ABORT_UNLESS(TransactionsInflight.size() == 1); + Y_ABORT_UNLESS(TransactionsInflight.size() == 1, + "PQ: %" PRIu64 ", Partition: %" PRIu32, + TabletID, Partition.OriginalPartitionId); } else { + Y_ABORT_UNLESS(!TransactionsInflight.empty(), + "PQ: %" PRIu64 ", Partition: %" 
PRIu32, + TabletID, Partition.OriginalPartitionId); txIter = TransactionsInflight.find(ev->Get()->TxId); - Y_ABORT_UNLESS(!txIter.IsEnd()); + Y_ABORT_UNLESS(!txIter.IsEnd(), + "PQ: %" PRIu64 ", Partition: %" PRIu32, + TabletID, Partition.OriginalPartitionId); } Y_ABORT_UNLESS(txIter->second->State == ECommitState::Pending); + txIter->second->State = ECommitState::Aborted; ProcessTxsAndUserActs(ctx); } @@ -1109,19 +1177,20 @@ void TPartition::Handle(TEvPQ::TEvGetWriteInfoError::TPtr& ev, const TActorConte void TPartition::ReplyToProposeOrPredicate(TSimpleSharedPtr& tx, bool isPredicate) { if (isPredicate) { - auto insRes = TransactionsInflight.insert(std::make_pair(tx->Tx->TxId, tx)); + auto insRes = TransactionsInflight.emplace(tx->Tx->TxId, tx); Y_ABORT_UNLESS(insRes.second); - Send(Tablet, MakeHolder(tx->Tx->Step, - tx->Tx->TxId, - Partition, - *tx->Predicate).Release()); + Send(Tablet, + MakeHolder(tx->Tx->Step, + tx->Tx->TxId, + Partition, + *tx->Predicate).Release()); } else { - auto insRes = TransactionsInflight.insert(std::make_pair(tx->ProposeConfig->TxId, tx)); + auto insRes = TransactionsInflight.emplace(tx->ProposeConfig->TxId, tx); Y_ABORT_UNLESS(insRes.second); Send(Tablet, - MakeHolder(tx->ProposeConfig->Step, - tx->ProposeConfig->TxId, - Partition).Release()); + MakeHolder(tx->ProposeConfig->Step, + tx->ProposeConfig->TxId, + Partition).Release()); } } @@ -1157,18 +1226,25 @@ void TPartition::Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorCont void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& ctx) { const ui64 cookie = ev->Get()->GetCookie(); - Y_ABORT_UNLESS(ReadInfo.contains(cookie)); - auto it = ReadInfo.find(cookie); - Y_ABORT_UNLESS(it != ReadInfo.end()); + + // If there is no such cookie, then read was canceled. 
+ // For example, it can be after consumer deletion + if (it == ReadInfo.end()) { + return; + } TReadInfo info = std::move(it->second); ReadInfo.erase(it); - //make readinfo class - auto& userInfo = UsersInfoStorage->GetOrCreate(info.User, ctx); + auto* userInfo = UsersInfoStorage->GetIfExists(info.User); + if (!userInfo) { + ReplyError(ctx, info.Destination, NPersQueue::NErrorCode::BAD_REQUEST, GetConsumerDeletedMessage(info.User)); + OnReadRequestFinished(info.Destination, 0, info.User, ctx); + } + TReadAnswer answer(info.FormAnswer( - ctx, *ev->Get(), EndOffset, Partition, &userInfo, + ctx, *ev->Get(), EndOffset, Partition, userInfo, info.Destination, GetSizeLag(info.Offset), Tablet, Config.GetMeteringMode() )); const auto& resp = dynamic_cast(answer.Event.Get())->Response; @@ -1921,7 +1997,7 @@ TPartition::EProcessResult TPartition::PreProcessUserActionOrTransaction(TSimple return EProcessResult::Continue; } t->Predicate.ConstructInPlace(true); - return PreProcessImmediateTx(t->ProposeTransaction->Record); + return PreProcessImmediateTx(t->ProposeTransaction->GetRecord()); } else if (t->Tx) { // Distributed TX if (t->Predicate.Defined()) { // Predicate defined - either failed previously or Tx created with predicate defined. 
@@ -1983,7 +2059,8 @@ bool TPartition::ExecUserActionOrTransaction(TSimpleSharedPtr& t, } else if (t->ProposeConfig) { Y_ABORT_UNLESS(ChangingConfig); ChangeConfig = MakeSimpleShared(TopicConverter, - t->ProposeConfig->Config); + t->ProposeConfig->Config, + t->ProposeConfig->BootstrapConfig); PendingPartitionConfig = GetPartitionConfig(ChangeConfig->Config); SendChangeConfigReply = false; } @@ -2069,7 +2146,8 @@ bool TPartition::BeginTransaction(const TEvPQ::TEvProposePartitionConfig& event) { ChangeConfig = MakeSimpleShared(TopicConverter, - event.Config); + event.Config, + event.BootstrapConfig); PendingPartitionConfig = GetPartitionConfig(ChangeConfig->Config); SendChangeConfigReply = false; @@ -2175,6 +2253,7 @@ void TPartition::CommitWriteOperations(TTransaction& t) }, std::nullopt}; msg.Internal = true; + WriteInflightSize += msg.Msg.Data.size(); ExecRequest(msg, *Parameters, PersistRequest.Get()); auto& info = TxSourceIdForPostPersist[blob.SourceId]; @@ -2305,6 +2384,7 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) if (ChangeConfig) { EndChangePartitionConfig(std::move(ChangeConfig->Config), + std::move(ChangeConfig->BootstrapConfig), ChangeConfig->TopicConverter, ctx); } @@ -2349,6 +2429,20 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) } UsersInfoStorage->Remove(user, ctx); + + // Finish all ongoing reads + std::unordered_set readCookies; + for (auto& [cookie, info] : ReadInfo) { + if (info.User == user) { + readCookies.insert(cookie); + ReplyError(ctx, info.Destination, NPersQueue::NErrorCode::BAD_REQUEST, GetConsumerDeletedMessage(user)); + OnReadRequestFinished(info.Destination, 0, user, ctx); + } + } + for (ui64 cookie : readCookies) { + ReadInfo.erase(cookie); + } + Send(ReadQuotaTrackerActor, new TEvPQ::TEvConsumerRemoved(user)); } } @@ -2371,12 +2465,24 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) } void 
TPartition::EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, + NKikimrPQ::TBootstrapConfig&& bootstrapConfig, NPersQueue::TTopicConverterPtr topicConverter, const TActorContext& ctx) { Config = std::move(config); PartitionConfig = GetPartitionConfig(Config); PartitionGraph = MakePartitionGraph(Config); + + for (const auto& mg : bootstrapConfig.GetExplicitMessageGroups()) { + TMaybe keyRange; + if (mg.HasKeyRange()) { + keyRange = TPartitionKeyRange::Parse(mg.GetKeyRange()); + } + + TSourceIdInfo sourceId(0, 0, ctx.Now(), std::move(keyRange), false); + SourceIdStorage.RegisterSourceIdInfo(mg.GetId(), std::move(sourceId), true); + } + TopicConverter = topicConverter; NewPartition = false; @@ -2386,14 +2492,15 @@ void TPartition::EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, InitSplitMergeSlidingWindow(); } - Send(ReadQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); - Send(WriteQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); + Send(ReadQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, bootstrapConfig)); + Send(WriteQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, bootstrapConfig)); TotalPartitionWriteSpeed = config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); if (Config.GetPartitionConfig().HasMirrorFrom()) { if (Mirrorer) { ctx.Send(Mirrorer->Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, - Config)); + Config, + bootstrapConfig)); } else { CreateMirrorerActor(); } @@ -2481,7 +2588,7 @@ TPartition::EProcessResult TPartition::PreProcessImmediateTx(const NKikimrPQ::TE void TPartition::ExecImmediateTx(TTransaction& t) { --ImmediateTxCount; - auto& record = t.ProposeTransaction->Record; + const auto& record = t.ProposeTransaction->GetRecord(); Y_ABORT_UNLESS(record.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(record.HasData()); @@ -2494,7 +2601,7 @@ void 
TPartition::ExecImmediateTx(TTransaction& t) t.Message); return; } - for (auto& operation : record.GetData().GetOperations()) { + for (const auto& operation : record.GetData().GetOperations()) { if (!operation.HasBegin() || !operation.HasEnd() || !operation.HasConsumer()) { continue; //Write operation - handled separately via WriteInfo } @@ -2843,6 +2950,10 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, userInfo.Offset = offset; + if (LastOffsetHasBeenCommited(userInfo)) { + SendReadingFinished(user); + } + auto counter = setSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); TabletCounters.Cumulative()[counter].Increment(1); } @@ -2880,6 +2991,10 @@ void TPartition::ScheduleReplyPropose(const NKikimrPQ::TEvProposeTransaction& ev NKikimrPQ::TError::EKind kind, const TString& reason) { + PQ_LOG_D("schedule TEvPersQueue::TEvProposeTransactionResult(" << + NKikimrPQ::TEvProposeTransactionResult_EStatus_Name(statusCode) << + ")" << + ", reason=" << reason); Replies.emplace_back(ActorIdFromProto(event.GetSourceActor()), MakeReplyPropose(event, statusCode, @@ -3295,6 +3410,8 @@ void TPartition::Handle(TEvPQ::TEvCheckPartitionStatusRequest::TPtr& ev, const T void TPartition::HandleOnInit(TEvPQ::TEvDeletePartition::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvDeletePartition"); + Y_ABORT_UNLESS(IsSupportive()); PendingEvents.emplace_back(ev->ReleaseBase().Release()); @@ -3302,6 +3419,8 @@ void TPartition::HandleOnInit(TEvPQ::TEvDeletePartition::TPtr& ev, const TActorC void TPartition::Handle(TEvPQ::TEvDeletePartition::TPtr&, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvDeletePartition"); + Y_ABORT_UNLESS(IsSupportive()); Y_ABORT_UNLESS(DeletePartitionState == DELETION_NOT_INITED); @@ -3314,20 +3433,25 @@ void TPartition::ScheduleNegativeReplies() { auto processQueue = [&](std::deque& queue) { for (auto& event : queue) { - if (auto* 
setInfo = std::get_if<0>(&event.Event)) { - ScheduleNegativeReply(*setInfo->Get()); - } else if (auto* tx = std::get_if<1>(&event.Event)) { - if (tx->Get()->ProposeTransaction) { - ScheduleNegativeReply(*tx->Get()->ProposeTransaction); - } else { - ScheduleNegativeReply(*tx->Get()); + std::visit(TOverloaded{ + [this](TSimpleSharedPtr& v) { + ScheduleNegativeReply(*v); + }, + [this](TSimpleSharedPtr& v) { + if (v->ProposeTransaction) { + ScheduleNegativeReply(*v->ProposeTransaction); + } else { + ScheduleNegativeReply(*v); + } + }, + [this](TMessage& v) { + ScheduleNegativeReply(v); } - } else { - ScheduleNegativeReply(*(std::get_if<2>(&event.Event))); - } + }, event.Event); } queue.clear(); }; + processQueue(UserActionAndTransactionEvents); processQueue(UserActionAndTxPendingCommit); } diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index f907594bbfbb..31315dbc7f0b 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -66,7 +66,6 @@ struct TTransaction { explicit TTransaction(TSimpleSharedPtr proposeConfig) : ProposeConfig(proposeConfig) { - Y_ABORT_UNLESS(ProposeConfig); } @@ -74,8 +73,9 @@ struct TTransaction { : ProposeTransaction(proposeTx) , State(ECommitState::Committed) { - if (proposeTx->Record.HasSupportivePartitionActor()) { - SupportivePartitionActor = ActorIdFromProto(proposeTx->Record.GetSupportivePartitionActor()); + const auto& record = proposeTx->GetRecord(); + if (record.HasSupportivePartitionActor()) { + SupportivePartitionActor = ActorIdFromProto(record.GetSupportivePartitionActor()); } Y_ABORT_UNLESS(ProposeTransaction); } @@ -148,7 +148,7 @@ class TPartition : public TActorBootstrapped { bool CanWrite() const; bool CanEnqueue() const; - bool LastOffsetHasBeenCommited(const TUserInfo& userInfo) const; + bool LastOffsetHasBeenCommited(const TUserInfoBase& userInfo) const; void ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& 
error); void ReplyPropose(const TActorContext& ctx, const NKikimrPQ::TEvProposeTransaction& event, NKikimrPQ::TEvProposeTransactionResult::EStatus statusCode, @@ -391,6 +391,7 @@ class TPartition : public TActorBootstrapped { void OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx); void EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, + NKikimrPQ::TBootstrapConfig&& bootstrapConfig, NPersQueue::TTopicConverterPtr topicConverter, const TActorContext& ctx); TString GetKeyConfig() const; @@ -458,7 +459,9 @@ class TPartition : public TActorBootstrapped { } // The size of the data realy was persisted in the storage by the partition - ui64 MeteringDataSize() const; + ui64 UserDataSize() const; + // The size of the data was metered to user + ui64 MeteringDataSize(TInstant now) const; // The size of the storage that was reserved by the partition ui64 ReserveSize() const; // The size of the storage that usud by the partition. That included combination of the reserver and realy persisted data. 
@@ -620,6 +623,7 @@ class TPartition : public TActorBootstrapped { static void RemoveMessages(TMessageQueue& src, TMessageQueue& dst); void RemovePendingRequests(TMessageQueue& requests); void RemoveMessagesToQueue(TMessageQueue& requests); + static TString GetConsumerDeletedMessage(TStringBuf consumerName); private: ui64 TabletID; @@ -915,6 +919,8 @@ class TPartition : public TActorBootstrapped { TDeque> PendingEvents; TRowVersion LastEmittedHeartbeat; + TLastCounter SourceIdCounter; + const NKikimrPQ::TPQTabletConfig::TPartition* GetPartitionConfig(const NKikimrPQ::TPQTabletConfig& config); bool ClosedInternalPartition = false; @@ -952,4 +958,3 @@ class TPartition : public TActorBootstrapped { }; } // namespace NKikimr::NPQ - diff --git a/ydb/core/persqueue/partition_init.cpp b/ydb/core/persqueue/partition_init.cpp index 2f2b9b4c6f42..e17775cbb93c 100644 --- a/ydb/core/persqueue/partition_init.cpp +++ b/ydb/core/persqueue/partition_init.cpp @@ -169,15 +169,14 @@ void TInitConfigStep::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorCon if (Partition()->Config.GetVersion() < Partition()->TabletConfig.GetVersion()) { auto event = MakeHolder(Partition()->TopicConverter, - Partition()->TabletConfig); + Partition()->TabletConfig, + NKikimrPQ::TBootstrapConfig()); Partition()->PushFrontDistrTx(event.Release()); } break; case NKikimrProto::NODATA: Partition()->Config = Partition()->TabletConfig; - Partition()->PartitionConfig = GetPartitionConfig(Partition()->Config, Partition()->Partition.OriginalPartitionId); - Partition()->PartitionGraph = MakePartitionGraph(Partition()->Config); break; case NKikimrProto::ERROR: @@ -191,6 +190,9 @@ void TInitConfigStep::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorCon Y_ABORT("bad status"); }; + Partition()->PartitionConfig = GetPartitionConfig(Partition()->Config, Partition()->Partition.OriginalPartitionId); + Partition()->PartitionGraph = MakePartitionGraph(Partition()->Config); + Done(ctx); } @@ -632,7 +634,7 @@ 
void TInitDataStep::Handle(TEvKeyValue::TEvResponse::TPtr &ev, const TActorConte Y_ABORT_UNLESS(size == read.GetValue().size()); for (TBlobIterator it(key, read.GetValue()); it.IsValid(); it.Next()) { - head.Batches.emplace_back(it.GetBatch()); + head.AddBatch(it.GetBatch()); } head.PackedSize += size; diff --git a/ydb/core/persqueue/partition_read.cpp b/ydb/core/persqueue/partition_read.cpp index 100ce3e6a827..4c7abf54e4f2 100644 --- a/ydb/core/persqueue/partition_read.cpp +++ b/ydb/core/persqueue/partition_read.cpp @@ -626,13 +626,13 @@ TVector TPartition::GetReadRequestFromHead( Y_ABORT_UNLESS(pos != Max()); } ui32 lastBlobSize = 0; - for (;pos < Head.Batches.size(); ++pos) { + for (;pos < Head.GetBatches().size(); ++pos) { TVector blobs; - Head.Batches[pos].UnpackTo(&blobs); + Head.GetBatch(pos).UnpackTo(&blobs); ui32 i = 0; - ui64 offset = Head.Batches[pos].GetOffset(); - ui16 pno = Head.Batches[pos].GetPartNo(); + ui64 offset = Head.GetBatch(pos).GetOffset(); + ui16 pno = Head.GetBatch(pos).GetPartNo(); for (; i < blobs.size(); ++i) { ui64 curOffset = offset; @@ -748,8 +748,8 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr&& readEvent, TDuration waitQuotaTim auto* read = readEvent->Get(); const TString& user = read->ClientId; auto userInfo = UsersInfoStorage->GetIfExists(user); - if(!userInfo) { - ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "cannot finish read request. 
Consumer " << read->ClientId << " is gone from partition"); + if (!userInfo) { + ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, GetConsumerDeletedMessage(read->ClientId)); Send(ReadQuotaTrackerActor, new TEvPQ::TEvConsumerRemoved(user)); OnReadRequestFinished(read->Cookie, 0, user, ctx); return; @@ -796,12 +796,11 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr&& readEvent, TDuration waitQuotaTim return; } - if (offset > EndOffset) { + if (offset >= EndOffset) { ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "Offset more than EndOffset. Offset=" << offset << ", EndOffset=" << EndOffset); return; } - Y_ABORT_UNLESS(offset < EndOffset); ProcessRead(ctx, std::move(info), cookie, false); } @@ -1027,4 +1026,8 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u ctx.Send(BlobCache, request.Release()); } +TString TPartition::GetConsumerDeletedMessage(TStringBuf consumerName) { + return TStringBuilder() << "cannot finish read request. 
Consumer " << consumerName << " is gone from partition"; +} + } // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/partition_scale_manager.cpp b/ydb/core/persqueue/partition_scale_manager.cpp index 8d03b87e1d04..b36fdb18584e 100644 --- a/ydb/core/persqueue/partition_scale_manager.cpp +++ b/ydb/core/persqueue/partition_scale_manager.cpp @@ -8,12 +8,14 @@ namespace NPQ { TPartitionScaleManager::TPartitionScaleManager( const TString& topicName, + const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config ) : TopicName(topicName) + , TopicPath(topicPath) , DatabasePath(databasePath) , BalancerConfig(pathId, version, config) { } @@ -45,6 +47,7 @@ void TPartitionScaleManager::TrySendScaleRequest(const TActorContext& ctx) { << "send split request"); CurrentScaleRequest = ctx.Register(new TPartitionScaleRequest( TopicName, + TopicPath, DatabasePath, BalancerConfig.PathId, BalancerConfig.PathVersion, diff --git a/ydb/core/persqueue/partition_scale_manager.h b/ydb/core/persqueue/partition_scale_manager.h index dc46b38f0831..39251d5b0610 100644 --- a/ydb/core/persqueue/partition_scale_manager.h +++ b/ydb/core/persqueue/partition_scale_manager.h @@ -47,7 +47,7 @@ class TPartitionScaleManager { }; public: - TPartitionScaleManager(const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config); + TPartitionScaleManager(const TString& topicName, const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config); public: void HandleScaleStatusChange(const ui32 partition, NKikimrPQ::EScaleStatus scaleStatus, const TActorContext& ctx); @@ -71,6 +71,7 @@ class TPartitionScaleManager { static const ui32 MAX_SCALE_REQUEST_REPEAT_SECONDS_TIMEOUT = 1000; const TString TopicName; + const TString TopicPath; TString DatabasePath = ""; TActorId CurrentScaleRequest; TDuration RequestTimeout = 
TDuration::MilliSeconds(0); diff --git a/ydb/core/persqueue/partition_scale_request.cpp b/ydb/core/persqueue/partition_scale_request.cpp index 28e7d8f7a595..4697a57b14fc 100644 --- a/ydb/core/persqueue/partition_scale_request.cpp +++ b/ydb/core/persqueue/partition_scale_request.cpp @@ -4,15 +4,17 @@ namespace NKikimr { namespace NPQ { TPartitionScaleRequest::TPartitionScaleRequest( - TString topicName, - TString databasePath, + const TString& topicName, + const TString& topicPath, + const TString& databasePath, ui64 pathId, ui64 pathVersion, - std::vector splits, - const std::vector merges, - NActors::TActorId parentActorId + const std::vector& splits, + const std::vector& merges, + const NActors::TActorId& parentActorId ) : Topic(topicName) + , TopicPath(topicPath) , DatabasePath(databasePath) , PathId(pathId) , PathVersion(pathVersion) @@ -30,14 +32,17 @@ void TPartitionScaleRequest::Bootstrap(const NActors::TActorContext &ctx) { void TPartitionScaleRequest::SendProposeRequest(const NActors::TActorContext &ctx) { auto proposal = std::make_unique(); proposal->Record.SetDatabaseName(CanonizePath(DatabasePath)); - FillProposeRequest(*proposal, DatabasePath, Topic, ctx); + FillProposeRequest(*proposal, ctx); ctx.Send(MakeTxProxyID(), proposal.release()); } -void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TString& workingDir, const TString& topicName, const NActors::TActorContext &ctx) { +void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const NActors::TActorContext &ctx) { + auto workingDir = TopicPath.substr(0, TopicPath.size() - Topic.size()); + auto& modifyScheme = *proposal.Record.MutableTransaction()->MutableModifyScheme(); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpAlterPersQueueGroup); modifyScheme.SetWorkingDir(workingDir); + modifyScheme.SetInternal(true); auto applyIf = modifyScheme.AddApplyIf(); applyIf->SetPathId(PathId); @@ -45,9 +50,9 @@ 
void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransa applyIf->SetCheckEntityVersion(true); NKikimrSchemeOp::TPersQueueGroupDescription groupDescription; - groupDescription.SetName(topicName); + groupDescription.SetName(Topic); TStringBuilder logMessage; - logMessage << "TPartitionScaleRequest::FillProposeRequest trying to scale partitions. Spilts: "; + logMessage << "TPartitionScaleRequest::FillProposeRequest trying to scale partitions of '" << workingDir << "/" << Topic << "'. Spilts: "; for(const auto& split: Splits) { auto* newSplit = groupDescription.AddSplit(); logMessage << "partition: " << split.GetPartition() << " boundary: '" << split.GetSplitBoundary() << "' "; diff --git a/ydb/core/persqueue/partition_scale_request.h b/ydb/core/persqueue/partition_scale_request.h index 017825e78d82..764c8f033eb0 100644 --- a/ydb/core/persqueue/partition_scale_request.h +++ b/ydb/core/persqueue/partition_scale_request.h @@ -26,7 +26,10 @@ class TPartitionScaleRequest: public NActors::TActorBootstrapped splits, const std::vector merges, NActors::TActorId parentActorId); + TPartitionScaleRequest(const TString& topicName, const TString& topicPath, const TString& databasePath, ui64 pathId, ui64 pathVersion, + const std::vector& splits, + const std::vector& merges, + const NActors::TActorId& parentActorId); public: void Bootstrap(const NActors::TActorContext &ctx); @@ -48,10 +51,11 @@ class TPartitionScaleRequest: public NActors::TActorBootstrapped SplitPath(const TString& path); void SendProposeRequest(const NActors::TActorContext &ctx); - void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TString& workingDir, const TString& topicName, const NActors::TActorContext &ctx); + void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const NActors::TActorContext &ctx); private: const TString Topic; + const TString TopicPath; const TString DatabasePath; const ui64 PathId; const ui64 PathVersion; diff --git 
a/ydb/core/persqueue/partition_sourcemanager.cpp b/ydb/core/persqueue/partition_sourcemanager.cpp index f81b3a7da3b2..c9214300384a 100644 --- a/ydb/core/persqueue/partition_sourcemanager.cpp +++ b/ydb/core/persqueue/partition_sourcemanager.cpp @@ -81,7 +81,8 @@ void TPartitionSourceManager::TModificationBatch::Cancel() { } bool TPartitionSourceManager::TModificationBatch::HasModifications() const { - return !SourceIdWriter.GetSourceIdsToWrite().empty(); + return !SourceIdWriter.GetSourceIdsToWrite().empty() + || !SourceIdWriter.GetSourceIdsToDelete().empty(); } void TPartitionSourceManager::TModificationBatch::FillRequest(TEvKeyValue::TEvRequest* request) { diff --git a/ydb/core/persqueue/partition_write.cpp b/ydb/core/persqueue/partition_write.cpp index e0346e2f9010..a115a56d39cb 100644 --- a/ydb/core/persqueue/partition_write.cpp +++ b/ydb/core/persqueue/partition_write.cpp @@ -166,6 +166,12 @@ void TPartition::ProcessReserveRequests(const TActorContext& ctx) { const ui64& cookie = ReserveRequests.front()->Cookie; const bool& lastRequest = ReserveRequests.front()->LastRequest; + if (!IsActive()) { + ReplyOk(ctx, cookie); + ReserveRequests.pop_front(); + continue; + } + auto it = Owners.find(owner); if (ClosedInternalPartition) { ReplyError(ctx, cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest to closed supportive partition"); @@ -239,13 +245,14 @@ void TPartition::HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ct void TPartition::AnswerCurrentWrites(const TActorContext& ctx) { PQ_LOG_T("TPartition::AnswerCurrentWrites. 
Responses.size()=" << Responses.size()); + const auto now = ctx.Now(); ui64 offset = EndOffset; while (!Responses.empty()) { const auto& response = Responses.front(); const TDuration queueTime = response.QueueTime; - const TDuration writeTime = ctx.Now() - response.WriteTimeBaseline; + const TDuration writeTime = now - response.WriteTimeBaseline; if (response.IsWrite()) { const auto& writeResponse = response.GetWrite(); @@ -257,6 +264,7 @@ void TPartition::AnswerCurrentWrites(const TActorContext& ctx) { bool already = false; + SourceIdCounter.Use(s, now); auto it = SourceIdStorage.GetInMemorySourceIds().find(s); ui64 maxSeqNo = 0; @@ -397,7 +405,7 @@ void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) { Head.PackedSize = 0; Head.Offset = NewHead.Offset; Head.PartNo = NewHead.PartNo; //no partNo at this point - Head.Batches.clear(); + Head.ClearBatches(); } while (!CompactedKeys.empty()) { @@ -420,9 +428,8 @@ void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) { } // head cleared, all data moved to body //append Head with newHead - while (!NewHead.Batches.empty()) { - Head.Batches.push_back(NewHead.Batches.front()); - NewHead.Batches.pop_front(); + while (!NewHead.GetBatches().empty()) { + Head.AddBatch(NewHead.ExtractFirstBatch()); } Head.PackedSize += NewHead.PackedSize; @@ -462,20 +469,14 @@ void TPartition::UpdateAfterWriteCounters(bool writeComplete) { // If supportive - update counters only prior to write, otherwise - only after writes; return; } - if (BytesWrittenGrpc) - BytesWrittenGrpc.Inc(WriteNewSizeInternal); - if (BytesWrittenTotal) - BytesWrittenTotal.Inc(WriteNewSize); - - if (BytesWrittenUncompressed) - BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); + BytesWrittenGrpc.Inc(WriteNewSizeInternal); + BytesWrittenTotal.Inc(WriteNewSize); + BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); if (BytesWrittenComp) BytesWrittenComp.Inc(WriteCycleSize); - if (MsgsWrittenGrpc) - 
MsgsWrittenGrpc.Inc(WriteNewMessagesInternal); - if (MsgsWrittenTotal) { - MsgsWrittenTotal.Inc(WriteNewMessages); - } + + MsgsWrittenGrpc.Inc(WriteNewMessagesInternal); + MsgsWrittenTotal.Inc(WriteNewMessages); } void TPartition::HandleWriteResponse(const TActorContext& ctx) { @@ -485,13 +486,15 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } HaveWriteMsg = false; + const auto now = ctx.Now(); + for (auto& [sourceId, info] : TxSourceIdForPostPersist) { auto it = SourceIdStorage.GetInMemorySourceIds().find(sourceId); if (it.IsEnd()) { - SourceIdStorage.RegisterSourceId(sourceId, info.SeqNo, info.Offset, ctx.Now()); + SourceIdStorage.RegisterSourceId(sourceId, info.SeqNo, info.Offset, now); } else { ui64 seqNo = std::max(info.SeqNo, it->second.SeqNo); - SourceIdStorage.RegisterSourceId(sourceId, it->second.Updated(seqNo, info.Offset, ctx.Now())); + SourceIdStorage.RegisterSourceId(sourceId, it->second.Updated(seqNo, info.Offset, now)); } } TxSourceIdForPostPersist.clear(); @@ -505,8 +508,8 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } ui64 prevEndOffset = EndOffset; - ui32 totalLatencyMs = (ctx.Now() - WriteCycleStartTime).MilliSeconds(); - ui32 writeLatencyMs = (ctx.Now() - WriteStartTime).MilliSeconds(); + ui32 totalLatencyMs = (now - WriteCycleStartTime).MilliSeconds(); + ui32 writeLatencyMs = (now - WriteStartTime).MilliSeconds(); WriteLatency.IncFor(writeLatencyMs, 1); if (writeLatencyMs >= AppData(ctx)->PQConfig.GetWriteLatencyBigMs()) { @@ -522,7 +525,6 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { UpdateAfterWriteCounters(true); //All ok - auto now = ctx.Now(); for (auto& avg : AvgWriteBytes) { avg.Update(WriteNewSize, now); } @@ -538,11 +540,9 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { if (SupportivePartitionTimeLag) { SupportivePartitionTimeLag->UpdateTimestamp(now.MilliSeconds()); } - if (SplitMergeEnabled(Config)) { - 
SplitMergeAvgWriteBytes->Update(WriteNewSizeFull, now); - auto needScaling = CheckScaleStatus(ctx); - ChangeScaleStatusIfNeeded(needScaling); - } + + auto writeNewSizeFull = WriteNewSizeFull; + WriteCycleSize = 0; WriteNewSize = 0; WriteNewSizeFull = 0; @@ -556,6 +556,12 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { AnswerCurrentWrites(ctx); SyncMemoryStateWithKVState(ctx); + if (SplitMergeEnabled(Config)) { + SplitMergeAvgWriteBytes->Update(writeNewSizeFull, now); + auto needScaling = CheckScaleStatus(ctx); + ChangeScaleStatusIfNeeded(needScaling); + } + //if EndOffset changed there could be subscriptions witch could be completed TVector> reads = Subscriber.GetReads(EndOffset); for (auto& read : reads) { @@ -569,7 +575,10 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { - auto const writeSpeedUsagePercent = SplitMergeAvgWriteBytes->GetValue() * 100.0 / Config.GetPartitionStrategy().GetScaleThresholdSeconds() / TotalPartitionWriteSpeed; + const auto writeSpeedUsagePercent = SplitMergeAvgWriteBytes->GetValue() * 100.0 / Config.GetPartitionStrategy().GetScaleThresholdSeconds() / TotalPartitionWriteSpeed; + const auto sourceIdWindow = TDuration::Seconds(std::min(5, Config.GetPartitionStrategy().GetScaleThresholdSeconds())); + const auto sourceIdCount = SourceIdCounter.Count(ctx.Now() - sourceIdWindow); + LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus" @@ -577,15 +586,17 @@ NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { << " writeSpeedUsagePercent# " << writeSpeedUsagePercent << " scaleThresholdSeconds# " << Config.GetPartitionStrategy().GetScaleThresholdSeconds() << " totalPartitionWriteSpeed# " << TotalPartitionWriteSpeed + << " sourceIdCount=" << sourceIdCount << " Topic: \"" << TopicName() << "\"." 
<< " Partition: " << Partition ); + auto splitEnabled = Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT || Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT_AND_MERGE; auto mergeEnabled = Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT_AND_MERGE; - if (splitEnabled && writeSpeedUsagePercent >= Config.GetPartitionStrategy().GetScaleUpPartitionWriteSpeedThresholdPercent()) { + if (splitEnabled && writeSpeedUsagePercent >= Config.GetPartitionStrategy().GetScaleUpPartitionWriteSpeedThresholdPercent() && sourceIdCount > 1) { LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus NEED_SPLIT" << " Topic: \"" << TopicName() << "\"." << @@ -596,7 +607,8 @@ NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus NEED_MERGE" << " Topic: \"" << TopicName() << "\"." 
<< - " Partition: " << Partition + " Partition: " << Partition << " writeSpeedUsagePercent: " << writeSpeedUsagePercent << + " Threshold: " << Config.GetPartitionStrategy().GetScaleDownPartitionWriteSpeedThresholdPercent() ); return NKikimrPQ::EScaleStatus::NEED_MERGE; } @@ -1101,6 +1113,10 @@ bool TPartition::ExecRequest(TWriteMsg& p, ProcessParameters& parameters, TEvKey auto& sourceIdBatch = parameters.SourceIdBatch; auto sourceId = sourceIdBatch.GetSource(p.Msg.SourceId); + Y_DEBUG_ABORT_UNLESS(WriteInflightSize >= p.Msg.Data.size(), + "PQ %" PRIu64 ", Partition {%" PRIu32 ", %" PRIu32 "}, WriteInflightSize=%" PRIu64 ", p.Msg.Data.size=%" PRISZT, + TabletID, Partition.OriginalPartitionId, Partition.InternalPartitionId, + WriteInflightSize, p.Msg.Data.size()); WriteInflightSize -= p.Msg.Data.size(); TabletCounters.Percentile()[COUNTER_LATENCY_PQ_RECEIVE_QUEUE].IncrementFor(ctx.Now().MilliSeconds() - p.Msg.ReceiveTimestamp); @@ -1307,22 +1323,22 @@ bool TPartition::ExecRequest(TWriteMsg& p, ProcessParameters& parameters, TEvKey ctx); ui32 countOfLastParts = 0; for (auto& x : PartitionedBlob.GetClientBlobs()) { - if (NewHead.Batches.empty() || NewHead.Batches.back().Packed) { - NewHead.Batches.emplace_back(curOffset, x.GetPartNo(), TVector()); + if (NewHead.GetBatches().empty() || NewHead.GetLastBatch().Packed) { + NewHead.AddBatch(TBatch(curOffset, x.GetPartNo())); NewHead.PackedSize += GetMaxHeaderSize(); //upper bound for packed size } if (x.IsLastPart()) { ++countOfLastParts; } - Y_ABORT_UNLESS(!NewHead.Batches.back().Packed); - NewHead.Batches.back().AddBlob(x); + Y_ABORT_UNLESS(!NewHead.GetLastBatch().Packed); + NewHead.AddBlob(x); NewHead.PackedSize += x.GetBlobSize(); - if (NewHead.Batches.back().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + if (NewHead.GetLastBatch().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { + 
NewHead.MutableLastBatch().Pack(); + NewHead.PackedSize += NewHead.GetLastBatch().GetPackedSize(); //add real packed size for this blob NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + NewHead.PackedSize -= NewHead.GetLastBatch().GetUnpackedSize(); } } @@ -1399,15 +1415,15 @@ void TPartition::AddNewWriteBlob(std::pair& res, TEvKeyValue::TEvReq valueD.reserve(res.second); ui32 pp = Head.FindPos(key.GetOffset(), key.GetPartNo()); if (pp < Max() && key.GetOffset() < EndOffset) { //this batch trully contains this offset - Y_ABORT_UNLESS(pp < Head.Batches.size()); - Y_ABORT_UNLESS(Head.Batches[pp].GetOffset() == key.GetOffset()); - Y_ABORT_UNLESS(Head.Batches[pp].GetPartNo() == key.GetPartNo()); - for (; pp < Head.Batches.size(); ++pp) { //TODO - merge small batches here - Y_ABORT_UNLESS(Head.Batches[pp].Packed); - Head.Batches[pp].SerializeTo(valueD); + Y_ABORT_UNLESS(pp < Head.GetBatches().size()); + Y_ABORT_UNLESS(Head.GetBatch(pp).GetOffset() == key.GetOffset()); + Y_ABORT_UNLESS(Head.GetBatch(pp).GetPartNo() == key.GetPartNo()); + for (; pp < Head.GetBatches().size(); ++pp) { //TODO - merge small batches here + Y_ABORT_UNLESS(Head.GetBatch(pp).Packed); + Head.GetBatch(pp).SerializeTo(valueD); } } - for (auto& b : NewHead.Batches) { + for (auto& b : NewHead.GetBatches()) { Y_ABORT_UNLESS(b.Packed); b.SerializeTo(valueD); } @@ -1519,6 +1535,10 @@ void TPartition::FilterDeadlinedWrites(const TActorContext& ctx, TMessageQueue& TabletCounters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); TabletCounters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(msg.Data.size() + msg.SourceId.size()); + Y_DEBUG_ABORT_UNLESS(WriteInflightSize >= msg.Data.size(), + "PQ %" PRIu64 ", Partition {%" PRIu32 ", %" PRIu32 "}, WriteInflightSize=%" PRIu64 ", msg.Data.size=%" PRISZT, + TabletID, Partition.OriginalPartitionId, Partition.InternalPartitionId, + WriteInflightSize, msg.Data.size()); 
WriteInflightSize -= msg.Data.size(); } @@ -1680,7 +1700,7 @@ void TPartition::BeginAppendHeadWithNewWrites(const TActorContext& ctx) NewHead.PartNo = 0; NewHead.PackedSize = 0; - Y_ABORT_UNLESS(NewHead.Batches.empty()); + Y_ABORT_UNLESS(NewHead.GetBatches().empty()); Parameters->OldPartsCleared = false; Parameters->HeadCleared = (Head.PackedSize == 0); @@ -1725,12 +1745,12 @@ void TPartition::EndAppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, co UpdateWriteBufferIsFullState(ctx.Now()); - if (!NewHead.Batches.empty() && !NewHead.Batches.back().Packed) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + if (!NewHead.GetBatches().empty() && !NewHead.GetLastBatch().Packed) { + NewHead.MutableLastBatch().Pack(); + NewHead.PackedSize += NewHead.GetLastBatch().GetPackedSize(); //add real packed size for this blob NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + NewHead.PackedSize -= NewHead.GetLastBatch().GetUnpackedSize(); } Y_ABORT_UNLESS((Parameters->HeadCleared ? 
0 : Head.PackedSize) + NewHead.PackedSize <= MaxBlobSize); //otherwise last PartitionedBlob.Add must compact all except last cl @@ -1764,7 +1784,7 @@ bool TPartition::WaitingForSubDomainQuota(const TActorContext& /*ctx*/, const ui return withSize > 0 || Size() > 0; } - return MeteringDataSize() + withSize > ReserveSize(); + return UserDataSize() + withSize > ReserveSize(); } void TPartition::RequestBlobQuota(size_t quotaSize) diff --git a/ydb/core/persqueue/percentile_counter.cpp b/ydb/core/persqueue/percentile_counter.cpp index 85edf4a950e4..dec88c19391d 100644 --- a/ydb/core/persqueue/percentile_counter.cpp +++ b/ydb/core/persqueue/percentile_counter.cpp @@ -177,6 +177,9 @@ void TPartitionHistogramWrapper::Setup(bool isSupportivePartition, std::unique_p } } void TPartitionHistogramWrapper::IncFor(ui64 key, ui64 value) { + if (!Inited) + return; + if (!IsSupportivePartition) { return Histogram->IncFor(key, value); } diff --git a/ydb/core/persqueue/percentile_counter.h b/ydb/core/persqueue/percentile_counter.h index 89ec2a2dddc9..cb78e7e49b54 100644 --- a/ydb/core/persqueue/percentile_counter.h +++ b/ydb/core/persqueue/percentile_counter.h @@ -59,8 +59,8 @@ NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter( class TPartitionCounterWrapper { private: - bool DoSave; - bool DoReport; + bool DoSave = false; + bool DoReport = false; TMaybe Counter; ui64 CounterValue = 0; bool Inited = false; diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index 0a7c0ff0d000..39839d698c4a 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -554,7 +554,7 @@ class TMonitoringProxy : public TActorBootstrapped { , TabletID(tabletId) , Inflight(inflight) { - for (auto& p: Partitions) { + for (auto& p : Partitions) { Results[p.first].push_back(Sprintf("Partition %u: NO DATA", p.first)); } } @@ -691,7 +691,11 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) ClearNewConfig(); for (auto& p : Partitions) { 
//change config for already created partitions - ctx.Send(p.second.Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); + if (p.first.IsSupportivePartition()) { + continue; + } + + ctx.Send(p.second.Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, BootstrapConfigTx ? *BootstrapConfigTx : NKikimrPQ::TBootstrapConfig())); } ChangePartitionConfigInflight += Partitions.size(); @@ -715,6 +719,8 @@ void TPersQueue::ApplyNewConfig(const NKikimrPQ::TPQTabletConfig& newConfig, { Config = newConfig; + PQ_LOG_D("Apply new config " << Config.ShortDebugString()); + ui32 cacheSize = CACHE_SIZE; if (Config.HasCacheSize()) { cacheSize = Config.GetCacheSize(); @@ -782,7 +788,7 @@ void TPersQueue::EndWriteConfig(const NKikimrClient::TResponse& resp, const TAct NewConfigShouldBeApplied = true; //when config will be inited with old value new config will be applied } -void TPersQueue::HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx) +void TPersQueue::HandleConfigReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx) { bool ok = (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && @@ -790,16 +796,68 @@ void TPersQueue::HandleConfigReadResponse(const NKikimrClient::TResponse& resp, (resp.HasSetExecutorFastLogPolicyResult()) && (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); if (!ok) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << " Config read error: " << resp.DebugString() << " " << ctx.SelfID); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); + PQ_LOG_ERROR_AND_DIE("Config read error: " << resp.ShortDebugString()); + return; + } + + ConfigReadResponse = std::move(resp); + + BeginInitTransactions(); + SendTransactionsReadRequest(GetTxKey(Min()), true, ctx); +} + +void TPersQueue::SendTransactionsReadRequest(const TString& fromKey, bool includeFrom, + const TActorContext& ctx) +{ + THolder request(new TEvKeyValue::TEvRequest); + 
request->Record.SetCookie(READ_TXS_COOKIE); + + AddCmdReadTransactionRange(*request, fromKey, includeFrom); + + request->Record.MutableCmdSetExecutorFastLogPolicy() + ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); + ctx.Send(ctx.SelfID, request.Release()); +} + +TString GetLastKey(const NKikimrClient::TKeyValueResponse::TReadRangeResult& result) +{ + if (!result.PairSize()) { + return {}; + } + + return result.GetPair(result.PairSize() - 1).GetKey(); +} + +void TPersQueue::HandleTransactionsReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx) +{ + bool ok = + (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && + (resp.ReadRangeResultSize() == 1) && + (resp.HasSetExecutorFastLogPolicyResult()) && + (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); + const auto& result = resp.GetReadRangeResult(0); + auto status = result.GetStatus(); + if (status != NKikimrProto::OVERRUN && + status != NKikimrProto::OK && + status != NKikimrProto::NODATA) { + ok = false; + } + if (!ok) { + PQ_LOG_ERROR_AND_DIE("Transactions read error: " << resp.ShortDebugString()); + return; + } + + TransactionsReadResults.emplace_back(std::move(result)); + + if (status == NKikimrProto::OVERRUN) { + SendTransactionsReadRequest(GetLastKey(result), false, ctx); return; } - ReadTxInfo(resp.GetReadResult(2), ctx); - ReadConfig(resp.GetReadResult(0), resp.GetReadRangeResult(0), ctx); - ReadTxWrites(resp.GetReadResult(2), ctx); - ReadState(resp.GetReadResult(1), ctx); + ReadTxInfo(ConfigReadResponse.GetReadResult(2), ctx); + ReadConfig(ConfigReadResponse.GetReadResult(0), TransactionsReadResults, ctx); + ReadTxWrites(ConfigReadResponse.GetReadResult(2), ctx); + ReadState(ConfigReadResponse.GetReadResult(1), ctx); } void TPersQueue::ReadTxInfo(const NKikimrClient::TKeyValueResponse::TReadResult& read, @@ -815,7 +873,7 @@ void TPersQueue::ReadTxInfo(const NKikimrClient::TKeyValueResponse::TReadResult& switch 
(read.GetStatus()) { case NKikimrProto::OK: { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " has a tx info"); + PQ_LOG_D("has a tx info"); NKikimrPQ::TTabletTxInfo info; Y_ABORT_UNLESS(info.ParseFromString(read.GetValue())); @@ -825,7 +883,7 @@ void TPersQueue::ReadTxInfo(const NKikimrClient::TKeyValueResponse::TReadResult& break; } case NKikimrProto::NODATA: { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " doesn't have tx info"); + PQ_LOG_D("doesn't have tx info"); InitPlanStep(); @@ -907,6 +965,37 @@ void TPersQueue::CreateOriginalPartition(const NKikimrPQ::TPQTabletConfig& confi ++OriginalPartitionsCount; } +void TPersQueue::MoveTopTxToCalculating(TDistributedTransaction& tx, + const TActorContext& ctx) +{ + std::tie(ExecStep, ExecTxId) = TxQueue.front(); + PQ_LOG_D("New ExecStep " << ExecStep << ", ExecTxId " << ExecTxId); + + switch (tx.Kind) { + case NKikimrPQ::TTransaction::KIND_DATA: + SendEvTxCalcPredicateToPartitions(ctx, tx); + break; + case NKikimrPQ::TTransaction::KIND_CONFIG: { + NPersQueue::TConverterFactoryPtr converterFactory; + CreateTopicConverter(tx.TabletConfig, + converterFactory, + tx.TopicConverter, + ctx); + CreateNewPartitions(tx.TabletConfig, + tx.TopicConverter, + ctx); + SendEvProposePartitionConfig(ctx, tx); + break; + } + case NKikimrPQ::TTransaction::KIND_UNKNOWN: + Y_ABORT_UNLESS(false); + } + + tx.State = NKikimrPQ::TTransaction::CALCULATING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); +} + void TPersQueue::AddSupportivePartition(const TPartitionId& partitionId) { Partitions.emplace(partitionId, @@ -958,23 +1047,28 @@ void TPersQueue::InitTxWrites(const NKikimrPQ::TTabletTxInfo& info, for (size_t i = 0; i != info.TxWritesSize(); ++i) { auto& txWrite = info.GetTxWrites(i); const TWriteId writeId = GetWriteId(txWrite); - ui32 partitionId = txWrite.GetOriginalPartitionId(); - TPartitionId shadowPartitionId(partitionId, 
writeId, txWrite.GetInternalPartitionId()); - TxWrites[writeId].Partitions.emplace(partitionId, shadowPartitionId); + TTxWriteInfo& writeInfo = TxWrites[writeId]; + if (txWrite.HasOriginalPartitionId()) { + ui32 partitionId = txWrite.GetOriginalPartitionId(); + TPartitionId shadowPartitionId(partitionId, writeId, txWrite.GetInternalPartitionId()); - AddSupportivePartition(shadowPartitionId); - CreateSupportivePartitionActor(shadowPartitionId, ctx); - SubscribeWriteId(writeId, ctx); + writeInfo.Partitions.emplace(partitionId, shadowPartitionId); - NextSupportivePartitionId = Max(NextSupportivePartitionId, shadowPartitionId.InternalPartitionId + 1); + AddSupportivePartition(shadowPartitionId); + CreateSupportivePartitionActor(shadowPartitionId, ctx); + + NextSupportivePartitionId = Max(NextSupportivePartitionId, shadowPartitionId.InternalPartitionId + 1); + } + + SubscribeWriteId(writeId, ctx); } NewSupportivePartitions.clear(); } void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& read, - const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, + const TVector& readRanges, const TActorContext& ctx) { Y_ABORT_UNLESS(read.HasStatus()); @@ -986,15 +1080,6 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& return; } - Y_ABORT_UNLESS(readRange.HasStatus()); - if (readRange.GetStatus() != NKikimrProto::OK && readRange.GetStatus() != NKikimrProto::NODATA) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << " Transactions read error " << ctx.SelfID << - " Error status code " << readRange.GetStatus()); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); - return; - } - Y_ABORT_UNLESS(!ConfigInited); if (read.GetStatus() == NKikimrProto::OK) { @@ -1027,15 +1112,54 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& Y_ABORT_UNLESS(TopicName.size(), "Need topic name here"); ctx.Send(CacheActor, new TEvPQ::TEvChangeCacheConfig(TopicName, 
cacheSize)); } else if (read.GetStatus() == NKikimrProto::NODATA) { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " no config, start with empty partitions and default config"); + PQ_LOG_D("no config, start with empty partitions and default config"); } else { PQ_LOG_ERROR_AND_DIE("Unexpected config read status: " << read.GetStatus()); return; } - THashMap> partitionTxs; - InitTransactions(readRange, partitionTxs); + for (const auto& readRange : readRanges) { + Y_ABORT_UNLESS(readRange.HasStatus()); + if (readRange.GetStatus() != NKikimrProto::OK && + readRange.GetStatus() != NKikimrProto::OVERRUN && + readRange.GetStatus() != NKikimrProto::NODATA) { + PQ_LOG_ERROR_AND_DIE("Transactions read error: " << readRange.GetStatus()); + return; + } + + for (size_t i = 0; i < readRange.PairSize(); ++i) { + const auto& pair = readRange.GetPair(i); + + PQ_LOG_D("ReadRange pair." << + " Key " << (pair.HasKey() ? pair.GetKey() : "unknown") << + ", Status " << pair.GetStatus()); + + NKikimrPQ::TTransaction tx; + Y_ABORT_UNLESS(tx.ParseFromString(pair.GetValue())); + + PQ_LOG_D("Load tx " << tx.ShortDebugString()); + + if (tx.GetState() == NKikimrPQ::TTransaction::CALCULATED) { + PQ_LOG_D("fix tx state"); + tx.SetState(NKikimrPQ::TTransaction::PLANNED); + } + + Txs.emplace(tx.GetTxId(), tx); + + if (tx.HasStep()) { + if (std::make_pair(tx.GetStep(), tx.GetTxId()) >= std::make_pair(ExecStep, ExecTxId)) { + PlannedTxs.emplace_back(tx.GetStep(), tx.GetTxId()); + } + } + } + } + + EndInitTransactions(); + EndReadConfig(ctx); +} +void TPersQueue::EndReadConfig(const TActorContext& ctx) +{ for (const auto& partition : Config.GetPartitions()) { // no partitions will be created with empty config const TPartitionId partitionId(partition.GetPartitionId()); CreateOriginalPartition(Config, @@ -1140,7 +1264,6 @@ void TPersQueue::InitializeMeteringSink(const TActorContext& ctx) { return result; }; - MeteringSink.Create(ctx.Now(), { .FlushInterval = 
TDuration::Seconds(pqConfig.GetBillingMeteringConfig().GetFlushIntervalSec()), .TabletId = ToString(TabletID()), @@ -1149,7 +1272,7 @@ void TPersQueue::InitializeMeteringSink(const TActorContext& ctx) { .YdbDatabaseId = Config.GetYdbDatabaseId(), .StreamName = streamName, .ResourceId = streamPath, - .PartitionsSize = Config.PartitionsSize(), + .PartitionsSize = CountActivePartitions(Config.GetPartitions()), .WriteQuota = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(), .ReservedSpace = storageLimitBytes, .ConsumersCount = countReadRulesWithPricing(ctx, Config), @@ -1224,12 +1347,16 @@ void TPersQueue::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& break; case READ_CONFIG_COOKIE: // read is only for config - is signal to create interal actors - HandleConfigReadResponse(resp, ctx); + HandleConfigReadResponse(std::move(resp), ctx); + break; + case READ_TXS_COOKIE: + HandleTransactionsReadResponse(std::move(resp), ctx); break; case WRITE_STATE_COOKIE: EndWriteTabletState(resp, ctx); break; case WRITE_TX_COOKIE: + PQ_LOG_D("Handle TEvKeyValue::TEvResponse (WRITE_TX_COOKIE)"); EndWriteTxs(resp, ctx); break; default: @@ -1262,6 +1389,9 @@ TPartitionInfo& TPersQueue::GetPartitionInfo(const TPartitionId& partitionId) void TPersQueue::Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvPartitionCounters" << + " PartitionId " << ev->Get()->Partition); + const auto& partitionId = ev->Get()->Partition; auto& partition = GetPartitionInfo(partitionId); auto diff = ev->Get()->Counters.MakeDiffForAggr(partition.Baseline); @@ -1387,9 +1517,10 @@ void TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& c ctx); } partition.PendingRequests.clear(); + } else { + ++PartitionsInited; } - ++PartitionsInited; Y_ABORT_UNLESS(ConfigInited);//partitions are inited only after config auto allInitialized = AllOriginalPartitionsInited(); @@ -1412,6 +1543,9 @@ void 
TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& c void TPersQueue::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvError" << + " Cookie " << ev->Get()->Cookie << + ", Error " << ev->Get()->Error); auto it = ResponseProxy.find(ev->Get()->Cookie); if (it == ResponseProxy.end()) @@ -1447,6 +1581,7 @@ void TPersQueue::FinishResponse(THashMap>::iter void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvUpdateConfig"); if (!ConfigInited) { UpdateConfigRequests.emplace_back(ev->Release(), ev->Sender); return; @@ -1457,6 +1592,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorCon void TPersQueue::Handle(TEvPQ::TEvPartitionConfigChanged::TPtr&, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvPartitionConfigChanged"); + Y_ABORT_UNLESS(ChangePartitionConfigInflight > 0); --ChangePartitionConfigInflight; @@ -1499,9 +1636,35 @@ void TPersQueue::CreateTopicConverter(const NKikimrPQ::TPQTabletConfig& config, Y_ABORT_UNLESS(topicConverter->IsValid(), "%s", topicConverter->GetReason().c_str()); } +void TPersQueue::UpdateReadRuleGenerations(NKikimrPQ::TPQTabletConfig& cfg) const +{ + Y_ABORT_UNLESS(cfg.HasVersion()); + const int curConfigVersion = cfg.GetVersion(); + + // set rr generation for provided read rules + THashMap> existed; // map name -> rrVersion, rrGeneration + for (const auto& c : Config.GetConsumers()) { + existed[c.GetName()] = std::make_pair(c.GetVersion(), c.GetGeneration()); + } + + for (auto& c : *cfg.MutableConsumers()) { + auto it = existed.find(c.GetName()); + ui64 generation = 0; + if (it != existed.end() && it->second.first == c.GetVersion()) { + generation = it->second.second; + } else { + generation = curConfigVersion; + } + c.SetGeneration(generation); + if (ReadRuleCompatible()) { + cfg.AddReadRuleGenerations(generation); + } + } +} + void 
TPersQueue::ProcessUpdateConfigRequest(TAutoPtr ev, const TActorId& sender, const TActorContext& ctx) { - auto& record = ev->Record; + const auto& record = ev->GetRecord(); int oldConfigVersion = Config.HasVersion() ? Config.GetVersion() : -1; int newConfigVersion = NewConfig.HasVersion() ? NewConfig.GetVersion() : oldConfigVersion; @@ -1511,7 +1674,7 @@ void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr> existed; // map name -> rrVersion, rrGeneration - for (const auto& c : Config.GetConsumers()) { - existed[c.GetName()] = std::make_pair(c.GetVersion(), c.GetGeneration()); - } - - for (auto& c : *cfg.MutableConsumers()) { - auto it = existed.find(c.GetName()); - ui64 generation = 0; - if (it != existed.end() && it->second.first == c.GetVersion()) { - generation = it->second.second; - } else { - generation = curConfigVersion; - } - c.SetGeneration(generation); - if (ReadRuleCompatible()) { - cfg.AddReadRuleGenerations(generation); - } - } - } + UpdateReadRuleGenerations(cfg); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Config update version " << cfg.GetVersion() << "(current " << Config.GetVersion() << ") received from actor " << sender @@ -1682,7 +1825,7 @@ void TPersQueue::AddCmdWriteConfig(TEvKeyValue::TEvRequest* request, keyRange = TPartitionKeyRange::Parse(mg.GetKeyRange()); } - sourceIdWriter.RegisterSourceId(mg.GetId(), 0, 0, ctx.Now(), std::move(keyRange)); + sourceIdWriter.RegisterSourceId(mg.GetId(), 0, 0, ctx.Now(), std::move(keyRange), false); } for (const auto& partition : cfg.GetPartitions()) { @@ -1698,6 +1841,8 @@ void TPersQueue::ClearNewConfig() void TPersQueue::Handle(TEvPersQueue::TEvDropTablet::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvDropTablet"); + auto& record = ev->Get()->Record; ui64 txId = record.GetTxId(); @@ -1732,13 +1877,19 @@ void TPersQueue::Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& } ui32 cnt = 0; for (auto& p : Partitions) { - cnt += 
p.second.InitDone; + if (p.first.IsSupportivePartition()) { + continue; + } + + cnt += p.second.InitDone; } TActorId ans = CreateOffsetsProxyActor(TabletID(), ev->Sender, cnt, ctx); for (auto& p : Partitions) { - if (!p.second.InitDone) + if (!p.second.InitDone || p.first.IsSupportivePartition()) { continue; + } + THolder event = MakeHolder(ans, ev->Get()->Record.HasClientId() ? ev->Get()->Record.GetClientId() : ""); ctx.Send(p.second.Actor, event.Release()); @@ -1782,23 +1933,34 @@ void TPersQueue::ProcessStatusRequests(const TActorContext &ctx) { void TPersQueue::Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvStatus"); + ReadBalancerActorId = ev->Sender; if (!ConfigInited || !AllOriginalPartitionsInited()) { - StatusRequests.push_back(ev); - return; + PQ_LOG_D("Postpone the request." << + " ConfigInited " << static_cast(ConfigInited) << + ", PartitionsInited " << PartitionsInited << + ", OriginalPartitionsCount " << OriginalPartitionsCount); + StatusRequests.push_back(ev); + return; } ui32 cnt = 0; - for (auto& [_, partitionInfo] : Partitions) { - cnt += partitionInfo.InitDone; + for (auto& [partitionId, partitionInfo] : Partitions) { + if (partitionId.IsSupportivePartition()) { + continue; + } + + cnt += partitionInfo.InitDone; } TActorId ans = CreateStatusProxyActor(TabletID(), ev->Sender, cnt, ev->Cookie, ctx); for (auto& p : Partitions) { - if (!p.second.InitDone) { + if (!p.second.InitDone || p.first.IsSupportivePartition()) { continue; } + THolder event; if (ev->Get()->Record.GetConsumers().empty()) { event = MakeHolder(ans, ev->Get()->Record.HasClientId() ? 
ev->Get()->Record.GetClientId() : "", @@ -2542,7 +2704,9 @@ const TPartitionInfo& TPersQueue::GetPartitionInfo(const NKikimrClient::TPersQue const TWriteId writeId = GetWriteId(req); ui32 originalPartitionId = req.GetPartition(); - Y_ABORT_UNLESS(TxWrites.contains(writeId) && TxWrites.at(writeId).Partitions.contains(originalPartitionId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId) && TxWrites.at(writeId).Partitions.contains(originalPartitionId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}, Partition %" PRIu32, + TabletID(), writeId.NodeId, writeId.KeyId, originalPartitionId); const TPartitionId& partitionId = TxWrites.at(writeId).Partitions.at(originalPartitionId); Y_ABORT_UNLESS(Partitions.contains(partitionId)); @@ -2837,6 +3001,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvServerConnected"); + auto it = PipesInfo.insert({ev->Get()->ClientId, {}}).first; it->second.ServerActors++; LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " server connected, pipe " @@ -2848,6 +3014,8 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActo void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvServerDisconnected"); + //inform partition if needed; auto it = PipesInfo.find(ev->Get()->ClientId); if (it != PipesInfo.end()) { @@ -2871,6 +3039,8 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TA void TPersQueue::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvClientConnected"); + Y_ABORT_UNLESS(ev->Get()->Leader, "Unexpectedly connected to follower of tablet %" PRIu64, ev->Get()->TabletId); if (PipeClientCache->OnConnect(ev)) { @@ -2885,6 +3055,8 @@ void 
TPersQueue::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActo void TPersQueue::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvClientDestroyed"); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Client pipe to tablet " << ev->Get()->TabletId << " is reset"); @@ -3032,14 +3204,15 @@ void TPersQueue::Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, MediatorTimeCastEntry = message->Entry; Y_ABORT_UNLESS(MediatorTimeCastEntry); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << - "Registered with mediator time cast"); + PQ_LOG_D("Registered with mediator time cast"); TryWriteTxs(ctx); } void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvInterconnect::TEvNodeInfo"); + Y_ABORT_UNLESS(ev->Get()->Node); DCId = ev->Get()->Node->Location.GetDataCenterId(); ResourceMetrics = Executor()->GetResourceMetrics(); @@ -3051,19 +3224,28 @@ void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorCont request->Record.AddCmdRead()->SetKey(KeyState()); request->Record.AddCmdRead()->SetKey(KeyTxInfo()); - auto cmd = request->Record.AddCmdReadRange(); - cmd->MutableRange()->SetFrom(GetTxKey(Min())); - cmd->MutableRange()->SetIncludeFrom(true); - cmd->MutableRange()->SetTo(GetTxKey(Max())); - cmd->MutableRange()->SetIncludeTo(true); - cmd->SetIncludeData(true); - request->Record.MutableCmdSetExecutorFastLogPolicy() ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); ctx.Send(ctx.SelfID, request.Release()); + ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); } +void TPersQueue::AddCmdReadTransactionRange(TEvKeyValue::TEvRequest& request, + const TString& fromKey, bool includeFrom) +{ + auto cmd = request.Record.AddCmdReadRange(); + cmd->MutableRange()->SetFrom(fromKey); + 
cmd->MutableRange()->SetIncludeFrom(includeFrom); + cmd->MutableRange()->SetTo(GetTxKey(Max())); + cmd->MutableRange()->SetIncludeTo(true); + cmd->SetIncludeData(true); + + PQ_LOG_D("Transactions request." << + " From " << cmd->MutableRange()->GetFrom() << + ", To " << cmd->MutableRange()->GetTo()); +} + void TPersQueue::HandleWakeup(const TActorContext& ctx) { THashSet groups; for (auto& p : Partitions) { @@ -3098,7 +3280,7 @@ void TPersQueue::DeleteExpiredTransactions(const TActorContext& ctx) void TPersQueue::Handle(TEvPersQueue::TEvCancelTransactionProposal::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvPersQueue::TEvCancelTransactionProposal"); + PQ_LOG_D("Handle TEvPersQueue::TEvCancelTransactionProposal"); NKikimrPQ::TEvCancelTransactionProposal& event = ev->Get()->Record; Y_ABORT_UNLESS(event.HasTxId()); @@ -3114,9 +3296,9 @@ void TPersQueue::Handle(TEvPersQueue::TEvCancelTransactionProposal::TPtr& ev, co void TPersQueue::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx) { - PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransaction " << ev->Get()->Record.DebugString()); + const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->GetRecord(); + PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransaction " << event.ShortDebugString()); - NKikimrPQ::TEvProposeTransaction& event = ev->Get()->Record; switch (event.GetTxBodyCase()) { case NKikimrPQ::TEvProposeTransaction::kData: HandleDataTransaction(ev->Release(), ctx); @@ -3141,7 +3323,7 @@ bool TPersQueue::CheckTxWriteOperation(const NKikimrPQ::TPartitionOperation& ope TPartitionId partitionId(operation.GetPartitionId(), writeId, operation.GetSupportivePartition()); - PQ_LOG_D("partitionId=" << partitionId); + PQ_LOG_D("PartitionId " << partitionId << " for WriteId " << writeId); return Partitions.contains(partitionId); } @@ -3152,7 +3334,6 @@ bool TPersQueue::CheckTxWriteOperations(const 
NKikimrPQ::TDataTransaction& txBod } const TWriteId writeId = GetWriteId(txBody); - PQ_LOG_D("writeId=" << writeId); for (auto& operation : txBody.GetOperations()) { auto isWrite = [](const NKikimrPQ::TPartitionOperation& o) { @@ -3172,13 +3353,13 @@ bool TPersQueue::CheckTxWriteOperations(const NKikimrPQ::TDataTransaction& txBod void TPersQueue::HandleDataTransaction(TAutoPtr ev, const TActorContext& ctx) { - NKikimrPQ::TEvProposeTransaction& event = ev->Record; + NKikimrPQ::TEvProposeTransaction& event = *ev->MutableRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(event.HasData()); const NKikimrPQ::TDataTransaction& txBody = event.GetData(); if (TabletState != NKikimrPQ::ENormal) { - PQ_LOG_D("invalid PQ tablet state (" << NKikimrPQ::ETabletState_Name(TabletState) << ")"); + PQ_LOG_D("TxId " << event.GetTxId() << " invalid PQ tablet state (" << NKikimrPQ::ETabletState_Name(TabletState) << ")"); SendProposeTransactionAbort(ActorIdFromProto(event.GetSourceActor()), event.GetTxId(), NKikimrPQ::TError::ERROR, @@ -3192,7 +3373,7 @@ void TPersQueue::HandleDataTransaction(TAutoPtr partitionId = FindPartitionId(txBody); if (!partitionId.Defined()) { - PQ_LOG_D("unknown partition for WriteId " << txBody.GetWriteId()); + PQ_LOG_W("TxId " << event.GetTxId() << " unknown partition for WriteId " << txBody.GetWriteId()); SendProposeTransactionAbort(ActorIdFromProto(event.GetSourceActor()), event.GetTxId(), NKikimrPQ::TError::INTERNAL, @@ -3256,7 +3464,7 @@ void TPersQueue::HandleDataTransaction(TAutoPtr ev, const TActorContext& ctx) { - NKikimrPQ::TEvProposeTransaction& event = ev->Record; + const NKikimrPQ::TEvProposeTransaction& event = ev->GetRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kConfig); Y_ABORT_UNLESS(event.HasConfig()); @@ -3267,14 +3475,7 @@ void TPersQueue::HandleConfigTransaction(TAutoPtrGet()->Record; - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet: " 
<< TabletID() << - ", PlanStep: " << event.GetStep() << - ", Mediator: " << event.GetMediatorID()); + PQ_LOG_D("Handle TEvTxProcessing::TEvPlanStep " << ev->Get()->Record.ShortDebugString()); EvPlanStepQueue.emplace_back(ev->Sender, ev->Release().Release()); @@ -3283,7 +3484,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvPlanStep::TPtr& ev, const TActorCont void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvTxProcessing::TEvReadSet"); + PQ_LOG_D("Handle TEvTxProcessing::TEvReadSet " << ev->Get()->Record.ShortDebugString()); NKikimrTx::TEvReadSet& event = ev->Get()->Record; Y_ABORT_UNLESS(event.HasTxId()); @@ -3293,7 +3494,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte ack = std::make_unique(*ev->Get(), TabletID()); } - if (auto tx = GetTransaction(ctx, event.GetTxId()); tx && tx->Senders.contains(event.GetTabletProducer())) { + if (auto tx = GetTransaction(ctx, event.GetTxId()); tx && tx->PredicatesReceived.contains(event.GetTabletProducer())) { tx->OnReadSet(event, ev->Sender, std::move(ack)); if (tx->State == NKikimrPQ::TTransaction::WAIT_RS) { @@ -3302,6 +3503,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte TryWriteTxs(ctx); } } else if (ack) { + PQ_LOG_D("send TEvReadSetAck to " << event.GetTabletProducer()); // // для неизвестных транзакций подтверждение отправляется сразу // @@ -3311,7 +3513,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte void TPersQueue::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvTxProcessing::TEvReadSetAck"); + PQ_LOG_D("Handle TEvTxProcessing::TEvReadSetAck " << ev->Get()->Record.ShortDebugString()); NKikimrTx::TEvReadSetAck& event = ev->Get()->Record; 
Y_ABORT_UNLESS(event.HasTxId()); @@ -3324,7 +3526,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorCo tx->OnReadSetAck(event); tx->UnbindMsgsFromPipe(event.GetTabletConsumer()); - if (tx->State == NKikimrPQ::TTransaction::EXECUTED) { + if (tx->State == NKikimrPQ::TTransaction::WAIT_RS_ACKS) { CheckTxState(ctx, *tx); TryWriteTxs(ctx); @@ -3335,19 +3537,19 @@ void TPersQueue::Handle(TEvPQ::TEvTxCalcPredicateResult::TPtr& ev, const TActorC { const TEvPQ::TEvTxCalcPredicateResult& event = *ev->Get(); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << - " Handle TEvPQ::TEvTxCalcPredicateResult" << - " Step " << event.Step << - " TxId " << event.TxId << - " Partition " << event.Partition << - " Predicate " << (event.Predicate ? "true" : "false")); + PQ_LOG_D("Handle TEvPQ::TEvTxCalcPredicateResult" << + " Step " << event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition << + ", Predicate " << event.Predicate); auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; } + Y_ABORT_UNLESS(tx->State == NKikimrPQ::TTransaction::CALCULATING); + tx->OnTxCalcPredicateResult(event); CheckTxState(ctx, *tx); @@ -3359,6 +3561,11 @@ void TPersQueue::Handle(TEvPQ::TEvProposePartitionConfigResult::TPtr& ev, const { const TEvPQ::TEvProposePartitionConfigResult& event = *ev->Get(); + PQ_LOG_D("Handle TEvPQ::TEvProposePartitionConfigResult" << + " Step " << event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition); + auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; @@ -3375,10 +3582,13 @@ void TPersQueue::Handle(TEvPQ::TEvProposePartitionConfigResult::TPtr& ev, const void TPersQueue::Handle(TEvPQ::TEvTxCommitDone::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvPQ::TEvTxCommitDone"); - const TEvPQ::TEvTxCommitDone& event = *ev->Get(); + PQ_LOG_D("Handle TEvPQ::TEvTxCommitDone" << + " Step " << 
event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition); + auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; @@ -3424,6 +3634,7 @@ bool TPersQueue::CanProcessTxWrites() const void TPersQueue::SubscribeWriteId(const TWriteId& writeId, const TActorContext& ctx) { + PQ_LOG_D("send TEvSubscribeLock for WriteId " << writeId); ctx.Send(NLongTxService::MakeLongTxServiceID(writeId.NodeId), new NLongTxService::TEvLongTxService::TEvSubscribeLock(writeId.KeyId, writeId.NodeId)); } @@ -3431,6 +3642,7 @@ void TPersQueue::SubscribeWriteId(const TWriteId& writeId, void TPersQueue::UnsubscribeWriteId(const TWriteId& writeId, const TActorContext& ctx) { + PQ_LOG_D("send TEvUnsubscribeLock for WriteId " << writeId); ctx.Send(NLongTxService::MakeLongTxServiceID(writeId.NodeId), new NLongTxService::TEvLongTxService::TEvUnsubscribeLock(writeId.KeyId, writeId.NodeId)); } @@ -3475,6 +3687,7 @@ void TPersQueue::BeginWriteTxs(const TActorContext& ctx) PendingSupportivePartitions = std::move(NewSupportivePartitions); NewSupportivePartitions.clear(); + PQ_LOG_D("Send TEvKeyValue::TEvRequest (WRITE_TX_COOKIE)"); ctx.Send(ctx.SelfID, request.Release()); TryReturnTabletStateAll(ctx); @@ -3529,7 +3742,7 @@ void TPersQueue::ProcessProposeTransactionQueue(const TActorContext& ctx) const auto front = std::move(EvProposeTransactionQueue.front()); EvProposeTransactionQueue.pop_front(); - const NKikimrPQ::TEvProposeTransaction& event = front->Record; + const NKikimrPQ::TEvProposeTransaction& event = front->GetRecord(); TDistributedTransaction& tx = Txs[event.GetTxId()]; switch (tx.State) { @@ -3537,9 +3750,15 @@ void TPersQueue::ProcessProposeTransactionQueue(const TActorContext& ctx) tx.OnProposeTransaction(event, GetAllowedStep(), TabletID()); + if (tx.Kind == NKikimrPQ::TTransaction::KIND_CONFIG) { + UpdateReadRuleGenerations(tx.TabletConfig); + } + if (tx.WriteId.Defined()) { const TWriteId& writeId = *tx.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + 
Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", TxId %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), tx.TxId, writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); writeInfo.TxId = tx.TxId; } @@ -3657,20 +3876,34 @@ void TPersQueue::ProcessWriteTxs(const TActorContext& ctx, void TPersQueue::ProcessDeleteTxs(const TActorContext& ctx, NKikimrClient::TKeyValueRequest& request) { - Y_ABORT_UNLESS(!WriteTxsInProgress); + Y_ABORT_UNLESS(!WriteTxsInProgress, + "PQ %" PRIu64, + TabletID()); for (ui64 txId : DeleteTxs) { - auto tx = GetTransaction(ctx, txId); - Y_ABORT_UNLESS(tx); - - tx->AddCmdDelete(request); + PQ_LOG_D("delete key for TxId " << txId); + AddCmdDeleteTx(request, txId); - Txs.erase(tx->TxId); + auto tx = GetTransaction(ctx, txId); + if (tx) { + ChangedTxs.insert(txId); + } } DeleteTxs.clear(); } +void TPersQueue::AddCmdDeleteTx(NKikimrClient::TKeyValueRequest& request, + ui64 txId) +{ + TString key = GetTxKey(txId); + auto range = request.AddCmdDeleteRange()->MutableRange(); + range->SetFrom(key); + range->SetIncludeFrom(true); + range->SetTo(key); + range->SetIncludeTo(true); +} + void TPersQueue::ProcessConfigTx(const TActorContext& ctx, TEvKeyValue::TEvRequest* request) { @@ -3715,11 +3948,16 @@ void TPersQueue::SavePlanStep(NKikimrPQ::TTabletTxInfo& info) void TPersQueue::SaveTxWrites(NKikimrPQ::TTabletTxInfo& info) { for (auto& [writeId, write] : TxWrites) { - for (auto [partitionId, shadowPartitionId] : write.Partitions) { + if (write.Partitions.empty()) { auto* txWrite = info.MutableTxWrites()->Add(); SetWriteId(*txWrite, writeId); - txWrite->SetOriginalPartitionId(partitionId); - txWrite->SetInternalPartitionId(shadowPartitionId.InternalPartitionId); + } else { + for (auto [partitionId, shadowPartitionId] : write.Partitions) { + auto* txWrite = info.MutableTxWrites()->Add(); + SetWriteId(*txWrite, writeId); + txWrite->SetOriginalPartitionId(partitionId); + 
txWrite->SetInternalPartitionId(shadowPartitionId.InternalPartitionId); + } } } @@ -3773,8 +4011,8 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, TString body; Y_ABORT_UNLESS(data.SerializeToString(&body)); - PQ_LOG_D("Send TEvTxProcessing::TEvReadSet to " << tx.Receivers.size() << " receivers. Wait TEvTxProcessing::TEvReadSet from " << tx.Senders.size() << " senders."); - for (ui64 receiverId : tx.Receivers) { + PQ_LOG_D("Send TEvTxProcessing::TEvReadSet to " << tx.PredicateRecipients.size() << " receivers. Wait TEvTxProcessing::TEvReadSet from " << tx.PredicatesReceived.size() << " senders."); + for (auto& [receiverId, _] : tx.PredicateRecipients) { if (receiverId != TabletID()) { auto event = std::make_unique(tx.Step, tx.TxId, @@ -3783,6 +4021,7 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, TabletID(), body, 0); + PQ_LOG_D("Send TEvReadSet to tablet " << receiverId); SendToPipe(receiverId, tx, std::move(event), ctx); } } @@ -3791,7 +4030,9 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, void TPersQueue::SendEvReadSetAckToSenders(const TActorContext& ctx, TDistributedTransaction& tx) { + PQ_LOG_D("TPersQueue::SendEvReadSetAckToSenders"); for (auto& [target, event] : tx.ReadSetAcks) { + PQ_LOG_D("Send TEvTxProcessing::TEvReadSetAck " << event->ToString()); ctx.Send(target, event.release()); } } @@ -3849,7 +4090,9 @@ void TPersQueue::SendEvTxCalcPredicateToPartitions(const TActorContext& ctx, if (tx.WriteId.Defined()) { const TWriteId& writeId = *tx.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", TxId %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), tx.TxId, writeId.NodeId, writeId.KeyId); const TTxWriteInfo& writeInfo = TxWrites.at(writeId); for (auto& [originalPartitionId, partitionId] : writeInfo.Partitions) { @@ -3886,7 +4129,9 @@ void TPersQueue::SendEvTxCommitToPartitions(const TActorContext& ctx, auto 
event = std::make_unique(tx.Step, tx.TxId); auto p = Partitions.find(TPartitionId(partitionId)); - Y_ABORT_UNLESS(p != Partitions.end()); + Y_ABORT_UNLESS(p != Partitions.end(), + "Tablet %" PRIu64 ", Partition %" PRIu32 ", TxId %" PRIu64, + TabletID(), partitionId, tx.TxId); ctx.Send(p->second.Actor, event.release()); } @@ -3969,11 +4214,10 @@ const THashSet& TPersQueue::GetBindedTxs(ui64 tabletId) TDistributedTransaction* TPersQueue::GetTransaction(const TActorContext& ctx, ui64 txId) { + Y_UNUSED(ctx); auto p = Txs.find(txId); if (p == Txs.end()) { - LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << - " Unknown transaction " << txId); + PQ_LOG_W("Unknown transaction " << txId); return nullptr; } return &p->second; @@ -3982,116 +4226,95 @@ TDistributedTransaction* TPersQueue::GetTransaction(const TActorContext& ctx, void TPersQueue::CheckTxState(const TActorContext& ctx, TDistributedTransaction& tx) { + PQ_LOG_D("TxId " << tx.TxId << + ", State " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + switch (tx.State) { case NKikimrPQ::TTransaction::UNKNOWN: - Y_ABORT_UNLESS(tx.TxId != Max()); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=UNKNOWN"); + Y_ABORT_UNLESS(tx.TxId != Max(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); WriteTx(tx, NKikimrPQ::TTransaction::PREPARED); ScheduleProposeTransactionResult(tx); tx.State = NKikimrPQ::TTransaction::PREPARING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PREPARING: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PREPARING"); + Y_ABORT_UNLESS(tx.WriteInProgress, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); tx.WriteInProgress = false; - // - // запланированные события будут отправлены в EndWriteTxs - // + // scheduled events will be sent to EndWriteTxs tx.State = NKikimrPQ::TTransaction::PREPARED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << 
NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PREPARED: - Y_ABORT_UNLESS(tx.Step != Max()); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PREPARED"); + Y_ABORT_UNLESS(tx.Step != Max(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); WriteTx(tx, NKikimrPQ::TTransaction::PLANNED); tx.State = NKikimrPQ::TTransaction::PLANNING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PLANNING: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PLANNING"); + Y_ABORT_UNLESS(tx.WriteInProgress, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); tx.WriteInProgress = false; - // - // запланированные события будут отправлены в EndWriteTxs - // + // scheduled events will be sent to EndWriteTxs tx.State = NKikimrPQ::TTransaction::PLANNED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); [[fallthrough]]; case NKikimrPQ::TTransaction::PLANNED: - PQ_LOG_T("TxId="<< tx.TxId << ", State=PLANNED" << - ", (!TxQueue.empty())=" << !TxQueue.empty()); + PQ_LOG_D("TxQueue.size " << TxQueue.size()); if (!TxQueue.empty() && (TxQueue.front().second == tx.TxId)) { - std::tie(ExecStep, ExecTxId) = TxQueue.front(); - PQ_LOG_D("ExecStep " << ExecStep << ", ExecTxId " << ExecTxId); - - switch (tx.Kind) { - case NKikimrPQ::TTransaction::KIND_DATA: - SendEvTxCalcPredicateToPartitions(ctx, tx); - break; - case NKikimrPQ::TTransaction::KIND_CONFIG: { - NPersQueue::TConverterFactoryPtr converterFactory; - CreateTopicConverter(tx.TabletConfig, - converterFactory, - tx.TopicConverter, - ctx); - CreateNewPartitions(tx.TabletConfig, - tx.TopicConverter, - ctx); - SendEvProposePartitionConfig(ctx, tx); - break; - } - case NKikimrPQ::TTransaction::KIND_UNKNOWN: - Y_ABORT_UNLESS(false); - } - - tx.State = NKikimrPQ::TTransaction::CALCULATING; + MoveTopTxToCalculating(tx, ctx); } break; case 
NKikimrPQ::TTransaction::CALCULATING: - Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected); + Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected, + "PQ %" PRIu64 ", TxId %" PRIu64 ", PartitionRepliesCount %" PRISZT ", PartitionRepliesExpected %" PRISZT, + TabletID(), tx.TxId, + tx.PartitionRepliesCount, tx.PartitionRepliesExpected); - PQ_LOG_T("TxId="<< tx.TxId << ", State=CALCULATING" << - ", tx.PartitionRepliesCount=" << tx.PartitionRepliesCount << - ", tx.PartitionRepliesExpected=" << tx.PartitionRepliesExpected); + PQ_LOG_D("Received " << tx.PartitionRepliesCount << + ", Expected " << tx.PartitionRepliesExpected); if (tx.PartitionRepliesCount == tx.PartitionRepliesExpected) { switch (tx.Kind) { case NKikimrPQ::TTransaction::KIND_DATA: - SendEvReadSetToReceivers(ctx, tx); - - WriteTx(tx, NKikimrPQ::TTransaction::WAIT_RS); - - tx.State = NKikimrPQ::TTransaction::CALCULATED; - break; - case NKikimrPQ::TTransaction::KIND_CONFIG: - SendEvReadSetToReceivers(ctx, tx); + WriteTx(tx, NKikimrPQ::TTransaction::CALCULATED); - tx.State = NKikimrPQ::TTransaction::WAIT_RS; + tx.State = NKikimrPQ::TTransaction::CALCULATED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); - CheckTxState(ctx, tx); break; case NKikimrPQ::TTransaction::KIND_UNKNOWN: @@ -4102,25 +4325,25 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, break; case NKikimrPQ::TTransaction::CALCULATED: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=CALCULATED"); - - tx.WriteInProgress = false; - tx.State = NKikimrPQ::TTransaction::WAIT_RS; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); - [[fallthrough]]; - - case NKikimrPQ::TTransaction::WAIT_RS: // // the number of TEvReadSetAck sent should not be greater than the number of senders // from TEvProposeTransaction // - Y_ABORT_UNLESS(tx.ReadSetAcks.size() <= tx.Senders.size()); + 
Y_ABORT_UNLESS(tx.ReadSetAcks.size() <= tx.PredicatesReceived.size(), + "PQ %" PRIu64 ", TxId %" PRIu64 ", ReadSetAcks.size %" PRISZT ", PredicatesReceived.size %" PRISZT, + TabletID(), tx.TxId, + tx.ReadSetAcks.size(), tx.PredicatesReceived.size()); + + SendEvReadSetToReceivers(ctx, tx); - PQ_LOG_T("TxId="<< tx.TxId << ", State=WAIT_RS" << - ", tx.HaveParticipantsDecision()=" << tx.HaveParticipantsDecision()); + [[fallthrough]]; + + case NKikimrPQ::TTransaction::WAIT_RS: + PQ_LOG_D("HaveParticipantsDecision " << tx.HaveParticipantsDecision()); if (tx.HaveParticipantsDecision()) { if (tx.GetDecision() == NKikimrTx::TReadSetData::DECISION_COMMIT) { @@ -4130,6 +4353,8 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, } tx.State = NKikimrPQ::TTransaction::EXECUTING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); } else { break; } @@ -4137,23 +4362,28 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, [[fallthrough]]; case NKikimrPQ::TTransaction::EXECUTING: - Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected); + Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected, + "PQ %" PRIu64 ", TxId %" PRIu64 ", PartitionRepliesCount %" PRISZT ", PartitionRepliesExpected %" PRISZT, + TabletID(), tx.TxId, + tx.PartitionRepliesCount, tx.PartitionRepliesExpected); + + PQ_LOG_D("Received " << tx.PartitionRepliesCount << + ", Expected " << tx.PartitionRepliesExpected); - PQ_LOG_T("TxId="<< tx.TxId << ", State=EXECUTING" << - ", tx.PartitionRepliesCount=" << tx.PartitionRepliesCount << - ", tx.PartitionRepliesExpected=" << tx.PartitionRepliesExpected); if (tx.PartitionRepliesCount == tx.PartitionRepliesExpected) { - Y_ABORT_UNLESS(!TxQueue.empty()); - Y_ABORT_UNLESS(TxQueue.front().second == tx.TxId); + Y_ABORT_UNLESS(!TxQueue.empty(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); + Y_ABORT_UNLESS(TxQueue.front().second == tx.TxId, + "PQ %" PRIu64 ", TxId %" 
PRIu64, + TabletID(), tx.TxId); SendEvProposeTransactionResult(ctx, tx); switch (tx.Kind) { case NKikimrPQ::TTransaction::KIND_DATA: - SendEvReadSetAckToSenders(ctx, tx); break; case NKikimrPQ::TTransaction::KIND_CONFIG: - SendEvReadSetAckToSenders(ctx, tx); ApplyNewConfig(tx.TabletConfig, ctx); TabletConfigTx = tx.TabletConfig; BootstrapConfigTx = tx.BootstrapConfig; @@ -4162,30 +4392,85 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, Y_ABORT_UNLESS(false); } - tx.State = NKikimrPQ::TTransaction::EXECUTED; + WriteTx(tx, NKikimrPQ::TTransaction::EXECUTED); - TxQueue.pop(); - TryStartTransaction(ctx); - } else { - break; + PQ_LOG_D("delete partitions for TxId " << tx.TxId); + BeginDeletePartitions(tx); + + tx.State = NKikimrPQ::TTransaction::EXECUTED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); } - [[fallthrough]]; + break; case NKikimrPQ::TTransaction::EXECUTED: - PQ_LOG_T("TxId="<< tx.TxId << ", State=EXECUTED, tx.HaveAllRecipientsReceive()=" << tx.HaveAllRecipientsReceive()); - if (tx.HaveAllRecipientsReceive()) { - if (tx.WriteId.Defined()) { - BeginDeleteTx(tx); - } else { - DeleteTx(tx); - } + SendEvReadSetAckToSenders(ctx, tx); + + tx.State = NKikimrPQ::TTransaction::WAIT_RS_ACKS; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + + [[fallthrough]]; + + case NKikimrPQ::TTransaction::WAIT_RS_ACKS: + PQ_LOG_D("HaveAllRecipientsReceive " << tx.HaveAllRecipientsReceive() << + ", AllSupportivePartitionsHaveBeenDeleted " << AllSupportivePartitionsHaveBeenDeleted(tx.WriteId)); + if (tx.HaveAllRecipientsReceive() && AllSupportivePartitionsHaveBeenDeleted(tx.WriteId)) { + DeleteTx(tx); + // implicitly switch to the state DELETING + } + + break; + + case NKikimrPQ::TTransaction::DELETING: + // The PQ tablet has persisted its state. Now she can delete the transaction and take the next one. 
+ if (!TxQueue.empty() && (TxQueue.front().second == tx.TxId)) { + TxQueue.pop(); + TryStartTransaction(ctx); } + DeleteWriteId(tx.WriteId); + PQ_LOG_D("delete TxId " << tx.TxId); + Txs.erase(tx.TxId); + + // If this was the last transaction, then you need to send responses to messages about changes + // in the status of the PQ tablet (if they came) + TryReturnTabletStateAll(ctx); break; } } +bool TPersQueue::AllSupportivePartitionsHaveBeenDeleted(const TMaybe& writeId) const +{ + if (!writeId.Defined()) { + return true; + } + + Y_ABORT_UNLESS(TxWrites.contains(*writeId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), writeId->NodeId, writeId->KeyId); + const TTxWriteInfo& writeInfo = TxWrites.at(*writeId); + + PQ_LOG_D("WriteId " << *writeId << + " Partitions.size=" << writeInfo.Partitions.size()); + bool deleted = + writeInfo.Partitions.empty() + ; + + return deleted; +} + +void TPersQueue::DeleteWriteId(const TMaybe& writeId) +{ + if (!writeId.Defined() || !TxWrites.contains(*writeId)) { + return; + } + + PQ_LOG_D("delete WriteId " << *writeId); + TxWrites.erase(*writeId); +} + void TPersQueue::WriteTx(TDistributedTransaction& tx, NKikimrPQ::TTransaction::EState state) { WriteTxs[tx.TxId] = state; @@ -4195,8 +4480,14 @@ void TPersQueue::WriteTx(TDistributedTransaction& tx, NKikimrPQ::TTransaction::E void TPersQueue::DeleteTx(TDistributedTransaction& tx) { + PQ_LOG_D("add an TxId " << tx.TxId << " to the list for deletion"); + DeleteTxs.insert(tx.TxId); + tx.State = NKikimrPQ::TTransaction::DELETING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + tx.WriteInProgress = true; } @@ -4212,7 +4503,9 @@ void TPersQueue::CheckChangedTxStates(const TActorContext& ctx) { for (ui64 txId : ChangedTxs) { auto tx = GetTransaction(ctx, txId); - Y_ABORT_UNLESS(tx); + Y_ABORT_UNLESS(tx, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), txId); CheckTxState(ctx, *tx); } @@ -4278,17 +4571,22 @@ void 
TPersQueue::SendProposeTransactionAbort(const TActorId& target, void TPersQueue::SendEvProposePartitionConfig(const TActorContext& ctx, TDistributedTransaction& tx) { - for (auto& [_, partition] : Partitions) { + for (auto& [partitionId, partition] : Partitions) { + if (partitionId.IsSupportivePartition()) { + continue; + } + auto event = std::make_unique(tx.Step, tx.TxId); event->TopicConverter = tx.TopicConverter; event->Config = tx.TabletConfig; + event->BootstrapConfig = tx.BootstrapConfig; ctx.Send(partition.Actor, std::move(event)); } tx.PartitionRepliesCount = 0; - tx.PartitionRepliesExpected = Partitions.size(); + tx.PartitionRepliesExpected = OriginalPartitionsCount; } TActorId TPersQueue::GetPartitionQuoter(const TPartitionId& partition) { @@ -4371,49 +4669,32 @@ void TPersQueue::EnsurePartitionsAreNotDeleted(const NKikimrPQ::TPQTabletConfig& } } -void TPersQueue::InitTransactions(const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, - THashMap>& partitionTxs) +void TPersQueue::BeginInitTransactions() { Txs.clear(); TxQueue.clear(); - std::deque> plannedTxs; - const auto& ctx = ActorContext(); - - for (size_t i = 0; i < readRange.PairSize(); ++i) { - auto& pair = readRange.GetPair(i); - - NKikimrPQ::TTransaction tx; - Y_ABORT_UNLESS(tx.ParseFromString(pair.GetValue())); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " " << - "Tx: " << tx.DebugString()); - - Txs.emplace(tx.GetTxId(), tx); + PlannedTxs.clear(); +} - if (tx.HasStep()) { - if (std::make_pair(tx.GetStep(), tx.GetTxId()) >= std::make_pair(ExecStep, ExecTxId)) { - plannedTxs.emplace_back(tx.GetStep(), tx.GetTxId()); - } - } - } +void TPersQueue::EndInitTransactions() +{ + PQ_LOG_D("Txs.size=" << Txs.size() << ", PlannedTxs.size=" << PlannedTxs.size()); - std::sort(plannedTxs.begin(), plannedTxs.end()); - for (auto& item : plannedTxs) { + std::sort(PlannedTxs.begin(), PlannedTxs.end()); + for (auto& item : PlannedTxs) { TxQueue.push(item); } if 
(!TxQueue.empty()) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " " << - "top tx queue (" << TxQueue.front().first << ", " << TxQueue.front().second << ")"); + PQ_LOG_D("top tx queue (" << TxQueue.front().first << ", " << TxQueue.front().second << ")"); } - - Y_UNUSED(partitionTxs); } void TPersQueue::TryStartTransaction(const TActorContext& ctx) { if (TxQueue.empty()) { + PQ_LOG_D("empty tx queue"); return; } @@ -4463,6 +4744,8 @@ void TPersQueue::Handle(TEvPQ::TEvSubDomainStatus::TPtr& ev, const TActorContext void TPersQueue::Handle(TEvPersQueue::TEvProposeTransactionAttach::TPtr &ev, const TActorContext &ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransactionAttach " << ev->Get()->Record.ShortDebugString()); + const ui64 txId = ev->Get()->Record.GetTxId(); NKikimrProto::EReplyStatus status = NKikimrProto::NODATA; @@ -4487,17 +4770,17 @@ void TPersQueue::Handle(TEvPQ::TEvCheckPartitionStatusRequest::TPtr& ev, const T { auto& record = ev->Get()->Record; auto it = Partitions.find(TPartitionId(TPartitionId(record.GetPartition()))); - if (it == Partitions.end()) { + if (InitCompleted && it == Partitions.end()) { LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Unknown partition " << record.GetPartition()); - auto response = THolder(); + auto response = MakeHolder(); response->Record.SetStatus(NKikimrPQ::ETopicPartitionStatus::Deleted); Send(ev->Sender, response.Release()); return; } - if (it->second.InitDone) { + if (it != Partitions.end() && it->second.InitDone) { Forward(ev, it->second.Actor); } else { CheckPartitionStatusRequests[record.GetPartition()].push_back(ev); @@ -4518,13 +4801,16 @@ void TPersQueue::ProcessCheckPartitionStatusRequests(const TPartitionId& partiti } } -void TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev, const TActorContext&) +void TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev) { + PQ_LOG_D("Handle TEvLongTxService::TEvLockStatus " << 
ev->Get()->Record.ShortDebugString()); + auto& record = ev->Get()->Record; const TWriteId writeId(record.GetLockNode(), record.GetLockId()); if (!TxWrites.contains(writeId)) { // the transaction has already been completed + PQ_LOG_D("unknown WriteId " << writeId); return; } @@ -4532,13 +4818,18 @@ void TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& e writeInfo.LongTxSubscriptionStatus = record.GetStatus(); if (writeInfo.LongTxSubscriptionStatus == NKikimrLongTxService::TEvLockStatus::STATUS_SUBSCRIBED) { + PQ_LOG_D("subscribed WriteId " << writeId); return; } - if (!writeInfo.TxId.Defined()) { - // the message TEvProposeTransaction will not come anymore - BeginDeletePartitions(writeInfo); + if (writeInfo.TxId.Defined()) { + // the message `TEvProposeTransaction` has already arrived + PQ_LOG_D("there is already a transaction TxId " << writeInfo.TxId << " for WriteId " << writeId); + return; } + + PQ_LOG_D("delete partitions for WriteId " << writeId); + BeginDeletePartitions(writeInfo); } void TPersQueue::Handle(TEvPQ::TEvReadingPartitionStatusRequest::TPtr& ev, const TActorContext& ctx) @@ -4555,12 +4846,29 @@ void TPersQueue::Handle(TEvPQ::TEvPartitionScaleStatusChanged::TPtr& ev, const T } } +void TPersQueue::DeletePartition(const TPartitionId& partitionId, const TActorContext& ctx) +{ + auto p = Partitions.find(partitionId); + if (p == Partitions.end()) { + return; + } + + const TPartitionInfo& partition = p->second; + ctx.Send(partition.Actor, new TEvents::TEvPoisonPill()); + + Partitions.erase(partitionId); +} + void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvDeletePartitionDone " << ev->Get()->PartitionId); + auto* event = ev->Get(); Y_ABORT_UNLESS(event->PartitionId.WriteId.Defined()); const TWriteId& writeId = *event->PartitionId.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", WriteId {%" 
PRIu64 ", %" PRIu64 "}", + TabletID(), writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); Y_ABORT_UNLESS(writeInfo.Partitions.contains(event->PartitionId.OriginalPartitionId)); const TPartitionId& partitionId = writeInfo.Partitions.at(event->PartitionId.OriginalPartitionId); @@ -4568,18 +4876,18 @@ void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorCon Y_ABORT_UNLESS(partitionId.IsSupportivePartition()); Y_ABORT_UNLESS(Partitions.contains(partitionId)); - Partitions.erase(partitionId); + DeletePartition(partitionId, ctx); writeInfo.Partitions.erase(partitionId.OriginalPartitionId); if (writeInfo.Partitions.empty()) { UnsubscribeWriteId(writeId, ctx); if (writeInfo.TxId.Defined()) { if (auto tx = GetTransaction(ctx, *writeInfo.TxId); tx) { - DeleteTx(*tx); + if (tx->State == NKikimrPQ::TTransaction::WAIT_RS_ACKS) { + CheckTxState(ctx, *tx); + } } } - PQ_LOG_D("delete WriteId " << writeId); - TxWrites.erase(writeId); } TxWritesChanged = true; @@ -4588,49 +4896,49 @@ void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorCon void TPersQueue::Handle(TEvPQ::TEvTransactionCompleted::TPtr& ev, const TActorContext&) { + PQ_LOG_D("Handle TEvPQ::TEvTransactionCompleted" << + " WriteId " << ev->Get()->WriteId); + auto* event = ev->Get(); if (!event->WriteId.Defined()) { return; } const TWriteId& writeId = *event->WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); Y_ABORT_UNLESS(writeInfo.Partitions.size() == 1); BeginDeletePartitions(writeInfo); } -void TPersQueue::BeginDeleteTx(const TDistributedTransaction& tx) -{ - Y_ABORT_UNLESS(tx.WriteId.Defined()); - const TWriteId& writeId = *tx.WriteId; - if (!TxWrites.contains(writeId)) { - // the transaction has already been completed - return; - } - - 
TTxWriteInfo& writeInfo = TxWrites.at(writeId); - if (writeInfo.LongTxSubscriptionStatus == NKikimrLongTxService::TEvLockStatus::STATUS_SUBSCRIBED) { - return; - } - - BeginDeletePartitions(writeInfo); -} - void TPersQueue::BeginDeletePartitions(TTxWriteInfo& writeInfo) { if (writeInfo.Deleting) { + PQ_LOG_D("Already deleting WriteInfo"); return; } for (auto& [_, partitionId] : writeInfo.Partitions) { Y_ABORT_UNLESS(Partitions.contains(partitionId)); const TPartitionInfo& partition = Partitions.at(partitionId); + PQ_LOG_D("send TEvPQ::TEvDeletePartition to partition " << partitionId); Send(partition.Actor, new TEvPQ::TEvDeletePartition); } writeInfo.Deleting = true; } +void TPersQueue::BeginDeletePartitions(const TDistributedTransaction& tx) +{ + if (!tx.WriteId.Defined() || !TxWrites.contains(*tx.WriteId)) { + return; + } + + TTxWriteInfo& writeInfo = TxWrites.at(*tx.WriteId); + BeginDeletePartitions(writeInfo); +} + TString TPersQueue::LogPrefix() const { return TStringBuilder() << "[PQ: " << TabletID() << "] "; } @@ -4685,7 +4993,7 @@ bool TPersQueue::HandleHook(STFUNC_SIG) HFuncTraced(TEvMediatorTimecast::TEvRegisterTabletResult, Handle); HFuncTraced(TEvPQ::TEvCheckPartitionStatusRequest, Handle); HFuncTraced(TEvPQ::TEvPartitionScaleStatusChanged, Handle); - HFuncTraced(NLongTxService::TEvLongTxService::TEvLockStatus, Handle); + hFuncTraced(NLongTxService::TEvLongTxService::TEvLockStatus, Handle); HFuncTraced(TEvPQ::TEvReadingPartitionStatusRequest, Handle); HFuncTraced(TEvPQ::TEvDeletePartitionDone, Handle); HFuncTraced(TEvPQ::TEvTransactionCompleted, Handle); diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h index a754c6828749..e7fabe310422 100644 --- a/ydb/core/persqueue/pq_impl.h +++ b/ydb/core/persqueue/pq_impl.h @@ -35,6 +35,7 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { READ_CONFIG_COOKIE = 3, WRITE_STATE_COOKIE = 4, WRITE_TX_COOKIE = 5, + READ_TXS_COOKIE = 6, }; void CreatedHook(const TActorContext& ctx) override; @@ 
-97,7 +98,8 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { //response from KV on READ or WRITE config request void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); - void HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx); + void HandleConfigReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx); + void HandleTransactionsReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx); void ApplyNewConfigAndReply(const TActorContext& ctx); void ApplyNewConfig(const NKikimrPQ::TPQTabletConfig& newConfig, const TActorContext& ctx); @@ -108,7 +110,7 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { void ReadTxWrites(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); void ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& read, - const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, + const TVector& readRanges, const TActorContext& ctx); void ReadState(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); @@ -493,16 +495,43 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { bool AllOriginalPartitionsInited() const; - void Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev, const TActorContext& ctx); + void Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev); void Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQ::TEvTransactionCompleted::TPtr& ev, const TActorContext& ctx); - void BeginDeleteTx(const TDistributedTransaction& tx); void BeginDeletePartitions(TTxWriteInfo& writeInfo); + void BeginDeletePartitions(const TDistributedTransaction& tx); bool CheckTxWriteOperation(const NKikimrPQ::TPartitionOperation& operation, const TWriteId& writeId) const; bool CheckTxWriteOperations(const NKikimrPQ::TDataTransaction& txBody) const; + + void MoveTopTxToCalculating(TDistributedTransaction& tx, const 
TActorContext& ctx); + void DeletePartition(const TPartitionId& partitionId, const TActorContext& ctx); + + std::deque> PlannedTxs; + + void BeginInitTransactions(); + void EndInitTransactions(); + + void EndReadConfig(const TActorContext& ctx); + + void AddCmdReadTransactionRange(TEvKeyValue::TEvRequest& request, + const TString& fromKey, bool includeFrom); + + NKikimrClient::TResponse ConfigReadResponse; + TVector TransactionsReadResults; + + void SendTransactionsReadRequest(const TString& fromKey, bool includeFrom, + const TActorContext& ctx); + + void AddCmdDeleteTx(NKikimrClient::TKeyValueRequest& request, + ui64 txId); + + bool AllSupportivePartitionsHaveBeenDeleted(const TMaybe& writeId) const; + void DeleteWriteId(const TMaybe& writeId); + + void UpdateReadRuleGenerations(NKikimrPQ::TPQTabletConfig& cfg) const; }; diff --git a/ydb/core/persqueue/read_balancer.cpp b/ydb/core/persqueue/read_balancer.cpp index 495277081072..5fc814b94bc0 100644 --- a/ydb/core/persqueue/read_balancer.cpp +++ b/ydb/core/persqueue/read_balancer.cpp @@ -21,10 +21,12 @@ static constexpr TDuration ACL_SUCCESS_RETRY_TIMEOUT = TDuration::Seconds(30); static constexpr TDuration ACL_ERROR_RETRY_TIMEOUT = TDuration::Seconds(5); static constexpr TDuration ACL_EXPIRATION_TIMEOUT = TDuration::Minutes(5); -NKikimrPQ::EConsumerScalingSupport DefaultScalingSupport() { - // TODO fix me after support of paremeter ConsumerScalingSupport - return AppData()->FeatureFlags.GetEnableTopicSplitMerge() ? 
NKikimrPQ::EConsumerScalingSupport::FULL_SUPPORT - : NKikimrPQ::EConsumerScalingSupport::NOT_SUPPORT; +TString EncodeAnchor(const TString& v) { + auto r = Base64Encode(v); + while (r.EndsWith('=')) { + r.resize(r.size() - 1); + } + return r; } TPersQueueReadBalancer::TPersQueueReadBalancer(const TActorId &tablet, TTabletStorageInfo *info) @@ -185,110 +187,142 @@ bool TPersQueueReadBalancer::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr e TString TPersQueueReadBalancer::GenerateStat() { auto& metrics = AggregatedStats.Metrics; - auto balancerStatistcs = Balancer->GetStatistics(); TStringStream str; HTML(str) { - TAG(TH2) {str << "PersQueueReadBalancer Tablet";} - TAG(TH3) {str << "Topic: " << Topic;} - TAG(TH3) {str << "Generation: " << Generation;} - TAG(TH3) {str << "Inited: " << Inited;} - TAG(TH3) {str << "ActivePipes: " << balancerStatistcs.Sessions.size();} - if (Inited) { - TAG(TH3) {str << "Active partitions: " << NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedSec: " << metrics.TotalAvgWriteSpeedPerSec << "/" << metrics.MaxAvgWriteSpeedPerSec << "/" << metrics.TotalAvgWriteSpeedPerSec / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedMin: " << metrics.TotalAvgWriteSpeedPerMin << "/" << metrics.MaxAvgWriteSpeedPerMin << "/" << metrics.TotalAvgWriteSpeedPerMin / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedHour: " << metrics.TotalAvgWriteSpeedPerHour << "/" << metrics.MaxAvgWriteSpeedPerHour << "/" << metrics.TotalAvgWriteSpeedPerHour / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedDay: " << metrics.TotalAvgWriteSpeedPerDay << "/" << metrics.MaxAvgWriteSpeedPerDay << "/" << metrics.TotalAvgWriteSpeedPerDay / NumActiveParts;} - TAG(TH3) {str << "TotalDataSize: " << AggregatedStats.TotalDataSize;} - TAG(TH3) {str << "ReserveSize: " << PartitionReserveSize();} - TAG(TH3) {str << "TotalUsedReserveSize: " << AggregatedStats.TotalUsedReserveSize;} - } + str << ""; + + TAG(TH3) {str << 
"PersQueueReadBalancer " << TabletID() << " (" << Path << ")";} + + auto property = [&](const TString& name, const auto value) { + TABLER() { + TABLED() { str << name;} + TABLED() { str << value; } + } + }; UL_CLASS("nav nav-tabs") { LI_CLASS("active") { - str << "partitions"; + str << "Generic Info"; } - for (auto& consumer : balancerStatistcs.Consumers) { + LI() { + str << "Partitions"; + } + for (auto& [consumerName, _] : Balancer->GetConsumers()) { LI() { - str << "" << NPersQueue::ConvertOldConsumerName(consumer.ConsumerName) << ""; + str << "" << NPersQueue::ConvertOldConsumerName(consumerName) << ""; } } } + DIV_CLASS("tab-content") { - DIV_CLASS_ID("tab-pane fade in active", "main") { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "tabletId";} - } - } + DIV_CLASS_ID("tab-pane fade in active", "generic") { + TABLE_CLASS("tgrid") { TABLEBODY() { - for (auto& p : PartitionsInfo) { - TABLER() { - TABLED() { str << p.first;} - TABLED() { str << p.second.TabletId;} + TABLER() { + TABLED() { + TABLE_CLASS("properties") { + CAPTION() { str << "Tablet info"; } + TABLEBODY() { + property("Topic", Topic); + property("Path", Path); + property("Initialized", Inited ? 
"yes" : "no"); + property("SchemeShard", TStringBuilder() << "" << SchemeShardId << ""); + property("PathId", PathId); + property("Version", Version); + property("Generation", Generation); + } + } + } + TABLED() { + if (Inited) { + TABLE_CLASS("properties") { + CAPTION() { str << "Statistics"; } + TABLEBODY() { + property("Active pipes", Balancer->GetSessions().size()); + property("Active partitions", NumActiveParts); + property("Total data size", AggregatedStats.TotalDataSize); + property("Reserve size", PartitionReserveSize()); + property("Used reserve size", AggregatedStats.TotalUsedReserveSize); + property("[Total/Max/Avg]WriteSpeedSec", TStringBuilder() << metrics.TotalAvgWriteSpeedPerSec << "/" << metrics.MaxAvgWriteSpeedPerSec << "/" << metrics.TotalAvgWriteSpeedPerSec / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedMin", TStringBuilder() << metrics.TotalAvgWriteSpeedPerMin << "/" << metrics.MaxAvgWriteSpeedPerMin << "/" << metrics.TotalAvgWriteSpeedPerMin / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedHour", TStringBuilder() << metrics.TotalAvgWriteSpeedPerHour << "/" << metrics.MaxAvgWriteSpeedPerHour << "/" << metrics.TotalAvgWriteSpeedPerHour / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedDay", TStringBuilder() << metrics.TotalAvgWriteSpeedPerDay << "/" << metrics.MaxAvgWriteSpeedPerDay << "/" << metrics.TotalAvgWriteSpeedPerDay / NumActiveParts); + } + } + } } } } } } - for (auto& consumer : balancerStatistcs.Consumers) { - DIV_CLASS_ID("tab-pane fade", "client_" + Base64Encode(consumer.ConsumerName)) { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "tabletId";} - TABLEH() {str << "state";} - TABLEH() {str << "session";} - } - } - TABLEBODY() { - for (auto& partition : consumer.Partitions) { - TABLER() { - TABLED() { str << partition.PartitionId;} - TABLED() { str << partition.TabletId;} - TABLED() { str << partition.State;} - TABLED() { str << 
partition.Session;} - } - } + + DIV_CLASS_ID("tab-pane fade", "partitions") { + auto partitionAnchor = [&](const ui32 partitionId) { + return TStringBuilder() << "P" << partitionId; + }; + + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Partition"; } + TABLEH() { str << "Status"; } + TABLEH() { str << "TabletId"; } + TABLEH() { str << "Parents"; } + TABLEH() { str << "Children"; } + TABLEH() { str << "Size"; } } } + TABLEBODY() { + for (auto& [partitionId, partitionInfo] : PartitionsInfo) { + const auto& stats = AggregatedStats.Stats[partitionId]; + const auto* node = PartitionGraph.GetPartition(partitionId); + TString style = node && node->Children.empty() ? "text-success" : "text-muted"; - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { TABLER() { - TABLEH() {str << "session";} - TABLEH() {str << "suspended partitions";} - TABLEH() {str << "active partitions";} - TABLEH() {str << "inactive partitions";} - TABLEH() {str << "total partitions";} - } - } - TABLEBODY() { - - for (auto& session : balancerStatistcs.Sessions) { - TABLER() { - TABLED() { str << session.Session;} - TABLED() { str << session.SuspendedPartitionCount;} - TABLED() { str << session.ActivePartitionCount;} - TABLED() { str << session.InactivePartitionCount;} - TABLED() { str << session.TotalPartitionCount;} + TABLED() { + DIV_CLASS_ID(style, partitionAnchor(partitionId)) { + str << partitionId; + } } - } - - TABLER() { - TABLED() { str << "FREE";} - TABLED() { str << 0;} - TABLED() { str << balancerStatistcs.FreePartitions;} - TABLED() { str << balancerStatistcs.FreePartitions;} + TABLED() { + if (node) { + str << (node->Children.empty() ? 
"Active" : "Inactive"); + if (node->IsRoot()) { + str << " (root)"; + } + } + } + TABLED() { HREF(TStringBuilder() << "?TabletID=" << partitionInfo.TabletId) { str << partitionInfo.TabletId; } } + TABLED() { + if (node) { + for (auto* parent : node->Parents) { + HREF("#" + partitionAnchor(parent->Id)) { str << parent->Id; } + str << ", "; + } + } + } + TABLED() { + if (node) { + for (auto* child : node->Children) { + HREF("#" + partitionAnchor(child->Id)) { str << child->Id; } + str << ", "; + } + } + } + TABLED() { str << stats.DataSize; } } } } } } + + Balancer->RenderApp(str); } } return str.Str(); @@ -503,7 +537,7 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr if (SplitMergeEnabled(TabletConfig)) { if (!PartitionsScaleManager) { - PartitionsScaleManager = std::make_unique(Topic, DatabasePath, PathId, Version, TabletConfig); + PartitionsScaleManager = std::make_unique(Topic, Path, DatabasePath, PathId, Version, TabletConfig); } else { PartitionsScaleManager->UpdateBalancerConfig(PathId, Version, TabletConfig); } @@ -689,14 +723,6 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, c continue; } - auto generation = partRes.GetGeneration(); - auto cookie = partRes.GetCookie(); - for (const auto& consumer : partRes.GetConsumerResult()) { - if (consumer.GetReadingFinished()) { - Balancer->SetCommittedState(consumer.GetConsumer(), partitionId, generation, cookie, ctx); - } - } - if (SplitMergeEnabled(TabletConfig) && PartitionsScaleManager) { PartitionsScaleManager->HandleScaleStatusChange(partitionId, partRes.GetScaleStatus(), ctx); } @@ -708,8 +734,11 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, c AggregatedStats.Stats[partitionId].HasCounters = true; } + Balancer->Handle(ev, ctx); + if (AggregatedStats.Cookies.empty()) { CheckStat(ctx); + Balancer->ProcessPendingStats(ctx); } } @@ -813,7 +842,6 @@ void TPersQueueReadBalancer::CheckStat(const TActorContext& ctx) { 
NTabletPipe::SendData(ctx, GetPipeClient(SchemeShardId, ctx), ev); - UpdateCounters(ctx); } @@ -1238,16 +1266,20 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& void TPersQueueReadBalancer::Handle(TEvPQ::TEvPartitionScaleStatusChanged::TPtr& ev, const TActorContext& ctx) { if (!SplitMergeEnabled(TabletConfig)) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: autopartitioning disabled."); return; } auto& record = ev->Get()->Record; auto* node = PartitionGraph.GetPartition(record.GetPartitionId()); if (!node) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: partition " << record.GetPartitionId() << " not found."); return; } if (PartitionsScaleManager) { PartitionsScaleManager->HandleScaleStatusChange(record.GetPartitionId(), record.GetScaleStatus(), ctx); + } else { + LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: scale manager isn`t initialized."); } } diff --git a/ydb/core/persqueue/read_balancer.h b/ydb/core/persqueue/read_balancer.h index e91c14d7b734..3f79f7b75b38 100644 --- a/ydb/core/persqueue/read_balancer.h +++ b/ydb/core/persqueue/read_balancer.h @@ -353,7 +353,7 @@ class TPersQueueReadBalancer : public TActor, public TTa }; -NKikimrPQ::EConsumerScalingSupport DefaultScalingSupport(); +TString EncodeAnchor(const TString& value); } } diff --git a/ydb/core/persqueue/read_balancer__balancing.cpp b/ydb/core/persqueue/read_balancer__balancing.cpp index 4130dd83b749..138d5608e373 100644 --- a/ydb/core/persqueue/read_balancer__balancing.cpp +++ b/ydb/core/persqueue/read_balancer__balancing.cpp @@ -6,6 +6,14 @@ namespace NKikimr::NPQ::NBalancing { +struct LowLoadSessionComparator { + bool operator()(const TSession* lhs, const TSession* rhs) const; +}; + +using TLowLoadOrderedSessions = std::set; + + + // // TPartition // @@ -65,6 +73,7 @@ bool TPartition::Reset() { bool result = 
IsInactive(); ScaleAwareSDK = false; + StartedReadingFromEndOffset = false; ReadingFinished = false; Commited = false; ++Cookie; @@ -284,6 +293,8 @@ void TPartitionFamily::AfterRelease() { Partitions.clear(); Partitions.insert(Partitions.end(), RootPartitions.begin(), RootPartitions.end()); + LockedPartitions.clear(); + ClassifyPartitions(); UpdatePartitionMapping(Partitions); // After reducing the number of partitions in the family, the list of reading sessions that can read this family may expand. @@ -338,7 +349,7 @@ void TPartitionFamily::AttachePartitions(const std::vector& partitions, co } auto [activePartitionCount, inactivePartitionCount] = ClassifyPartitions(newPartitions); - ChangePartitionCounters(activePartitionCount, activePartitionCount); + ChangePartitionCounters(activePartitionCount, inactivePartitionCount); if (IsActive()) { if (!Session->AllPartitionsReadable(newPartitions)) { @@ -388,7 +399,7 @@ void TPartitionFamily::InactivatePartition(ui32 partitionId) { ActivePartitionCount += active; InactivePartitionCount += inactive; - if (IsActive()) { + if (IsActive() && Session) { Session->ActivePartitionCount += active; Session->InactivePartitionCount += inactive; } @@ -408,7 +419,10 @@ void TPartitionFamily::Merge(TPartitionFamily* other) { other->RootPartitions.clear(); WantedPartitions.insert(other->WantedPartitions.begin(), other->WantedPartitions.end()); - WantedPartitions.clear(); + other->WantedPartitions.clear(); + + LockedPartitions.insert(other->LockedPartitions.begin(), other->LockedPartitions.end()); + other->LockedPartitions.clear(); ChangePartitionCounters(other->ActivePartitionCount, other->InactivePartitionCount); other->ChangePartitionCounters(-other->ActivePartitionCount, -other->InactivePartitionCount); @@ -463,7 +477,7 @@ bool TPartitionFamily::PossibleForBalance(TSession* session) { void TPartitionFamily::ClassifyPartitions() { auto [activePartitionCount, inactivePartitionCount] = ClassifyPartitions(Partitions); - 
ChangePartitionCounters(activePartitionCount, inactivePartitionCount); + ChangePartitionCounters(activePartitionCount - ActivePartitionCount, inactivePartitionCount - InactivePartitionCount); } template @@ -533,6 +547,7 @@ std::unique_ptr TPartitionFamily::MakeEvRelea r.SetPath(TopicPath()); r.SetGeneration(TabletGeneration()); r.SetClientId(Session->ClientId); + r.SetCount(1); r.SetGroup(partitionId + 1); ActorIdToProto(Session->Pipe, r.MutablePipeClient()); @@ -703,13 +718,13 @@ bool TConsumer::BreakUpFamily(TPartitionFamily* family, ui32 partitionId, bool d } std::vector members; - GetPartitionGraph().Travers(id, [&](auto childId) { if (partitions.contains(childId)) { - members.push_back(childId); auto [_, i] = processedPartitions.insert(childId); if (!i) { familiesIntersect = true; + } else { + members.push_back(childId); } return true; @@ -717,16 +732,25 @@ bool TConsumer::BreakUpFamily(TPartitionFamily* family, ui32 partitionId, bool d return false; }); - auto* f = CreateFamily({id}, family->Status, ctx); - f->Partitions.insert(f->Partitions.end(), members.begin(), members.end()); + bool locked = family->Session && (family->LockedPartitions.contains(id) || + std::any_of(members.begin(), members.end(), [family](auto id) { return family->LockedPartitions.contains(id); })); + auto* f = CreateFamily({id}, locked ? 
family->Status : TPartitionFamily::EStatus::Free, ctx); f->TargetStatus = family->TargetStatus; - f->Session = family->Session; - f->LockedPartitions = Intercept(family->LockedPartitions, f->Partitions); + f->Partitions.insert(f->Partitions.end(), members.begin(), members.end()); f->LastPipe = family->LastPipe; - if (f->Session) { + f->UpdatePartitionMapping(f->Partitions); + f->ClassifyPartitions(); + if (locked) { + f->LockedPartitions = Intercept(family->LockedPartitions, f->Partitions); + + f->Session = family->Session; f->Session->Families.try_emplace(f->Id, f); + f->Session->ActivePartitionCount += f->ActivePartitionCount; + f->Session->InactivePartitionCount += f->InactivePartitionCount; if (f->IsActive()) { ++f->Session->ActiveFamilyCount; + } else if (f->IsRelesing()) { + ++f->Session->ReleasingFamilyCount; } } @@ -1094,12 +1118,14 @@ void TConsumer::FinishReading(TEvPersQueue::TEvReadingPartitionFinishedRequest:: LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "Reading of the partition " << partitionId << " was finished by " << ConsumerName << " but the partition hasn't family"); + return; } if (!family->Session) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "Reading of the partition " << partitionId << " was finished by " << ConsumerName << " but the partition hasn't reading session"); + return; } auto& partition = Partitions[partitionId]; @@ -1139,11 +1165,11 @@ void TConsumer::ScheduleBalance(const TActorContext& ctx) { ctx.Send(Balancer.TopicActor.SelfId(), new TEvPQ::TEvBalanceConsumer(ConsumerName)); } -TOrderedSessions OrderSessions( +TLowLoadOrderedSessions OrderSessions( const std::unordered_map& values, std::function predicate = [](const TSession*) { return true; } ) { - TOrderedSessions result; + TLowLoadOrderedSessions result; for (auto& [_, v] : values) { if (predicate(v)) { result.insert(v); @@ -1227,7 +1253,7 @@ void TConsumer::Balance(const TActorContext& ctx) { } } - TOrderedSessions 
commonSessions = OrderSessions(Sessions, [](auto* session) { + TLowLoadOrderedSessions commonSessions = OrderSessions(Sessions, [](auto* session) { return !session->WithGroups(); }); @@ -1236,7 +1262,7 @@ void TConsumer::Balance(const TActorContext& ctx) { auto families = OrderFamilies(UnreadableFamilies); for (auto it = families.rbegin(); it != families.rend(); ++it) { auto* family = *it; - TOrderedSessions specialSessions; + TLowLoadOrderedSessions specialSessions; auto& sessions = (family->IsCommon()) ? commonSessions : (specialSessions = OrderSessions(family->SpecialSessions)); auto sit = sessions.begin(); @@ -1280,7 +1306,9 @@ void TConsumer::Balance(const TActorContext& ctx) { GetPrefix() << "start rebalancing. familyCount=" << familyCount << ", sessionCount=" << commonSessions.size() << ", desiredFamilyCount=" << desiredFamilyCount << ", allowPlusOne=" << allowPlusOne); - for (auto it = commonSessions.rbegin(); it != commonSessions.rend(); ++it) { + TOrderedSessions orderedSession; + orderedSession.insert(commonSessions.begin(), commonSessions.end()); + for (auto it = orderedSession.begin(); it != orderedSession.end(); ++it) { auto* session = *it; auto targerFamilyCount = desiredFamilyCount + (allowPlusOne ? 
1 : 0); auto families = OrderFamilies(session->Families); @@ -1291,7 +1319,7 @@ void TConsumer::Balance(const TActorContext& ctx) { } } - if (session->ActiveFamilyCount > desiredFamilyCount) { + if (allowPlusOne) { --allowPlusOne; } } @@ -1380,7 +1408,8 @@ TSession::TSession(const TActorId& pipe) , InactivePartitionCount(0) , ReleasingPartitionCount(0) , ActiveFamilyCount(0) - , ReleasingFamilyCount(0) { + , ReleasingFamilyCount(0) + , Order(RandomNumber()) { } bool TSession::WithGroups() const { return !Partitions.empty(); } @@ -1461,50 +1490,15 @@ TConsumer* TBalancer::GetConsumer(const TString& consumerName) { return it->second.get(); } -const TStatistics TBalancer::GetStatistics() const { - TStatistics result; - - result.Consumers.reserve(Consumers.size()); - for (auto& [_, consumer] : Consumers) { - result.Consumers.push_back(TStatistics::TConsumerStatistics()); - auto& c = result.Consumers.back(); - - c.ConsumerName = consumer->ConsumerName; - c.Partitions.reserve(GetPartitionsInfo().size()); - for (auto [partitionId, partitionInfo] : GetPartitionsInfo()) { - c.Partitions.push_back(TStatistics::TConsumerStatistics::TPartitionStatistics()); - auto& p = c.Partitions.back(); - p.PartitionId = partitionId; - p.TabletId = partitionInfo.TabletId; - - auto* family = consumer->FindFamily(partitionId); - if (family && family->Session && family->LockedPartitions.contains(partitionId)) { - p.Session = family->Session->SessionName; - p.State = 1; - } - } - } - - size_t readablePartitionCount = 0; - - result.Sessions.reserve(Sessions.size()); - for (auto& [_, session] : Sessions) { - result.Sessions.push_back(TStatistics::TSessionStatistics()); - auto& s = result.Sessions.back(); - s.Session = session->SessionName; - s.ActivePartitionCount = session->ActivePartitionCount; - s.InactivePartitionCount = session->InactivePartitionCount; - s.SuspendedPartitionCount = session->ReleasingPartitionCount; - s.TotalPartitionCount = s.ActivePartitionCount + s.InactivePartitionCount; 
- - readablePartitionCount += s.TotalPartitionCount; - } - - result.FreePartitions = GetPartitionsInfo().size() - readablePartitionCount; +const std::unordered_map>& TBalancer::GetConsumers() const { + return Consumers; +} - return result; +const std::unordered_map>& TBalancer::GetSessions() const { + return Sessions; } + void TBalancer::UpdateConfig(std::vector addedPartitions, std::vector deletedPartitions, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "updating configuration. Deleted partitions [" << JoinRange(", ", deletedPartitions.begin(), deletedPartitions.end()) @@ -1629,6 +1623,12 @@ void TBalancer::Handle(TEvPQ::TEvWakeupReleasePartition::TPtr &ev, const TActorC return; } + if (partition->Commited) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, + GetPrefix() << "skip releasing partition " << msg->PartitionId << " of consumer \"" << msg->Consumer << "\" by reading finished timeout because offset is commited"); + return; + } + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "releasing partition " << msg->PartitionId << " of consumer \"" << msg->Consumer << "\" by reading finished timeout"); @@ -1782,10 +1782,9 @@ void TBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& ev, const TAc pi->SetPartition(partitionId); auto* family = consumer->FindFamily(partitionId); - if (family && family->LockedPartitions.contains(partitionId)) { + if (family && family->Session && family->LockedPartitions.contains(partitionId)) { auto* session = family->Session; - Y_ABORT_UNLESS(session != nullptr); pi->SetClientNode(session->ClientNode); pi->SetProxyNodeId(session->ProxyNodeId); pi->SetSession(session->SessionName); @@ -1818,6 +1817,31 @@ void TBalancer::Handle(TEvPQ::TEvBalanceConsumer::TPtr& ev, const TActorContext& } } +void TBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext&) { + const auto& record = ev->Get()->Record; + for (const auto& 
partResult : record.GetPartResult()) { + for (const auto& consumerResult : partResult.GetConsumerResult()) { + PendingUpdates[partResult.GetPartition()].push_back(TData{partResult.GetGeneration(), partResult.GetCookie(), consumerResult.GetConsumer(), consumerResult.GetReadingFinished()}); + } + } +} + +void TBalancer::ProcessPendingStats(const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, + GetPrefix() << "ProcessPendingStats. PendingUpdates size " << PendingUpdates.size()); + + GetPartitionGraph().Travers([&](ui32 id) { + for (auto& d : PendingUpdates[id]) { + if (d.Commited) { + SetCommittedState(d.Consumer, id, d.Generation, d.Cookie, ctx); + } + } + return true; + }); + + PendingUpdates.clear(); +} + TString TBalancer::GetPrefix() const { return TStringBuilder() << "balancer: [" << TopicActor.TabletID() << "] topic " << Topic() << " "; } @@ -1838,18 +1862,23 @@ bool TPartitionFamilyComparator::operator()(const TPartitionFamily* lhs, const T } bool SessionComparator::operator()(const TSession* lhs, const TSession* rhs) const { + if (lhs->Order != rhs->Order) { + return lhs->Order < rhs->Order; + } + return lhs->SessionName < rhs->SessionName; +} + + +bool LowLoadSessionComparator::operator()(const TSession* lhs, const TSession* rhs) const { if (lhs->ActiveFamilyCount != rhs->ActiveFamilyCount) { return lhs->ActiveFamilyCount < rhs->ActiveFamilyCount; } - if (lhs->ActivePartitionCount != rhs->ActivePartitionCount) { - return lhs->ActivePartitionCount < rhs->ActivePartitionCount; - } - if (lhs->InactivePartitionCount != rhs->InactivePartitionCount) { - return lhs->InactivePartitionCount < rhs->InactivePartitionCount; - } if (lhs->Partitions.size() != rhs->Partitions.size()) { return lhs->Partitions.size() < rhs->Partitions.size(); } + if (lhs->Order != rhs->Order) { + return lhs->Order < rhs->Order; + } return lhs->SessionName < rhs->SessionName; } diff --git a/ydb/core/persqueue/read_balancer__balancing.h 
b/ydb/core/persqueue/read_balancer__balancing.h index a48c2d6637f6..6f98329f3350 100644 --- a/ydb/core/persqueue/read_balancer__balancing.h +++ b/ydb/core/persqueue/read_balancer__balancing.h @@ -276,6 +276,8 @@ struct TSession { // The partition families that are being read by this session. std::unordered_map Families; + size_t Order; + // true if client connected to read from concret partitions bool WithGroups() const; @@ -285,32 +287,6 @@ struct TSession { TString DebugStr() const; }; -struct TStatistics { - struct TConsumerStatistics { - struct TPartitionStatistics { - ui32 PartitionId; - ui64 TabletId = 0; - ui32 State = 0; - TString Session; - }; - - TString ConsumerName; - std::vector Partitions; - }; - - struct TSessionStatistics { - TString Session; - size_t ActivePartitionCount; - size_t InactivePartitionCount; - size_t SuspendedPartitionCount; - size_t TotalPartitionCount; - }; - - std::vector Consumers; - std::vector Sessions; - - size_t FreePartitions; -}; class TBalancer { friend struct TConsumer; @@ -328,7 +304,8 @@ class TBalancer { i32 GetLifetimeSeconds() const; TConsumer* GetConsumer(const TString& consumerName); - const TStatistics GetStatistics() const; + const std::unordered_map>& GetConsumers() const; + const std::unordered_map>& GetSessions() const; void UpdateConfig(std::vector addedPartitions, std::vector deletedPartitions, const TActorContext& ctx); bool SetCommittedState(const TString& consumer, ui32 partitionId, ui32 generation, ui64 cookie, const TActorContext& ctx); @@ -350,6 +327,11 @@ class TBalancer { void Handle(TEvPQ::TEvBalanceConsumer::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx); + void ProcessPendingStats(const TActorContext& ctx); + + void RenderApp(TStringStream& str) const; + private: TString GetPrefix() const; ui32 NextStep(); @@ -361,6 +343,14 @@ class TBalancer { std::unordered_map> Consumers; ui32 Step; + + struct TData { + ui32 Generation; + 
ui64 Cookie; + const TString Consumer; + bool Commited; + }; + std::unordered_map> PendingUpdates; }; } diff --git a/ydb/core/persqueue/read_balancer__balancing_app.cpp b/ydb/core/persqueue/read_balancer__balancing_app.cpp new file mode 100644 index 000000000000..30b64fd1db22 --- /dev/null +++ b/ydb/core/persqueue/read_balancer__balancing_app.cpp @@ -0,0 +1,228 @@ +#include "read_balancer__balancing.h" + +#include + +#define DEBUG(message) + + +namespace NKikimr::NPQ::NBalancing { + +void TBalancer::RenderApp(TStringStream& str) const { + auto& __stream = str; + + for (auto& [consumerName, consumer] : Consumers) { + auto consumerAnchor = "c_" + EncodeAnchor(consumerName); + + auto familyAnchor = [&](const size_t familyId) { + return TStringBuilder() << consumerAnchor << "_F" << familyId; + }; + auto partitionAnchor = [&](const ui32 partitionId) { + return TStringBuilder() << consumerAnchor << "_P" << partitionId; + }; + + DIV_CLASS_ID("tab-pane fade", consumerAnchor) { + TABLE_CLASS("table") { + CAPTION() { str << "Families"; } + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Id"; } + TABLEH() { str << "Status"; } + TABLEH() { str << "Partitions"; } + TABLEH() { str << "Session"; } + TABLEH() { str << "Statistics"; } + } + } + + TABLEBODY() { + for (auto& [familyId, family] : consumer->Families) { + TABLER() { + TABLED() { DIV_CLASS_ID("text-info", familyAnchor(familyId)) { str << familyId; } } + TABLED() { str << family->Status; } + TABLED() { + for (auto partitionId : family->Partitions) { + HREF("#" + partitionAnchor(partitionId)) { str << partitionId; } + str << ", "; + } + } + TABLED() { str << (family->Session ? 
family->Session->SessionName : ""); } + TABLED() { str << "Active " << family->ActivePartitionCount << " / Inactive " << family->InactivePartitionCount << " / Locked " << family->LockedPartitions.size(); } + } + } + } + } + + size_t free = 0; + size_t finished = 0; + size_t read = 0; + size_t ready = 0; + + TABLE_CLASS("table") { + CAPTION() { str << "Partitions"; } + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Id"; } + TABLEH() { str << "Family"; } + TABLEH() { str << "Status"; }; + TABLEH() { str << "Parents"; } + TABLEH() { str << "Description"; } + TABLEH() { str << "P Generation"; } + TABLEH() { str << "P Cookie"; } + } + } + + TABLEBODY() { + for (auto& [partitionId, partition] : consumer->Partitions) { + const auto* family = consumer->FindFamily(partitionId); + const auto* node = consumer->GetPartitionGraph().GetPartition(partitionId); + TString style = node && node->Children.empty() ? "text-success" : "text-muted"; + auto* partitionInfo = GetPartitionInfo(partitionId); + + TABLER() { + TABLED() { DIV_CLASS_ID(style, partitionAnchor(partitionId)) { + str << partitionId << " "; + if (partitionInfo) { + HREF(TStringBuilder() << "?TabletID=" << partitionInfo->TabletId) { str << "#"; } + } + } } + TABLED() { + if (family) { + HREF("#" + familyAnchor(family->Id)) { str << family->Id; } + } + } + TABLED() { + if (family) { + if (partition.IsInactive()) { + str << "Finished"; + ++finished; + } else { + str << "Read"; + ++read; + } + } else if (consumer->IsReadable(partitionId)) { + str << "Ready"; + ++ready; + } else { + str << "Free"; + ++free; + } + } + TABLED() { + if (node) { + for (auto* parent : node->Parents) { + HREF("#" + partitionAnchor(parent->Id)) { str << parent->Id; } + str << ", "; + } + } else { + str << "error: not found"; + } + } + TABLED() { + if (partition.Commited) { + str << "commited"; + } else if (partition.ReadingFinished) { + if (partition.ScaleAwareSDK) { + str << "reading child"; + } else if (partition.StartedReadingFromEndOffset) { 
+ str << "finished"; + } else { + str << "scheduled. iteration: " << partition.Iteration; + } + } else if (partition.Iteration) { + str << "iteration: " << partition.Iteration; + } + } + TABLED() { str << partition.PartitionGeneration; } + TABLED() { str << partition.PartitionCookie; } + } + } + } + } + + TABLE_CLASS("table") { + CAPTION() { str << "Statistics"; } + TABLEBODY() { + TABLER() { + TABLED() { str << "Free"; } + TABLED() { str << free; } + } + TABLER() { + TABLED() { str << "Ready"; } + TABLED() { str << ready; } + } + TABLER() { + TABLED() { str << "Read"; } + TABLED() { str << read; } + } + TABLER() { + TABLED() { str << "Finished"; } + TABLED() { str << finished; } + } + TABLER() { + TABLED() { STRONG() { str << "Total"; }} + TABLED() { str << (finished + read + ready + free); } + } + } + } + + TABLE_CLASS("table") { + CAPTION() { str << "Sessions"; } + TABLEHEAD() { + TABLER() { + TABLEH() { } + TABLEH() { str << "Id"; } + TABLEH() { str << "Partitions"; } + TABLEH() { str << "Families"; } + TABLEH() { str << "Statistics"; }; + TABLEH() { str << "Client node"; } + TABLEH() { str << "Proxy node"; } + } + } + TABLEBODY() { + size_t familyAllCount = 0; + size_t activeFamilyCount = 0; + size_t releasingFamilyCount = 0; + size_t activePartitionCount = 0; + size_t inactivePartitionCount = 0; + size_t releasingPartitionCount = 0; + + size_t i = 0; + for (auto& [pipe, session] : Sessions) { + if (session->ClientId != consumerName) { + continue; + } + + familyAllCount += session->Families.size(); + activeFamilyCount += session->ActiveFamilyCount; + releasingFamilyCount += session->ReleasingFamilyCount; + activePartitionCount += session->ActivePartitionCount; + inactivePartitionCount += session->InactivePartitionCount; + releasingPartitionCount += session->ReleasingPartitionCount; + + TABLER() { + TABLED() { str << ++i; } + TABLED() { str << session->SessionName; } + TABLED() { str << (session->Partitions.empty() ? 
"" : JoinRange(", ", session->Partitions.begin(), session->Partitions.end())); } + TABLED() { str << session->Families.size() << " / " << session->ActiveFamilyCount << " / " << session->ReleasingFamilyCount; } + TABLED() { str << (session->ActivePartitionCount + session->InactivePartitionCount + session->ReleasingPartitionCount) + << " / " << session->ActivePartitionCount << " / " << session->InactivePartitionCount << " / " << session->ReleasingPartitionCount; } + TABLED() { str << session->ClientNode; } + TABLED() { str << session->ProxyNodeId; } + } + } + TABLER() { + TABLED() { } + TABLED() { str << "Total:"; } + TABLED() { } + TABLED() { str << familyAllCount << " / " << activeFamilyCount << " / " << releasingFamilyCount; } + TABLED() { str << (activePartitionCount + inactivePartitionCount + releasingPartitionCount) << " / " << activePartitionCount << " / " + << inactivePartitionCount << " / " << releasingPartitionCount; } + TABLED() { } + TABLED() { } + } + } + } + } + } +} + +} diff --git a/ydb/core/persqueue/read_balancer__txinit.h b/ydb/core/persqueue/read_balancer__txinit.h index cc9a26ad4678..8b2367a15489 100644 --- a/ydb/core/persqueue/read_balancer__txinit.h +++ b/ydb/core/persqueue/read_balancer__txinit.h @@ -60,7 +60,7 @@ struct TPersQueueReadBalancer::TTxInit : public ITransaction { Self->PartitionGraph = MakePartitionGraph(Self->TabletConfig); if (SplitMergeEnabled(Self->TabletConfig)) { - Self->PartitionsScaleManager = std::make_unique(Self->Topic, Self->DatabasePath, Self->PathId, Self->Version, Self->TabletConfig); + Self->PartitionsScaleManager = std::make_unique(Self->Topic, Self->Path, Self->DatabasePath, Self->PathId, Self->Version, Self->TabletConfig); } Self->UpdateConfigCounters(); } diff --git a/ydb/core/persqueue/sourceid.h b/ydb/core/persqueue/sourceid.h index 992e1271c847..775b5878c8fd 100644 --- a/ydb/core/persqueue/sourceid.h +++ b/ydb/core/persqueue/sourceid.h @@ -51,6 +51,7 @@ class TSourceIdStorage: private THeartbeatProcessor { 
void RegisterSourceId(const TString& sourceId, Args&&... args) { RegisterSourceIdInfo(sourceId, TSourceIdInfo(std::forward(args)...), false); } + void RegisterSourceIdInfo(const TString& sourceId, TSourceIdInfo&& sourceIdInfo, bool load); void DeregisterSourceId(const TString& sourceId); @@ -65,7 +66,6 @@ class TSourceIdStorage: private THeartbeatProcessor { private: void LoadRawSourceIdInfo(const TString& key, const TString& data, TInstant now); void LoadProtoSourceIdInfo(const TString& key, const TString& data); - void RegisterSourceIdInfo(const TString& sourceId, TSourceIdInfo&& sourceIdInfo, bool load); private: TSourceIdMap InMemorySourceIds; @@ -85,6 +85,10 @@ class TSourceIdWriter { return Registrations; } + const THashSet& GetSourceIdsToDelete() const { + return Deregistrations; + } + template void RegisterSourceId(const TString& sourceId, Args&&... args) { Registrations[sourceId] = TSourceIdInfo(std::forward(args)...); diff --git a/ydb/core/persqueue/transaction.cpp b/ydb/core/persqueue/transaction.cpp index 50fc8e21bfc6..8bc361b938a1 100644 --- a/ydb/core/persqueue/transaction.cpp +++ b/ydb/core/persqueue/transaction.cpp @@ -16,11 +16,29 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& MinStep = tx.GetMinStep(); MaxStep = tx.GetMaxStep(); - for (ui64 tabletId : tx.GetSenders()) { - Senders.insert(tabletId); + ReadSetCount = 0; + + for (auto& p : tx.GetPredicatesReceived()) { + PredicatesReceived[p.GetTabletId()] = p; + + if (p.HasPredicate()) { + SetDecision(ParticipantsDecision, + p.GetPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT); + ++ReadSetCount; + } } - for (ui64 tabletId : tx.GetReceivers()) { - Receivers.insert(tabletId); + + PredicateAcksCount = 0; + + for (ui64 tabletId : tx.GetPredicateRecipients()) { + PredicateRecipients[tabletId] = false; + } + + if (tx.HasPredicate()) { + SelfDecision = + tx.GetPredicate() ? 
NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; + } else { + SelfDecision = NKikimrTx::TReadSetData::DECISION_UNKNOWN; } switch (Kind) { @@ -34,15 +52,6 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& Y_FAIL_S("unknown transaction type"); } - if (tx.HasSelfPredicate()) { - SelfDecision = - tx.GetSelfPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; - } - if (tx.HasAggrPredicate()) { - ParticipantsDecision = - tx.GetAggrPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; - } - Y_ABORT_UNLESS(tx.HasSourceActor()); SourceActor = ActorIdFromProto(tx.GetSourceActor()); @@ -51,6 +60,11 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& } } +TString TDistributedTransaction::LogPrefix() const +{ + return TStringBuilder() << "[TxId: " << TxId << "] "; +} + void TDistributedTransaction::InitDataTransaction(const NKikimrPQ::TTransaction& tx) { InitPartitions(tx.GetOperations()); @@ -115,6 +129,11 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TEvProposeTr Y_FAIL_S("unknown TxBody case"); } + PartitionRepliesCount = 0; + PartitionRepliesExpected = 0; + + ReadSetCount = 0; + Y_ABORT_UNLESS(event.HasSourceActor()); SourceActor = ActorIdFromProto(event.GetSourceActor()); } @@ -124,15 +143,15 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TDataTransac { Kind = NKikimrPQ::TTransaction::KIND_DATA; - for (ui64 tablet : txBody.GetSendingShards()) { - if (tablet != extractTabletId) { - Senders.insert(tablet); + for (ui64 tabletId : txBody.GetSendingShards()) { + if (tabletId != extractTabletId) { + PredicatesReceived[tabletId].SetTabletId(tabletId); } } - for (ui64 tablet : txBody.GetReceivingShards()) { - if (tablet != extractTabletId) { - Receivers.insert(tablet); + for (ui64 tabletId : txBody.GetReceivingShards()) { + if (tabletId != 
extractTabletId) { + PredicateRecipients[tabletId] = false; } } @@ -143,11 +162,6 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TDataTransac } else { WriteId = Nothing(); } - - PartitionRepliesCount = 0; - PartitionRepliesExpected = 0; - - ReadSetCount = 0; } void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTransaction& txBody, @@ -172,7 +186,7 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTrans if (node->Children.empty()) { for (const auto* r : node->Parents) { if (extractTabletId != r->TabletId) { - Senders.insert(r->TabletId); + PredicatesReceived[r->TabletId].SetTabletId(r->TabletId); } } } @@ -180,18 +194,13 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTrans for (const auto* r : node->Children) { if (r->Children.empty()) { if (extractTabletId != r->TabletId) { - Receivers.insert(r->TabletId); + PredicateRecipients[r->TabletId] = false; } } } } InitPartitions(); - - PartitionRepliesCount = 0; - PartitionRepliesExpected = 0; - - ReadSetCount = 0; } void TDistributedTransaction::OnPlanStep(ui64 step) @@ -204,44 +213,65 @@ void TDistributedTransaction::OnPlanStep(ui64 step) void TDistributedTransaction::OnTxCalcPredicateResult(const TEvPQ::TEvTxCalcPredicateResult& event) { - OnPartitionResult(event, - event.Predicate ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT); + PQ_LOG_D("Handle TEvTxCalcPredicateResult"); + + TMaybe decision; + + if (event.Predicate.Defined()) { + decision = *event.Predicate ? 
NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; + } + + OnPartitionResult(event, decision); } void TDistributedTransaction::OnProposePartitionConfigResult(const TEvPQ::TEvProposePartitionConfigResult& event) { + PQ_LOG_D("Handle TEvProposePartitionConfigResult"); + OnPartitionResult(event, NKikimrTx::TReadSetData::DECISION_COMMIT); } template -void TDistributedTransaction::OnPartitionResult(const E& event, EDecision decision) +void TDistributedTransaction::OnPartitionResult(const E& event, TMaybe decision) { Y_ABORT_UNLESS(Step == event.Step); Y_ABORT_UNLESS(TxId == event.TxId); Y_ABORT_UNLESS(Partitions.contains(event.Partition.OriginalPartitionId)); - SetDecision(SelfDecision, decision); + if (decision.Defined()) { + SetDecision(SelfDecision, *decision); + } ++PartitionRepliesCount; + + PQ_LOG_D("Partition responses " << PartitionRepliesCount << "/" << PartitionRepliesExpected); } void TDistributedTransaction::OnReadSet(const NKikimrTx::TEvReadSet& event, const TActorId& sender, std::unique_ptr ack) { + PQ_LOG_D("Handle TEvReadSet"); + Y_ABORT_UNLESS((Step == Max()) || (event.HasStep() && (Step == event.GetStep()))); Y_ABORT_UNLESS(event.HasTxId() && (TxId == event.GetTxId())); - if (Senders.contains(event.GetTabletProducer())) { + if (PredicatesReceived.contains(event.GetTabletProducer())) { NKikimrTx::TReadSetData data; Y_ABORT_UNLESS(event.HasReadSet() && data.ParseFromString(event.GetReadSet())); SetDecision(ParticipantsDecision, data.GetDecision()); ReadSetAcks[sender] = std::move(ack); - ++ReadSetCount; + auto& p = PredicatesReceived[event.GetTabletProducer()]; + if (!p.HasPredicate()) { + p.SetPredicate(data.GetDecision() == NKikimrTx::TReadSetData::DECISION_COMMIT); + ++ReadSetCount; + + PQ_LOG_D("Predicates " << ReadSetCount << "/" << PredicatesReceived.size()); + } } else { Y_DEBUG_ABORT("unknown sender tablet %" PRIu64, event.GetTabletProducer()); } @@ -249,10 +279,17 @@ void TDistributedTransaction::OnReadSet(const 
NKikimrTx::TEvReadSet& event, void TDistributedTransaction::OnReadSetAck(const NKikimrTx::TEvReadSetAck& event) { + PQ_LOG_D("Handle TEvReadSetAck"); + Y_ABORT_UNLESS(event.HasStep() && (Step == event.GetStep())); Y_ABORT_UNLESS(event.HasTxId() && (TxId == event.GetTxId())); - Receivers.erase(event.GetTabletConsumer()); + if (PredicateRecipients.contains(event.GetTabletConsumer())) { + PredicateRecipients[event.GetTabletConsumer()] = true; + ++PredicateAcksCount; + + PQ_LOG_D("Predicate acks " << PredicateAcksCount << "/" << PredicateRecipients.size()); + } } void TDistributedTransaction::OnTxCommitDone(const TEvPQ::TEvTxCommitDone& event) @@ -271,7 +308,7 @@ auto TDistributedTransaction::GetDecision() const -> EDecision constexpr EDecision abort = NKikimrTx::TReadSetData::DECISION_ABORT; constexpr EDecision unknown = NKikimrTx::TReadSetData::DECISION_UNKNOWN; - EDecision aggrDecision = Senders.empty() ? commit : ParticipantsDecision; + const EDecision aggrDecision = PredicatesReceived.empty() ? 
commit : ParticipantsDecision; if ((SelfDecision == commit) && (aggrDecision == commit)) { return commit; @@ -286,14 +323,15 @@ auto TDistributedTransaction::GetDecision() const -> EDecision bool TDistributedTransaction::HaveParticipantsDecision() const { return - (Senders.size() == ReadSetCount) && + (PredicatesReceived.size() == ReadSetCount) && (ParticipantsDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) || - Senders.empty(); + PredicatesReceived.empty(); } bool TDistributedTransaction::HaveAllRecipientsReceive() const { - return Receivers.empty(); + PQ_LOG_D("PredicateAcks: " << PredicateAcksCount << "/" << PredicateRecipients.size()); + return PredicateRecipients.size() == PredicateAcksCount; } void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& request, @@ -321,6 +359,18 @@ void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& reque Y_FAIL_S("unknown transaction type"); } + tx.MutableOperations()->Add(Operations.begin(), Operations.end()); + if (SelfDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { + tx.SetPredicate(SelfDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); + } + + for (auto& [_, predicate] : PredicatesReceived) { + *tx.AddPredicatesReceived() = predicate; + } + for (auto& [tabletId, _] : PredicateRecipients) { + tx.AddPredicateRecipients(tabletId); + } + Y_ABORT_UNLESS(SourceActor != TActorId()); ActorIdToProto(SourceActor, tx.MutableSourceActor()); @@ -336,19 +386,6 @@ void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& reque void TDistributedTransaction::AddCmdWriteDataTx(NKikimrPQ::TTransaction& tx) { - for (ui64 tabletId : Senders) { - tx.AddSenders(tabletId); - } - for (ui64 tabletId : Receivers) { - tx.AddReceivers(tabletId); - } - tx.MutableOperations()->Add(Operations.begin(), Operations.end()); - if (SelfDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { - tx.SetSelfPredicate(SelfDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); - } - if 
(ParticipantsDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { - tx.SetAggrPredicate(ParticipantsDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); - } if (WriteId.Defined()) { SetWriteId(tx, *WriteId); } @@ -360,16 +397,6 @@ void TDistributedTransaction::AddCmdWriteConfigTx(NKikimrPQ::TTransaction& tx) *tx.MutableBootstrapConfig() = BootstrapConfig; } -void TDistributedTransaction::AddCmdDelete(NKikimrClient::TKeyValueRequest& request) -{ - TString key = GetKey(); - auto range = request.AddCmdDeleteRange()->MutableRange(); - range->SetFrom(key); - range->SetIncludeFrom(true); - range->SetTo(key); - range->SetIncludeTo(true); -} - void TDistributedTransaction::SetDecision(NKikimrTx::TReadSetData::EDecision& var, NKikimrTx::TReadSetData::EDecision value) { if ((var == NKikimrTx::TReadSetData::DECISION_UNKNOWN) || (value == NKikimrTx::TReadSetData::DECISION_ABORT)) { diff --git a/ydb/core/persqueue/transaction.h b/ydb/core/persqueue/transaction.h index 151dac913233..549a10252062 100644 --- a/ydb/core/persqueue/transaction.h +++ b/ydb/core/persqueue/transaction.h @@ -46,8 +46,8 @@ struct TDistributedTransaction { EState State = NKikimrPQ::TTransaction::UNKNOWN; ui64 MinStep = Max(); ui64 MaxStep = Max(); - THashSet Senders; // список отправителей TEvReadSet - THashSet Receivers; // список получателей TEvReadSet + THashMap PredicatesReceived; + THashMap PredicateRecipients; TVector Operations; TMaybe WriteId; @@ -75,7 +75,6 @@ struct TDistributedTransaction { bool HaveAllRecipientsReceive() const; void AddCmdWrite(NKikimrClient::TKeyValueRequest& request, EState state); - void AddCmdDelete(NKikimrClient::TKeyValueRequest& request); static void SetDecision(NKikimrTx::TReadSetData::EDecision& var, NKikimrTx::TReadSetData::EDecision value); @@ -91,7 +90,9 @@ struct TDistributedTransaction { void InitPartitions(); template - void OnPartitionResult(const E& event, EDecision decision); + void OnPartitionResult(const E& event, TMaybe decision); + + TString 
LogPrefix() const; struct TSerializedMessage { ui32 Type; @@ -111,6 +112,7 @@ struct TDistributedTransaction { const TVector& GetBindedMsgs(ui64 tabletId); bool HasWriteOperations = false; + size_t PredicateAcksCount = 0; }; } diff --git a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp index 4f01439c8601..db447a2c5558 100644 --- a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp @@ -96,7 +96,7 @@ TTopicSdkTestSetup CreateSetup() { NKikimrConfig::TFeatureFlags ff; ff.SetEnableTopicSplitMerge(true); ff.SetEnablePQConfigTransactionsAtSchemeShard(true); - //ff.SetEnableTopicServiceTx(true); + ff.SetEnableTopicServiceTx(true); auto settings = TTopicSdkTestSetup::MakeServerSettings(); settings.SetFeatureFlags(ff); diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.cpp b/ydb/core/persqueue/ut/common/pq_ut_common.cpp index b0f3c7339d15..9c2ba84317ca 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/pq_ut_common.cpp @@ -44,7 +44,7 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, try { runtime.ResetScheduledCount(); - THolder request(new TEvPersQueue::TEvUpdateConfig()); + auto request = MakeHolder(); for (ui32 i = 0; i < parameters.partitions; ++i) { request->Record.MutableTabletConfig()->AddPartitionIds(i); } diff --git a/ydb/core/persqueue/ut/internals_ut.cpp b/ydb/core/persqueue/ut/internals_ut.cpp index 14c3414e2bbb..a151447b4f3d 100644 --- a/ydb/core/persqueue/ut/internals_ut.cpp +++ b/ydb/core/persqueue/ut/internals_ut.cpp @@ -39,38 +39,38 @@ void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) THead head; head.Offset = 100; TString value(100_KB, 'a'); - head.Batches.push_back(TBatch(head.Offset, 0, TVector())); + head.AddBatch(TBatch(head.Offset, 0)); for (ui32 i = 0; i < 50; ++i) { - head.Batches.back().AddBlob(TClientBlob( + 
head.AddBlob(TClientBlob( "sourceId" + TString(1,'a' + rand() % 26), i + 1, value, TMaybe(), TInstant::MilliSeconds(i + 1), TInstant::MilliSeconds(i + 1), 1, "", "" )); if (!headCompacted) - all.push_back(head.Batches.back().Blobs.back()); + all.push_back(head.GetLastBatch().Blobs.back()); } - head.Batches.back().Pack(); - UNIT_ASSERT(head.Batches.back().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); - head.Batches.back().Unpack(); - head.Batches.back().Pack(); + head.MutableLastBatch().Pack(); + UNIT_ASSERT(head.GetLastBatch().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); + head.MutableLastBatch().Unpack(); + head.MutableLastBatch().Pack(); TString str; - head.Batches.back().SerializeTo(str); + head.GetLastBatch().SerializeTo(str); auto header = ExtractHeader(str.c_str(), str.size()); TBatch batch(header, str.c_str() + header.ByteSize() + sizeof(ui16)); batch.Unpack(); - head.PackedSize = head.Batches.back().GetPackedSize(); - UNIT_ASSERT(head.Batches.back().GetUnpackedSize() + GetMaxHeaderSize() >= head.Batches.back().GetPackedSize()); + head.PackedSize = head.GetLastBatch().GetPackedSize(); + UNIT_ASSERT(head.GetLastBatch().GetUnpackedSize() + GetMaxHeaderSize() >= head.GetLastBatch().GetPackedSize()); THead newHead; newHead.Offset = head.GetNextOffset(); - newHead.Batches.push_back(TBatch(newHead.Offset, 0, TVector())); + newHead.AddBatch(TBatch(newHead.Offset, 0)); for (ui32 i = 0; i < 10; ++i) { - newHead.Batches.back().AddBlob(TClientBlob( + newHead.AddBlob(TClientBlob( "sourceId2", i + 1, value, TMaybe(), TInstant::MilliSeconds(i + 1000), TInstant::MilliSeconds(i + 1000), 1, "", "" )); - all.push_back(newHead.Batches.back().Blobs.back()); //newHead always glued + all.push_back(newHead.GetLastBatch().Blobs.back()); //newHead always glued } - newHead.PackedSize = newHead.Batches.back().GetUnpackedSize(); + newHead.PackedSize = newHead.GetLastBatch().GetUnpackedSize(); TString value2(partSize, 'b'); ui32 maxBlobSize = 8 << 20; 
TPartitionedBlob blob(TPartitionId(0), newHead.GetNextOffset(), "sourceId3", 1, parts, parts * value2.size(), head, newHead, headCompacted, false, maxBlobSize); @@ -125,16 +125,16 @@ void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) if (formed.empty()) { //nothing compacted - newHead must be here if (!headCompacted) { - for (auto& p : head.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) + for (ui32 pp = 0; pp < head.GetBatches().size(); ++pp) { + head.MutableBatch(pp).Unpack(); + for (const auto& b : head.GetBatch(pp).Blobs) real.push_back(b); } } - for (auto& p : newHead.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) + for (ui32 pp = 0; pp < newHead.GetBatches().size(); ++pp) { + newHead.MutableBatch(pp).Unpack(); + for (const auto& b : newHead.GetBatch(pp).Blobs) real.push_back(b); } } diff --git a/ydb/core/persqueue/ut/make_config.cpp b/ydb/core/persqueue/ut/make_config.cpp index c3cbe325cc75..eb9ae5e6486f 100644 --- a/ydb/core/persqueue/ut/make_config.cpp +++ b/ydb/core/persqueue/ut/make_config.cpp @@ -1,27 +1,44 @@ #include "make_config.h" #include +#include #include namespace NKikimr::NPQ::NHelpers { -NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, - const TVector& consumers, - ui32 partitionsCount, - NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode) +NKikimrPQ::TPQTabletConfig MakeConfig(const TMakeConfigParams& params) { NKikimrPQ::TPQTabletConfig config; - config.SetVersion(version); + config.SetVersion(params.Version); - for (auto& c : consumers) { + for (auto& c : params.Consumers) { config.AddReadRules(c.Consumer); config.AddReadRuleGenerations(c.Generation); } - for (ui32 id = 0; id < partitionsCount; ++id) { - config.AddPartitionIds(id); + for (const auto& e : params.AllPartitions) { + auto* p = config.AddAllPartitions(); + p->SetPartitionId(e.Id); + p->SetTabletId(e.TabletId); + for (auto t : e.Children) { + p->AddChildPartitionIds(t); + } + for (auto t : e.Parents) { + p->AddParentPartitionIds(t); 
+ } + } + + for (const auto& e : params.Partitions) { + auto* p = config.AddPartitions(); + p->SetPartitionId(e.Id); + } + + if (params.AllPartitions.empty() && params.Partitions.empty()) { + for (ui32 id = 0; id < params.PartitionsCount; ++id) { + config.AddPartitionIds(id); + } } config.SetTopicName("rt3.dc1--account--topic"); @@ -30,15 +47,37 @@ NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, config.SetLocalDC(true); config.SetYdbDatabasePath(""); - config.SetMeteringMode(meteringMode); + config.SetMeteringMode(params.MeteringMode); config.MutablePartitionConfig()->SetLifetimeSeconds(TDuration::Hours(24).Seconds()); config.MutablePartitionConfig()->SetWriteSpeedInBytesPerSecond(10 << 20); + if (params.HugeConfig) { + for (size_t i = 0; i < 2'500; ++i) { + TString name = Sprintf("fake-consumer-%s-%" PRISZT, + TString(3'000, 'a').data(), i); + config.AddReadRules(name); + config.AddReadRuleGenerations(1); + } + } + Migrate(config); return config; } +NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, + const TVector& consumers, + ui32 partitionsCount, + NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode) +{ + TMakeConfigParams params; + params.Version = version; + params.Consumers = consumers; + params.PartitionsCount = partitionsCount; + params.MeteringMode = meteringMode; + return MakeConfig(params); +} + NKikimrPQ::TBootstrapConfig MakeBootstrapConfig() { return {}; diff --git a/ydb/core/persqueue/ut/make_config.h b/ydb/core/persqueue/ut/make_config.h index bdfec27058bc..9b072116be93 100644 --- a/ydb/core/persqueue/ut/make_config.h +++ b/ydb/core/persqueue/ut/make_config.h @@ -18,6 +18,25 @@ struct TCreateConsumerParams { ui64 ReadRuleGeneration = 0; }; +struct TPartitionParams { + ui32 Id = Max(); + ui64 TabletId = Max(); + TVector Children; + TVector Parents; +}; + +struct TMakeConfigParams { + ui64 Version = 0; + TVector Consumers; + TVector Partitions; + TVector AllPartitions; + ui32 PartitionsCount = 1; + NKikimrPQ::TPQTabletConfig::EMeteringMode 
MeteringMode = NKikimrPQ::TPQTabletConfig::METERING_MODE_REQUEST_UNITS; + bool HugeConfig = false; +}; + +NKikimrPQ::TPQTabletConfig MakeConfig(const TMakeConfigParams& params); + NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, const TVector& consumers, ui32 partitionsCount = 1, diff --git a/ydb/core/persqueue/ut/partition_ut.cpp b/ydb/core/persqueue/ut/partition_ut.cpp index 8d93159cee4b..1235bb85526a 100644 --- a/ydb/core/persqueue/ut/partition_ut.cpp +++ b/ydb/core/persqueue/ut/partition_ut.cpp @@ -882,7 +882,7 @@ void TPartitionFixture::SendProposeTransactionRequest(ui32 partition, bool immediate, ui64 txId) { - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); auto* body = event->Record.MutableData(); @@ -988,7 +988,8 @@ void TPartitionFixture::SendChangePartitionConfig(const TConfigParams& config) auto event = MakeHolder(TopicConverter, MakeConfig(config.Version, config.Consumers, 1, - config.MeteringMode)); + config.MeteringMode), + NKikimrPQ::TBootstrapConfig()); Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release())); } @@ -1605,7 +1606,7 @@ ui64 TPartitionTxTestHelper::MakeAndSendWriteTx(const TSrcIdMap& srcIdsAffected) ui64 TPartitionTxTestHelper::MakeAndSendImmediateTx(const TSrcIdMap& srcIdsAffected) { auto actIter = AddWriteTxImpl(srcIdsAffected, NextActId++, 0); - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); auto* body = event->Record.MutableData(); diff --git a/ydb/core/persqueue/ut/partitiongraph_ut.cpp b/ydb/core/persqueue/ut/partitiongraph_ut.cpp index 9067d76fece7..eb8d1cff01cf 100644 --- a/ydb/core/persqueue/ut/partitiongraph_ut.cpp +++ b/ydb/core/persqueue/ut/partitiongraph_ut.cpp @@ -43,8 +43,7 @@ Y_UNIT_TEST_SUITE(TPartitionGraphTest) { p5->AddParentPartitionIds(3); p5->AddParentPartitionIds(4); - TPartitionGraph graph; - graph = 
std::move(MakePartitionGraph(config)); + TPartitionGraph graph = MakePartitionGraph(config); const auto n0 = graph.GetPartition(0); const auto n1 = graph.GetPartition(1); @@ -76,5 +75,45 @@ Y_UNIT_TEST_SUITE(TPartitionGraphTest) { UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n2) != n5->HierarhicalParents.end()); UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n3) != n5->HierarhicalParents.end()); UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n4) != n5->HierarhicalParents.end()); + + { + std::set traversedNodes; + graph.Travers([&](ui32 id) { + traversedNodes.insert(id); + return true; + }); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 6); + } + + { + std::set traversedNodes; + graph.Travers(0,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 0); + } + + { + std::set traversedNodes; + graph.Travers(0,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }, true); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 1); + UNIT_ASSERT(traversedNodes.contains(0)); + } + + { + std::set traversedNodes; + graph.Travers(2,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }, true); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 3); + UNIT_ASSERT(traversedNodes.contains(2)); + UNIT_ASSERT(traversedNodes.contains(3)); + UNIT_ASSERT(traversedNodes.contains(5)); + } } } diff --git a/ydb/core/persqueue/ut/pq_ut.cpp b/ydb/core/persqueue/ut/pq_ut.cpp index abe6a4e7a0cb..aed439baa2fd 100644 --- a/ydb/core/persqueue/ut/pq_ut.cpp +++ b/ydb/core/persqueue/ut/pq_ut.cpp @@ -2326,6 +2326,100 @@ Y_UNIT_TEST(TestTabletRestoreEventsOrder) { }); } +Y_UNIT_TEST(TestReadAndDeleteConsumer) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function setup, bool& activeZone) { + TFinalizer 
finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(2000); + tc.Runtime->SetScheduledEventFilter(&tc.ImmediateLogFlushAndRequestTimeoutFilter); + + TVector> data; + TString msg; + msg.resize(102400, 'a'); + for (ui64 i = 1; i <= 1000; ++i) { + data.emplace_back(i, msg); + } + + PQTabletPrepare({.maxCountInPartition=100, .deleteTime=TDuration::Days(2).Seconds(), .partitions=1}, + {{"user1", true}, {"user2", true}}, tc); + CmdWrite(0, "sourceid1", data, tc, false, {}, true); + + // Reset tablet cache + PQTabletRestart(tc); + + TAutoPtr handle; + TEvPersQueue::TEvResponse* readResult = nullptr; + THolder readRequest; + TEvPersQueue::TEvUpdateConfigResponse* consumerDeleteResult = nullptr; + THolder consumerDeleteRequest; + + // Read request + { + readRequest.Reset(new TEvPersQueue::TEvRequest); + auto req = readRequest->Record.MutablePartitionRequest(); + req->SetPartition(0); + auto read = req->MutableCmdRead(); + read->SetOffset(1); + read->SetClientId("user1"); + read->SetCount(1); + read->SetBytes(1'000'000); + read->SetTimeoutMs(5000); + } + + // Consumer delete request + { + consumerDeleteRequest.Reset(new TEvPersQueue::TEvUpdateConfig()); + consumerDeleteRequest->MutableRecord()->SetTxId(42); + auto& cfg = *consumerDeleteRequest->MutableRecord()->MutableTabletConfig(); + cfg.SetVersion(42); + cfg.AddPartitionIds(0); + cfg.AddPartitions()->SetPartitionId(0); + cfg.SetLocalDC(true); + cfg.SetTopic("topic"); + auto& cons = *cfg.AddConsumers(); + cons.SetName("user2"); + cons.SetImportant(true); + } + + TActorId edge = tc.Runtime->AllocateEdgeActor(); + + // Delete consumer during read request + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, readRequest.Release(), 0, GetPipeConfigWithRetries()); + + // Intercept TEvPQ::TEvBlobResponse event + std::vector capturedBlobResponses; + auto captureBlobResponsesObserver = tc.Runtime->AddObserver([&](TEvPQ::TEvBlobResponse::TPtr& ev) { + 
capturedBlobResponses.emplace_back().Swap(ev); + }); + + // Delete consumer while read request is still in progress + tc.Runtime->SendToPipe(tc.TabletId, edge, consumerDeleteRequest.Release(), 0, GetPipeConfigWithRetries()); + consumerDeleteResult = tc.Runtime->GrabEdgeEvent(handle); + { + //Cerr << "Got consumer delete response: " << consumerDeleteResult->Record << Endl; + UNIT_ASSERT(consumerDeleteResult->Record.HasStatus()); + UNIT_ASSERT_EQUAL(consumerDeleteResult->Record.GetStatus(), NKikimrPQ::EStatus::OK); + } + + // Resend intercepted blob responses and wait for read result + captureBlobResponsesObserver.Remove(); + for (auto& ev : capturedBlobResponses) { + tc.Runtime->Send(ev.Release(), 0, true); + } + + readResult = tc.Runtime->GrabEdgeEvent(handle); + { + //Cerr << "Got read response: " << readResult->Record << Endl; + UNIT_ASSERT(readResult->Record.HasStatus()); + UNIT_ASSERT_EQUAL(readResult->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + UNIT_ASSERT_STRING_CONTAINS_C(readResult->Record.GetErrorReason(), "Consumer user1 is gone from partition", readResult->Record.Utf8DebugString()); + } + }); +} } // Y_UNIT_TEST_SUITE(TPQTest) diff --git a/ydb/core/persqueue/ut/pqtablet_ut.cpp b/ydb/core/persqueue/ut/pqtablet_ut.cpp index 6159a8780b0c..f6388c452904 100644 --- a/ydb/core/persqueue/ut/pqtablet_ut.cpp +++ b/ydb/core/persqueue/ut/pqtablet_ut.cpp @@ -177,6 +177,7 @@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void SetUp(NUnitTest::TTestContext&) override; void TearDown(NUnitTest::TTestContext&) override; + void ResetPipe(); void EnsurePipeExist(); void SendToPipe(const TActorId& sender, IEventBase* event, @@ -206,7 +207,9 @@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void StartPQWriteTxsObserver(); void WaitForPQWriteTxs(); - void WaitForCalcPredicateResult(); + template void WaitForEvent(size_t count); + void WaitForCalcPredicateResult(size_t count = 1); + void WaitForProposePartitionConfigResult(size_t count = 
1); void TestWaitingForTEvReadSet(size_t senders, size_t receivers); @@ -244,6 +247,8 @@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void TPQTabletFixture::SetUp(NUnitTest::TTestContext&) { Ctx.ConstructInPlace(); + Ctx->EnableDetailedPQLog = true; + Finalizer.ConstructInPlace(*Ctx); Ctx->Prepare(); @@ -251,9 +256,15 @@ void TPQTabletFixture::SetUp(NUnitTest::TTestContext&) } void TPQTabletFixture::TearDown(NUnitTest::TTestContext&) +{ + ResetPipe(); +} + +void TPQTabletFixture::ResetPipe() { if (Pipe != TActorId()) { Ctx->Runtime->ClosePipe(Pipe, Ctx->Edge, 0); + Pipe = TActorId(); } } @@ -283,7 +294,7 @@ void TPQTabletFixture::SendToPipe(const TActorId& sender, void TPQTabletFixture::SendProposeTransactionRequest(const TProposeTransactionParams& params) { - auto event = MakeHolder(); + auto event = MakeHolder(); THashSet partitions; ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); @@ -406,33 +417,36 @@ void TPQTabletFixture::WaitReadSet(NHelpers::TPQTabletMock& tablet, const TReadS UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); } + auto readSet = std::move(*tablet.ReadSet); + tablet.ReadSet = Nothing(); + if (matcher.Step.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasStep()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Step, tablet.ReadSet->GetStep()); + UNIT_ASSERT(readSet.HasStep()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Step, readSet.GetStep()); } if (matcher.TxId.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTxId()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.TxId, tablet.ReadSet->GetTxId()); + UNIT_ASSERT(readSet.HasTxId()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.TxId, readSet.GetTxId()); } if (matcher.Source.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTabletSource()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Source, tablet.ReadSet->GetTabletSource()); + UNIT_ASSERT(readSet.HasTabletSource()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Source, readSet.GetTabletSource()); } if (matcher.Target.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTabletDest()); - 
UNIT_ASSERT_VALUES_EQUAL(*matcher.Target, tablet.ReadSet->GetTabletDest()); + UNIT_ASSERT(readSet.HasTabletDest()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Target, readSet.GetTabletDest()); } if (matcher.Decision.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasReadSet()); + UNIT_ASSERT(readSet.HasReadSet()); NKikimrTx::TReadSetData data; - Y_ABORT_UNLESS(data.ParseFromString(tablet.ReadSet->GetReadSet())); + Y_ABORT_UNLESS(data.ParseFromString(readSet.GetReadSet())); UNIT_ASSERT_EQUAL(*matcher.Decision, data.GetDecision()); } if (matcher.Producer.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTabletProducer()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Producer, tablet.ReadSet->GetTabletProducer()); + UNIT_ASSERT(readSet.HasTabletProducer()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Producer, readSet.GetTabletProducer()); } } @@ -521,19 +535,23 @@ void TPQTabletFixture::WaitDropTabletReply(const TDropTabletReplyMatcher& matche } } -void TPQTabletFixture::WaitForCalcPredicateResult() +template +void TPQTabletFixture::WaitForEvent(size_t count) { bool found = false; + size_t received = 0; - auto observer = [&found](TAutoPtr& event) { - if (auto* msg = event->CastAsLocal()) { - found = true; + TTestActorRuntimeBase::TEventObserver prev; + auto observer = [&found, &prev, &received, count](TAutoPtr& event) { + if (auto* msg = event->CastAsLocal()) { + ++received; + found = (received >= count); } - return TTestActorRuntimeBase::EEventAction::PROCESS; + return prev ? 
prev(event) : TTestActorRuntimeBase::EEventAction::PROCESS; }; - Ctx->Runtime->SetObserverFunc(observer); + prev = Ctx->Runtime->SetObserverFunc(observer); TDispatchOptions options; options.CustomFinalCondition = [&found]() { @@ -541,6 +559,18 @@ void TPQTabletFixture::WaitForCalcPredicateResult() }; UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); + + Ctx->Runtime->SetObserverFunc(prev); +} + +void TPQTabletFixture::WaitForCalcPredicateResult(size_t count) +{ + WaitForEvent(count); +} + +void TPQTabletFixture::WaitForProposePartitionConfigResult(size_t count) +{ + WaitForEvent(count); } std::unique_ptr TPQTabletFixture::MakeGetOwnershipRequest(const TGetOwnershipRequestParams& params, @@ -669,20 +699,40 @@ void TPQTabletFixture::SendWriteRequest(const TWriteRequestParams& params) void TPQTabletFixture::WaitWriteResponse(const TWriteResponseMatcher& matcher) { - auto event = Ctx->Runtime->GrabEdgeEvent(); - UNIT_ASSERT(event != nullptr); + bool found = false; - if (matcher.Cookie.Defined()) { - UNIT_ASSERT(event->Record.HasCookie()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Cookie, event->Record.GetCookie()); - } + auto observer = [&found, &matcher](TAutoPtr& event) { + if (auto* msg = event->CastAsLocal()) { + if (matcher.Cookie.Defined()) { + if (msg->Record.HasCookie() && (*matcher.Cookie == msg->Record.GetCookie())) { + found = true; + } + } + } + + return TTestActorRuntimeBase::EEventAction::PROCESS; + }; + + auto prev = Ctx->Runtime->SetObserverFunc(observer); + + TDispatchOptions options; + options.CustomFinalCondition = [&found]() { + return found; + }; + + UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); + + Ctx->Runtime->SetObserverFunc(prev); } void TPQTabletFixture::StartPQWriteObserver(bool& flag, unsigned cookie) { flag = false; + auto observer = [&flag, cookie](TAutoPtr& event) { if (auto* kvResponse = event->CastAsLocal()) { + if (kvResponse->Record.HasCookie()) { + } if ((event->Sender == event->Recipient) && kvResponse->Record.HasCookie() && 
(kvResponse->Record.GetCookie() == cookie)) { @@ -692,6 +742,7 @@ void TPQTabletFixture::StartPQWriteObserver(bool& flag, unsigned cookie) return TTestActorRuntimeBase::EEventAction::PROCESS; }; + Ctx->Runtime->SetObserverFunc(observer); } @@ -797,8 +848,8 @@ Y_UNIT_TEST_F(Multiple_PQTablets, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId_2, .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId_2, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId_2, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId_2, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -872,8 +923,8 @@ Y_UNIT_TEST_F(PQTablet_Send_RS_With_Abort, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId, .Status=NKikimrPQ::TEvProposeTransactionResult::ABORTED}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -911,8 +962,8 @@ Y_UNIT_TEST_F(Partition_Send_Predicate_With_False, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId, .Status=NKikimrPQ::TEvProposeTransactionResult::ABORTED}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -1313,6 +1364,271 @@ 
Y_UNIT_TEST_F(ProposeTx_Command_After_Propose, TPQTabletFixture) .Status=NMsgBusProxy::MSTATUS_ERROR}); } +Y_UNIT_TEST_F(Read_TEvTxCommit_After_Restart, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + SendProposeTransactionRequest({.TxId=txId, + .Senders={mockTabletId}, .Receivers={mockTabletId}, + .TxOps={ + {.Partition=0, .Consumer="user", .Begin=0, .End=0, .Path="/topic"}, + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForCalcPredicateResult(); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(Config_TEvTxCommit_After_Restart, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0} + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={1}}, + {.Id=1, .TabletId=mockTabletId, .Children={0}, .Parents={}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + 
.Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(One_Tablet_For_All_Partitions, TPQTabletFixture) +{ + const ui64 txId = 67890; + + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + {.Id=2} + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={1, 2}, .Parents={}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}}, + {.Id=2, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + WaitProposeTransactionResponse({.TxId=txId, + 
.Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); +} + +Y_UNIT_TEST_F(One_New_Partition_In_Another_Tablet, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={1, 2}, .Parents={}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}}, + {.Id=2, .TabletId=mockTabletId, .Children={}, .Parents={0}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(All_New_Partitions_In_Another_Tablet, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + 
.Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=2, .TabletId=mockTabletId, .Children={0, 1}, .Parents={}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(Huge_ProposeTransacton, TPQTabletFixture) +{ + const ui64 mockTabletId = 22222; + + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7}, + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=2, .TabletId=mockTabletId, .Children={0, 1}, .Parents={}} + }, + .HugeConfig = true}); + + const ui64 txId_1 = 67890; + SendProposeTransactionRequest({.TxId=txId_1, + 
.Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId_1, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + const ui64 txId_2 = 67891; + SendProposeTransactionRequest({.TxId=txId_2, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId_2, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + PQTabletRestart(*Ctx); + ResetPipe(); + + SendPlanStep({.Step=100, .TxIds={txId_1, txId_2}}); + WaitPlanStepAck({.Step=100, .TxIds={txId_1, txId_2}}); + WaitPlanStepAccepted({.Step=100}); +} + } } diff --git a/ydb/core/persqueue/ut/user_action_processor_ut.cpp b/ydb/core/persqueue/ut/user_action_processor_ut.cpp index c45bc09741bb..0796c620f39b 100644 --- a/ydb/core/persqueue/ut/user_action_processor_ut.cpp +++ b/ydb/core/persqueue/ut/user_action_processor_ut.cpp @@ -647,7 +647,7 @@ void TUserActionProcessorFixture::SendProposeTransactionRequest(ui32 partition, bool immediate, ui64 txId) { - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSource()); auto* body = event->Record.MutableTxBody(); @@ -665,7 +665,7 @@ void TUserActionProcessorFixture::SendProposeTransactionRequest(ui32 partition, void TUserActionProcessorFixture::SendProposeTransactionRequest(const TProposeTransactionParams& params) { - auto event = MakeHolder(); + auto event = MakeHolder(); // // Source diff --git a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp index 80de3bd5b9c5..e8dc6c2a3de7 100644 --- a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp +++ b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp @@ -400,9 +400,9 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { } - writeSession1->Close(TDuration::Seconds(1)); - writeSession2->Close(TDuration::Seconds(1)); - 
writeSession3->Close(TDuration::Seconds(1)); + writeSession1->Close(TDuration::Seconds(2)); + writeSession2->Close(TDuration::Seconds(2)); + writeSession3->Close(TDuration::Seconds(2)); readSession.Close(); } @@ -723,16 +723,74 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { f.Wait(); auto v = f.GetValueSync(); - UNIT_ASSERT_C(!v.IsSuccess(), "Must receve error becuse max-partition is not 0"); + UNIT_ASSERT_C(!v.IsSuccess(), "Must receve error becuse disabling is not supported"); + } + } + + Y_UNIT_TEST(ControlPlane_BackCompatibility) { + auto topicName = "back-compatibility-test"; + + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + { + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(3) + .EndConfigurePartitioningSettings(); + client.CreateTopic(topicName, createSettings).Wait(); + } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 3); } { TAlterTopicSettings alterSettings; alterSettings .BeginAlterPartitioningSettings() - .MaxActivePartitions(0) + .MinActivePartitions(5) + .EndAlterTopicPartitioningSettings(); + client.AlterTopic(topicName, alterSettings).Wait(); + } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 5); + } + } + + Y_UNIT_TEST(ControlPlane_PauseAutoPartitioning) { + auto topicName = "autoscalit-topic"; + + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + { + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .Strategy(EAutoPartitioningStrategy::ScaleUp) + 
.EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings(); + client.CreateTopic(topicName, createSettings).Wait(); + } + + { + TAlterTopicSettings alterSettings; + alterSettings + .BeginAlterPartitioningSettings() + .MinActivePartitions(3) + .MaxActivePartitions(107) .BeginAlterAutoPartitioningSettings() - .Strategy(EAutoPartitioningStrategy::Disabled) + .Strategy(EAutoPartitioningStrategy::Paused) .EndAlterAutoPartitioningSettings() .EndAlterTopicPartitioningSettings(); auto f = client.AlterTopic(topicName, alterSettings); @@ -741,6 +799,14 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { auto v = f.GetValueSync(); UNIT_ASSERT_C(v.IsSuccess(), "Error: " << v); } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 3); + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMaxActivePartitions(), 107); + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetAutoPartitioningSettings().GetStrategy(), EAutoPartitioningStrategy::Paused); + } } Y_UNIT_TEST(ControlPlane_AutoscalingWithStorageSizeRetention) { @@ -785,7 +851,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { .BeginConfigureAutoPartitioningSettings() .UpUtilizationPercent(2) .DownUtilizationPercent(1) - .StabilizationWindow(TDuration::Seconds(1)) + .StabilizationWindow(TDuration::Seconds(2)) .Strategy(EAutoPartitioningStrategy::ScaleUp) .EndConfigureAutoPartitioningSettings() .EndConfigurePartitioningSettings(); @@ -793,19 +859,93 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { auto msg = TString(1_MB, 'a'); - auto writeSession = CreateWriteSession(client, "producer-1", 0, TEST_TOPIC, false); - UNIT_ASSERT(writeSession->Write(Msg(msg, 1))); - UNIT_ASSERT(writeSession->Write(Msg(msg, 2))); - Sleep(TDuration::Seconds(5)); - auto describe = 
client.DescribeTopic(TEST_TOPIC).GetValueSync(); - UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); - - auto writeSession2 = CreateWriteSession(client, "producer-1", 1, TEST_TOPIC, false); - UNIT_ASSERT(writeSession2->Write(Msg(msg, 3))); - UNIT_ASSERT(writeSession2->Write(Msg(msg, 4))); - Sleep(TDuration::Seconds(5)); - auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); - UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, TEST_TOPIC, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, TEST_TOPIC, false); + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 1))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 2))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession2_1 = CreateWriteSession(client, "producer-1", 1, TEST_TOPIC, false); + auto writeSession2_2 = CreateWriteSession(client, "producer-2", 1, TEST_TOPIC, false); + + { + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); + Sleep(TDuration::Seconds(5)); + auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + } + } + + void ExecuteQuery(NYdb::NTable::TSession& session, const 
TString& query ) { + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + Y_UNIT_TEST(WithDir_PartitionSplit_AutosplitByLoad) { + TTopicSdkTestSetup setup = CreateSetup(); + auto client = setup.MakeClient(); + auto tableClient = setup.MakeTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + setup.GetServer().AnnoyingClient->MkDir("/Root", "dir"); + + ExecuteQuery(session, R"( + --!syntax_v1 + CREATE TOPIC `/Root/dir/origin` + WITH ( + AUTO_PARTITIONING_STRATEGY = 'SCALE_UP', + MAX_ACTIVE_PARTITIONS = 50 + ); + )"); + + { + auto describe = client.DescribeTopic("/Root/dir/origin").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + ui64 balancerTabletId; + { + auto pathDescr = setup.GetServer().AnnoyingClient->Ls("/Root/dir/origin")->Record.GetPathDescription().GetSelf(); + balancerTabletId = pathDescr.GetBalancerTabletID(); + Cerr << ">>>>> BalancerTabletID=" << balancerTabletId << Endl << Flush; + UNIT_ASSERT(balancerTabletId); + } + + { + const auto edge = setup.GetRuntime().AllocateEdgeActor(); + setup.GetRuntime().SendToPipe(balancerTabletId, edge, new TEvPQ::TEvPartitionScaleStatusChanged(0, NKikimrPQ::EScaleStatus::NEED_SPLIT)); + } + + { + size_t partitionCount = 0; + for (size_t i = 0; i < 10; ++i) { + Sleep(TDuration::Seconds(1)); + auto describe = client.DescribeTopic("/Root/dir/origin").GetValueSync(); + partitionCount = describe.GetTopicDescription().GetPartitions().size(); + if (partitionCount == 3) { + break; + } + } + UNIT_ASSERT_VALUES_EQUAL(partitionCount, 3); + } } Y_UNIT_TEST(MidOfRange) { diff --git a/ydb/core/persqueue/ut/utils_ut.cpp b/ydb/core/persqueue/ut/utils_ut.cpp new file mode 100644 index 000000000000..85513ea70a94 --- /dev/null +++ b/ydb/core/persqueue/ut/utils_ut.cpp @@ -0,0 +1,81 @@ +#include + +#include + 
+namespace NKikimr::NPQ { + +Y_UNIT_TEST_SUITE(TPQUtilsTest) { + Y_UNIT_TEST(TLastCounter) { + TLastCounter counter; + + TInstant now = TInstant::Now(); + + { + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 0); + } + + { + counter.Use("v-1", now); + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + counter.Use("v-1", now); + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-1", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + counter.Use("v-2", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + { + counter.Use("v-1", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-3", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-3", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-2", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + } +} + +} diff --git a/ydb/core/persqueue/ut/ya.make b/ydb/core/persqueue/ut/ya.make index 8b3e64ac2351..c91c10ecd965 100644 --- a/ydb/core/persqueue/ut/ya.make +++ b/ydb/core/persqueue/ut/ya.make @@ -45,6 +45,7 @@ SRCS( pqrb_describes_ut.cpp microseconds_sliding_window_ut.cpp fetch_request_ut.cpp + utils_ut.cpp ) RESOURCE( diff --git a/ydb/core/persqueue/utils.cpp b/ydb/core/persqueue/utils.cpp index 4decfc2a6c7c..1003a8003a00 100644 --- a/ydb/core/persqueue/utils.cpp +++ b/ydb/core/persqueue/utils.cpp @@ -36,6 +36,12 @@ bool SplitMergeEnabled(const NKikimrPQ::TPQTabletConfig& config) 
{ return config.has_partitionstrategy() && config.partitionstrategy().has_partitionstrategytype() && config.partitionstrategy().partitionstrategytype() != ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED; } +size_t CountActivePartitions(const ::google::protobuf::RepeatedPtrField< ::NKikimrPQ::TPQTabletConfig_TPartition >& partitions) { + return std::count_if(partitions.begin(), partitions.end(), [](const auto& p) { + return p.GetStatus() == ::NKikimrPQ::ETopicPartitionStatus::Active; + }); +} + static constexpr ui64 PUT_UNIT_SIZE = 40960u; // 40Kb ui64 PutUnitsSize(const ui64 size) { @@ -159,7 +165,36 @@ std::set TPartitionGraph::GetActiveChildren(ui32 id) const { return result; } -void TPartitionGraph::Travers(ui32 id, std::function func, bool includeSelf) const { +void Travers0(std::deque& queue, const std::function& func) { + while(!queue.empty()) { + auto* node = queue.front(); + queue.pop_front(); + + if (func(node->Id)) { + queue.insert(queue.end(), node->Children.begin(), node->Children.end()); + } + } +} + +void TPartitionGraph::Travers(const std::function& func) const { + std::deque queue; + + for (auto& [id, n] : Partitions) { + if (!n.IsRoot()) { + continue; + } + + if (!func(id)) { + continue; + } + + queue.insert(queue.end(), n.Children.begin(), n.Children.end()); + } + + Travers0(queue, func); +} + +void TPartitionGraph::Travers(ui32 id, const std::function& func, bool includeSelf) const { auto* n = GetPartition(id); if (!n) { return; @@ -172,17 +207,9 @@ void TPartitionGraph::Travers(ui32 id, std::function func, bool std::deque queue; queue.insert(queue.end(), n->Children.begin(), n->Children.end()); - while(!queue.empty()) { - auto* node = queue.front(); - queue.pop_front(); - - if (func(node->Id)) { - queue.insert(queue.end(), node->Children.begin(), node->Children.end()); - } - } + Travers0(queue, func); } - template inline int GetPartitionId(TPartition p) { return p.GetPartitionId(); @@ -271,4 
+298,31 @@ TPartitionGraph MakePartitionGraph(const NKikimrSchemeOp::TPersQueueGroupDescrip return TPartitionGraph(BuildGraph(config.GetPartitions())); } +void TLastCounter::Use(const TString& value, const TInstant& now) { + const auto full = MaxValueCount == Values.size(); + if (!Values.empty() && Values[0].Value == value) { + auto& v0 = Values[0]; + if (v0.LastUseTime < now) { + v0.LastUseTime = now; + if (full && Values[1].LastUseTime != now) { + Values.push_back(std::move(v0)); + Values.pop_front(); + } + } + } else if (full && Values[1].Value == value) { + Values[1].LastUseTime = now; + } else if (!full || Values[0].LastUseTime < now) { + if (full) { + Values.pop_front(); + } + Values.push_back(Data{now, value}); + } +} + +size_t TLastCounter::Count(const TInstant& expirationTime) { + return std::count_if(Values.begin(), Values.end(), [&](const auto& i) { + return i.LastUseTime >= expirationTime; + }); +} + } // NKikimr::NPQ diff --git a/ydb/core/persqueue/utils.h b/ydb/core/persqueue/utils.h index c81442f1128c..7c42e70ff59c 100644 --- a/ydb/core/persqueue/utils.h +++ b/ydb/core/persqueue/utils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -11,6 +13,8 @@ ui64 TopicPartitionReserveThroughput(const NKikimrPQ::TPQTabletConfig& config); bool SplitMergeEnabled(const NKikimrPQ::TPQTabletConfig& config); +size_t CountActivePartitions(const ::google::protobuf::RepeatedPtrField< ::NKikimrPQ::TPQTabletConfig_TPartition >& partitions); + ui64 PutUnitsSize(const ui64 size); TString SourceIdHash(const TString& sourceId); @@ -55,7 +59,9 @@ class TPartitionGraph { const Node* GetPartition(ui32 id) const; std::set GetActiveChildren(ui32 id) const; - void Travers(ui32 id, std::function func, bool includeSelf = false) const; + + void Travers(const std::function& func) const; + void Travers(ui32 id, const std::function& func, bool includeSelf = false) const; private: std::unordered_map Partitions; @@ -65,4 +71,20 @@ TPartitionGraph 
MakePartitionGraph(const NKikimrPQ::TPQTabletConfig& config); TPartitionGraph MakePartitionGraph(const NKikimrPQ::TUpdateBalancerConfig& config); TPartitionGraph MakePartitionGraph(const NKikimrSchemeOp::TPersQueueGroupDescription& config); +class TLastCounter { + static constexpr size_t MaxValueCount = 2; + +public: + void Use(const TString& value, const TInstant& now); + size_t Count(const TInstant& expirationTime); + +private: + struct Data { + TInstant LastUseTime; + TString Value; + }; + std::deque Values; +}; + + } // NKikimr::NPQ diff --git a/ydb/core/persqueue/writer/metadata_initializers.cpp b/ydb/core/persqueue/writer/metadata_initializers.cpp index dff6ade3ada7..b9e3103f59d3 100644 --- a/ydb/core/persqueue/writer/metadata_initializers.cpp +++ b/ydb/core/persqueue/writer/metadata_initializers.cpp @@ -74,6 +74,16 @@ void TSrcIdMetaInitializer::DoPrepare(NInitializer::IInitializerInput::TPtr cont result.emplace_back(new NInitializer::TGenericTableModifier(request, "add_column_SeqNo")); } + + { + Ydb::Table::AlterTableRequest request; + request.set_session_id(""); + request.set_path(tablePath); + request.mutable_alter_partitioning_settings()->set_min_partitions_count(50); + request.mutable_alter_partitioning_settings()->set_partitioning_by_load(::Ydb::FeatureFlag_Status::FeatureFlag_Status_ENABLED); + + result.emplace_back(new NInitializer::TGenericTableModifier(request, "enable_autopartitioning_by_load")); + } } result.emplace_back(NInitializer::TACLModifierConstructor::GetReadOnlyModifier(tablePath, "acl")); controller->OnPreparationFinished(result); diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h index b6cb8688a8fb..ce9f1b20c2c1 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h @@ -217,7 +217,7 @@ class 
TAbstractPartitionChooserActor: public TActorBootstrapped { TThis::Become(&TThis::StateCheckPartition); if (!Partition) { - return ReplyError(ErrorCode::INITIALIZING, "Partition not choosed", ctx); + return ReplyError(TThis::PreferedPartition ? ErrorCode::WRITE_ERROR_PARTITION_INACTIVE : ErrorCode::INITIALIZING, "Partition not choosed", ctx); } PartitionHelper.Open(Partition->TabletId, ctx); diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h index 33e1f3dd2786..0b89f1e2039f 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h @@ -100,6 +100,7 @@ class TPartitionChooserActor: public TAbstractPartitionChooserActorGetTypeRewrite()) { HFunc(TEvPersQueue::TEvGetPartitionIdForWriteResponse, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); SFunc(TEvents::TEvPoison, TThis::Die); } diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h index 2342ae49f308..c44afa64c239 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h @@ -71,7 +71,7 @@ class TSMPartitionChooserActor: public TAbstractPartitionChooserActor attributes = 1 [(Ydb.size).le = 100]; + optional string location = 2; } message TObjectStorage { diff --git a/ydb/core/protos/feature_flags.proto b/ydb/core/protos/feature_flags.proto index eaf0172f27b8..0a72c49cc1f9 100644 --- a/ydb/core/protos/feature_flags.proto +++ b/ydb/core/protos/feature_flags.proto @@ -97,7 +97,7 @@ message TFeatureFlags { optional bool EnableAlterDatabaseCreateHiveFirst = 82 [default = false]; reserved 83; // EnableKqpDataQuerySourceRead optional bool EnableSmallDiskOptimization = 
84 [default = true]; - optional bool EnableDataShardVolatileTransactions = 85 [default = true]; + optional bool EnableDataShardVolatileTransactions = 85 [default = false]; optional bool EnableTopicServiceTx = 86 [default = false]; optional bool EnableLLVMCache = 87 [default = false]; optional bool EnableExternalDataSources = 88 [default = false]; @@ -105,7 +105,7 @@ message TFeatureFlags { optional bool EnableSeparationComputeActorsFromRead = 90 [default = false]; optional bool EnablePQConfigTransactionsAtSchemeShard = 91 [default = false]; optional bool EnableScriptExecutionOperations = 92 [default = true]; - optional bool EnableImplicitQueryParameterTypes = 93 [default = true]; + optional bool EnableImplicitQueryParameterTypes = 93 [default = false]; optional bool EnableForceImmediateEffectsExecution = 94 [default = false]; optional bool EnableTopicSplitMerge = 95 [default = false]; optional bool EnableChangefeedDynamoDBStreamsFormat = 96 [default = true]; @@ -116,12 +116,12 @@ message TFeatureFlags { optional bool EnableIcNodeCache = 101 [default = true]; optional bool EnableTempTables = 102 [default = false]; optional bool SuppressCompatibilityCheck = 103 [default = false]; - optional bool EnableUniqConstraint = 104 [default = false]; + optional bool EnableUniqConstraint = 104 [default = true]; optional bool EnableChangefeedDebeziumJsonFormat = 105 [default = false]; optional bool EnableStatistics = 106 [default = false]; - optional bool EnableUuidAsPrimaryKey = 107 [default = false]; + optional bool EnableUuidAsPrimaryKey = 107 [default = true]; optional bool EnableTablePgTypes = 108 [default = false]; - optional bool EnableLocalDBBtreeIndex = 109 [default = true]; + optional bool EnableLocalDBBtreeIndex = 109 [default = false]; optional bool EnablePDiskHighHDDInFlight = 110 [default = false]; optional bool UseVDisksBalancing = 111 [default = false]; optional bool EnableViews = 112 [default = false]; @@ -129,7 +129,7 @@ message TFeatureFlags { optional bool 
EnableAccessServiceBulkAuthorization = 114 [default = false]; optional bool EnableAddColumsWithDefaults = 115 [ default = false]; optional bool EnableReplaceIfExistsForExternalEntities = 116 [ default = false]; - optional bool EnableCMSRequestPriorities = 117 [default = false]; + optional bool EnableCMSRequestPriorities = 117 [default = true]; optional bool EnableKeyvalueLogBatching = 118 [default = false]; optional bool EnableLocalDBFlatIndex = 119 [default = true]; optional bool ExtendedVDiskCounters = 120 [default = true]; @@ -141,7 +141,20 @@ message TFeatureFlags { optional bool EnableExternalSourceSchemaInference = 126 [default = false]; optional bool EnableDbMetadataCache = 127 [default = false]; optional bool EnableTableDatetime64 = 128 [default = false]; - optional bool EnableResourcePools = 129 [default = false]; + optional bool EnableResourcePools = 129 [default = false]; optional bool EnableColumnStatistics = 130 [default = false]; optional bool EnableSingleCompositeActionGroup = 131 [default = false]; + optional bool EnableResourcePoolsOnServerless = 132 [default = false]; + optional bool EnableChangefeedsOnIndexTables = 134 [default = false]; + optional bool EnableResourcePoolsCounters = 135 [default = false]; + optional bool EnableOptionalColumnsInColumnShard = 136 [default = false]; + //optional bool EnableGranularTimecast = 137 [default = true]; NB. 
exists in main + optional bool EnableAlterShardingInColumnShard = 138 [default = false]; + optional bool EnablePgSyntax = 139 [default = true]; + optional bool EnableTieringInColumnShard = 140 [default = false]; + optional bool EnableMetadataObjectsOnServerless = 141 [default = true]; + optional bool EnableOlapCompression = 142 [default = false]; + optional bool EnableExternalDataSourcesOnServerless = 143 [default = true]; + optional bool EnableSparsedColumns = 144 [default = false]; + optional bool EnableDriveSerialsDiscovery = 152 [default = false]; } diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index dfe39d11023c..a5652056a667 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -11,15 +11,16 @@ import "ydb/core/protos/filestore_config.proto"; import "ydb/core/protos/channel_purpose.proto"; import "ydb/core/protos/follower_group.proto"; import "ydb/core/protos/blob_depot_config.proto"; +import "ydb/core/protos/yql_translation_settings.proto"; import "ydb/public/api/protos/ydb_coordination.proto"; import "ydb/public/api/protos/ydb_export.proto"; import "ydb/public/api/protos/ydb_value.proto"; import "ydb/library/actors/protos/actors.proto"; import "ydb/library/mkql_proto/protos/minikql.proto"; import "ydb/core/protos/index_builder.proto"; -import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; import "ydb/core/tx/columnshard/engines/scheme/defaults/protos/data.proto"; import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; +import "ydb/library/formats/arrow/protos/accessor.proto"; import "google/protobuf/empty.proto"; @@ -423,6 +424,7 @@ message TOlapColumnDiff { optional TOlapColumn.TSerializer Serializer = 5; optional string StorageId = 6; optional string DefaultValue = 7; + optional NKikimrArrowAccessorProto.TRequestedConstructor DataAccessorConstructor = 8; } message TOlapColumnDescription { @@ -442,6 +444,7 @@ message TOlapColumnDescription { 
optional TOlapColumn.TSerializer Serializer = 10; optional string StorageId = 11; optional NKikimrColumnShardColumnDefaults.TColumnDefault DefaultValue = 12; + optional NKikimrArrowAccessorProto.TConstructor DataAccessorConstructor = 13; } message TRequestedBloomFilter { @@ -449,6 +452,15 @@ message TRequestedBloomFilter { repeated string ColumnNames = 3; } +message TRequestedMaxIndex { + optional string ColumnName = 1; +} + +message TRequestedCountMinSketch { + // sketch built on the combined data from the set of columns + repeated string ColumnNames = 1; +} + message TOlapIndexRequested { optional string Name = 1; optional TCompressionOptions Compression = 3; @@ -457,6 +469,8 @@ message TOlapIndexRequested { optional string ClassName = 39; oneof Implementation { TRequestedBloomFilter BloomFilter = 40; + TRequestedMaxIndex MaxIndex = 41; + TRequestedCountMinSketch CountMinSketch = 42; } } @@ -466,6 +480,14 @@ message TBloomFilter { repeated uint32 ColumnIds = 3; } +message TMaxIndex { + optional uint32 ColumnId = 1; +} + +message TCountMinSketch { + repeated uint32 ColumnIds = 1; +} + message TOlapIndexDescription { // This id is auto-generated by schemeshard optional uint32 Id = 1; @@ -478,6 +500,8 @@ message TOlapIndexDescription { optional string ClassName = 40; oneof Implementation { TBloomFilter BloomFilter = 41; + TMaxIndex MaxIndex = 42; + TCountMinSketch CountMinSketch = 43; } } @@ -541,7 +565,6 @@ message TColumnTableSchema { optional bool CompositeMarksDeprecated = 9 [ default = false ]; repeated TOlapIndexDescription Indexes = 10; - repeated NKikimrColumnShardStatisticsProto.TOperatorContainer Statistics = 11; optional TColumnTableSchemeOptions Options = 12; } @@ -558,8 +581,6 @@ message TAlterColumnTableSchema { repeated TOlapColumnDiff AlterColumns = 7; repeated TOlapIndexRequested UpsertIndexes = 8; repeated string DropIndexes = 9; - repeated NKikimrColumnShardStatisticsProto.TConstructorContainer UpsertStatistics = 10; - repeated string 
DropStatistics = 11; optional TColumnTableRequestedOptions Options = 12; } @@ -913,10 +934,6 @@ message TCreateCdcStream { optional TCdcStreamDescription StreamDescription = 2; optional uint64 RetentionPeriodSeconds = 3 [default = 86400]; // 1d by default optional uint32 TopicPartitions = 4; - oneof IndexMode { - google.protobuf.Empty AllIndexes = 5; // Create topic per each index - string IndexName = 6; - } } message TAlterCdcStream { @@ -982,10 +999,6 @@ enum EIndexState { EIndexStateWriteOnly = 3; } -message TExplicitPartitions { - repeated TSplitBoundary SplitBoundary = 1; -} - message TIndexDescription { optional string Name = 1; optional uint64 LocalPathId = 2; @@ -1000,15 +1013,10 @@ message TIndexDescription { optional uint64 PathOwnerId = 7; repeated string DataColumnNames = 8; + // DataSize + IndexSize of indexImplTable optional uint64 DataSize = 9; - - // indexImplTable settings - oneof Partitions { - uint32 UniformPartitions = 10; - TExplicitPartitions ExplicitPartitions = 11; - } - optional TPartitioningPolicy PartitioningPolicy = 12; + repeated TTableDescription IndexImplTableDescriptions = 10; } message TIndexCreationConfig { @@ -1640,7 +1648,6 @@ message TIndexBuildControl { message TLockConfig { optional string Name = 1; - optional bool AllowIndexImplLock = 2; } message TLockGuard { @@ -1809,6 +1816,7 @@ message TDescribeOptions { optional bool ReturnChannelsBinding = 8 [default = false]; optional bool ReturnRangeKey = 9 [default = true]; optional bool ReturnSetVal = 10 [default = false]; + optional bool ReturnIndexTableBoundaries = 11 [default = false]; } // Request to read scheme for a specific path @@ -2121,6 +2129,7 @@ message TViewDescription { optional NKikimrProto.TPathID PathId = 2; optional uint64 Version = 3; optional string QueryText = 4; + optional NYql.NProto.TTranslationSettings CapturedContext = 5; } message TResourcePoolProperties { diff --git a/ydb/core/protos/flat_tx_scheme.proto b/ydb/core/protos/flat_tx_scheme.proto index 
2b87d1936df9..84026ae9f41d 100644 --- a/ydb/core/protos/flat_tx_scheme.proto +++ b/ydb/core/protos/flat_tx_scheme.proto @@ -144,13 +144,15 @@ message TEvUpdateConfigResult { message TEvLogin { optional string User = 1; - optional string Password = 2; + optional string Password = 2 [(Ydb.sensitive) = true]; optional string ExternalAuth = 3; + optional uint64 ExpiresAfterMs = 4; + optional string PeerName = 5; // IP address actually, same as TEvModifySchemeTransaction.PeerName } message TEvLoginResult { optional string Error = 1; - optional string Token = 2; + optional string Token = 2; // signed jwt token } // Sending actor registers itself to be notified when tx completes @@ -188,6 +190,8 @@ message TSchemeLimits { optional uint64 MaxExports = 16; optional uint64 MaxImports = 17; + + optional uint64 MaxColumnTableColumns = 18; } message TEvInitTenantSchemeShard { diff --git a/ydb/core/protos/hive.proto b/ydb/core/protos/hive.proto index bb9ef0d6e087..578809b86ffe 100644 --- a/ydb/core/protos/hive.proto +++ b/ydb/core/protos/hive.proto @@ -246,6 +246,7 @@ message TEvTabletMetrics { repeated TTabletMetrics TabletMetrics = 1; optional NKikimrTabletBase.TMetrics TotalResourceUsage = 2; optional double TotalNodeUsage = 3; + optional NKikimrTabletBase.TMetrics ResourceMaximum = 4; } message TEvReassignTablet { @@ -493,6 +494,7 @@ message TTabletInfo { optional uint32 RestartsPerPeriod = 22; optional uint64 LastAliveTimestamp = 23; optional EBalancerPolicy BalancerPolicy = 24; + optional bool InWaitQueue = 25; } message TEvSeizeTabletsReply { diff --git a/ydb/core/protos/import.proto b/ydb/core/protos/import.proto index 612309270c4c..f13fa156ce93 100644 --- a/ydb/core/protos/import.proto +++ b/ydb/core/protos/import.proto @@ -19,6 +19,7 @@ message TImport { oneof Settings { Ydb.Import.ImportFromS3Settings ImportFromS3Settings = 6; } + optional string UserSID = 9; } message TCreateImportRequest { @@ -33,6 +34,7 @@ message TEvCreateImportRequest { optional string 
DatabaseName = 2; optional string UserSID = 3; optional TCreateImportRequest Request = 4; + optional string PeerName = 5; } message TCreateImportResponse { @@ -67,8 +69,10 @@ message TCancelImportRequest { message TEvCancelImportRequest { optional uint64 TxId = 1; - optional string DatabaseName = 3; optional TCancelImportRequest Request = 2; + optional string DatabaseName = 3; + optional string UserSID = 4; + optional string PeerName = 5; } message TCancelImportResponse { @@ -87,8 +91,10 @@ message TForgetImportRequest { message TEvForgetImportRequest { optional uint64 TxId = 1; - optional string DatabaseName = 3; optional TForgetImportRequest Request = 2; + optional string DatabaseName = 3; + optional string UserSID = 4; + optional string PeerName = 5; } message TForgetImportResponse { diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index fe0ef81a862f..18dd347cf667 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -64,6 +64,7 @@ enum EIsolationLevel { ISOLATION_LEVEL_READ_COMMITTED = 2; ISOLATION_LEVEL_READ_UNCOMMITTED = 3; ISOLATION_LEVEL_READ_STALE = 4; + ISOLATION_LEVEL_SNAPSHOT_RO = 5; }; enum EQueryReplyFlags { @@ -123,6 +124,7 @@ message TQueryRequest { optional string UserSID = 33; optional uint64 OutputChunkMaxSize = 34; optional string PoolId = 35; + optional string DatabaseId = 36; } message TKqpPathIdProto { @@ -184,6 +186,7 @@ message TKqpTableMetadataProto { repeated TKqpTableMetadataProto SecondaryGlobalIndexMetadata = 12; optional uint64 RecordsCount = 13; optional uint64 DataSize = 14; + optional bool StatsLoaded = 15; } message TRlPath { @@ -277,7 +280,7 @@ message TQueryResponseExtraInfo { message TQueryResponse { optional bytes SessionId = 1; reserved 2; // (deprecated) QueryErrors - repeated NKikimrMiniKQL.TResult Results = 3; + // repeated NKikimrMiniKQL.TResult Results = 3; // optional TQueryProfile Profile = 4; // TODO: Deprecate, use QueryStats reserved 4; optional bytes PreparedQuery = 5; @@ 
-358,6 +361,7 @@ message TEvListSessionsRequest { repeated uint32 Columns = 5; optional int64 FreeSpace = 6; optional int64 Limit = 7; + optional string TenantName = 8; } message TEvListSessionsResponse { @@ -439,7 +443,7 @@ message TEvPingSessionResponse { message TEvDataQueryStreamPart { optional NActorsProto.TActorId GatewayActorId = 1; - repeated NKikimrMiniKQL.TResult Results = 2; + repeated Ydb.ResultSet Results = 2; }; message TCancelQueryRequest { @@ -582,6 +586,15 @@ message TEvStartKqpTasksRequest { optional bool StartAllOrFail = 6 [default = true]; optional uint64 OutputChunkMaxSize = 7 [default = 0]; // 0 - use some default value optional string SerializedGUCSettings = 8; + optional string SchedulerGroup = 9; + optional double MemoryPoolPercent = 10 [default = 100]; + optional string Database = 11; + optional string DatabaseId = 17; + optional uint64 LockTxId = 13; + optional uint32 LockNodeId = 14; + + optional double PoolMaxCpuShare = 12; + optional double QueryCpuShare = 16; } message TEvStartKqpTasksResponse { @@ -753,15 +766,16 @@ message TKqpTableSinkSettings { } message TKqpStreamLookupSettings { - optional NKqpProto.TKqpPhyTableId Table = 1; - repeated TKqpColumnMetadataProto KeyColumns = 2; - repeated TKqpColumnMetadataProto Columns = 3; - optional TKqpSnapshot Snapshot = 4; - optional uint64 LockTxId = 5; - optional bool ImmediateTx = 6; - repeated string LookupKeyColumns = 7; - optional NKqpProto.EStreamLookupStrategy LookupStrategy = 8; - optional bool AllowInconsistentReads = 9 [default = false]; + optional NKqpProto.TKqpPhyTableId Table = 1; + repeated TKqpColumnMetadataProto KeyColumns = 2; + repeated TKqpColumnMetadataProto Columns = 3; + optional TKqpSnapshot Snapshot = 4; + optional uint64 LockTxId = 5; + optional bool ImmediateTx = 6; + repeated string LookupKeyColumns = 7; + optional NKqpProto.EStreamLookupStrategy LookupStrategy = 8; + optional bool AllowInconsistentReads = 9 [default = false]; + optional uint32 LockNodeId = 10; } 
message TKqpSequencerSettings { diff --git a/ydb/core/protos/kqp_physical.proto b/ydb/core/protos/kqp_physical.proto index 0ec497cca01f..ac4f99db2134 100644 --- a/ydb/core/protos/kqp_physical.proto +++ b/ydb/core/protos/kqp_physical.proto @@ -263,6 +263,7 @@ enum EStreamLookupStrategy { UNSPECIFIED = 0; LOOKUP = 1; JOIN = 2; + SEMI_JOIN = 3; }; message TKqpPhyCnStreamLookup { @@ -375,6 +376,8 @@ message TKqpPhyStage { repeated TKqpSink Sinks = 11; map SecureParams = 12; bool AllowWithSpilling = 13; + uint32 TaskCount = 14; + double StageCost = 15; } message TKqpPhyResult { @@ -385,6 +388,11 @@ message TKqpPhyResult { optional uint32 QueryResultIndex = 5; } +message TKqpAnalyzeOperation { + string TablePath = 1; + repeated string Columns = 2; +} + message TKqpPhyMetadataOperation { message TColumnValue { string Column = 1; @@ -449,6 +457,10 @@ message TKqpSchemeOperation { NKikimrSchemeOp.TModifyScheme CreateResourcePool = 38; NKikimrSchemeOp.TModifyScheme AlterResourcePool = 39; NKikimrSchemeOp.TModifyScheme DropResourcePool = 40; + TKqpAnalyzeOperation AnalyzeTable = 41; + NKikimrSchemeOp.TModifyScheme CreateTopic = 42; + NKikimrSchemeOp.TModifyScheme AlterTopic = 43; + NKikimrSchemeOp.TModifyScheme DropTopic = 44; } } diff --git a/ydb/core/protos/kqp_stats.proto b/ydb/core/protos/kqp_stats.proto index 24e09f2dcd90..df70faff1a13 100644 --- a/ydb/core/protos/kqp_stats.proto +++ b/ydb/core/protos/kqp_stats.proto @@ -73,6 +73,7 @@ message TKqpExecutionExtraStats { message TKqpStatsQuery { // Basic stats uint64 DurationUs = 1; + uint64 QueuedTimeUs = 9; TKqpStatsCompile Compilation = 2; reserved 3; // repeated TKqpStatsExecution Executions = 3; diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto index 0492c3aa912a..b41921fcef1d 100644 --- a/ydb/core/protos/node_whiteboard.proto +++ b/ydb/core/protos/node_whiteboard.proto @@ -11,6 +11,7 @@ option java_package = "ru.yandex.kikimr.proto"; extend google.protobuf.FieldOptions { 
optional uint64 InsignificantChangeAmount = 70553; optional uint32 InsignificantChangePercent = 70554; + optional bool DefaultField = 70555; } enum EFlag { @@ -47,28 +48,29 @@ message TTabletStateInfo { Reserved16 = 16; } - optional uint64 TabletId = 1; + optional uint64 TabletId = 1 [(DefaultField) = true]; optional uint64 CreateTime = 2; - optional uint64 ChangeTime = 3; - optional ETabletState State = 4; + optional uint64 ChangeTime = 3 [(DefaultField) = true]; + optional ETabletState State = 4 [(DefaultField) = true]; optional uint32 UserState = 5; // implementation-dependent - optional uint32 Generation = 6; - optional NKikimrTabletBase.TTabletTypes.EType Type = 7; + optional uint32 Generation = 6 [(DefaultField) = true]; + optional NKikimrTabletBase.TTabletTypes.EType Type = 7 [(DefaultField) = true]; optional string Host = 8; repeated uint32 ChannelGroupIDs = 9; // BS Group per channel repeated TCustomTabletAttribute Attributes = 10; optional uint32 NodeId = 11; // filled during merge - optional bool Leader = 12; // leader or follower + optional bool Leader = 12 [(DefaultField) = true]; // leader or follower optional uint32 Count = 13; // filled during group count - optional uint32 FollowerId = 14; + optional uint32 FollowerId = 14 [(DefaultField) = true]; optional EFlag Overall = 15; // filled during merge - optional NKikimrSubDomains.TDomainKey TenantId = 16; - optional fixed64 HiveId = 17; + optional NKikimrSubDomains.TDomainKey TenantId = 16 [(DefaultField) = true]; + optional fixed64 HiveId = 17 [(DefaultField) = true]; optional string EndOfRangeKeyPrefix = 18; // filled during merge } message TEvTabletStateRequest { optional uint64 ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; optional string Format = 5; // it could be "packed5" optional string GroupBy = 20; // it's either empty or "Type,State" for now repeated fixed64 FilterTabletId = 22; @@ -99,6 +101,7 @@ message TNodeStateInfo { message TEvNodeStateRequest { optional uint64 
ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; } message TEvNodeStateResponse { @@ -108,33 +111,35 @@ message TEvNodeStateResponse { } message TPDiskStateInfo { - optional uint32 PDiskId = 1; + optional uint32 PDiskId = 1 [(DefaultField) = true]; optional uint64 CreateTime = 2; - optional uint64 ChangeTime = 3; - optional string Path = 4; - optional uint64 Guid = 5; - optional uint64 Category = 6; - optional uint64 AvailableSize = 7 [(InsignificantChangeAmount) = 104857600]; // 100Mb - optional uint64 TotalSize = 8; - optional NKikimrBlobStorage.TPDiskState.E State = 9; + optional uint64 ChangeTime = 3 [(DefaultField) = true]; + optional string Path = 4 [(DefaultField) = true]; + optional uint64 Guid = 5 [(DefaultField) = true]; + optional uint64 Category = 6 [(DefaultField) = true]; + optional uint64 AvailableSize = 7 [(DefaultField) = true, (InsignificantChangeAmount) = 104857600]; // 100Mb + optional uint64 TotalSize = 8 [(DefaultField) = true]; + optional NKikimrBlobStorage.TPDiskState.E State = 9 [(DefaultField) = true]; optional uint32 NodeId = 10; // filled during merge optional uint32 Count = 13; // filled during group count - optional EFlag Device = 14; - optional EFlag Realtime = 15; + optional EFlag Device = 14 [(DefaultField) = true]; + optional EFlag Realtime = 15 [(DefaultField) = true]; // State as flag - to be filled optional EFlag StateFlag = 16; // overall state - to be filled optional EFlag Overall = 17; - optional string SerialNumber = 18; - optional uint64 SystemSize = 19; - optional uint64 LogUsedSize = 20; - optional uint64 LogTotalSize = 21; - optional uint32 ExpectedSlotCount = 22; - optional uint64 EnforcedDynamicSlotSize = 23; + optional string SerialNumber = 18 [(DefaultField) = true]; + optional uint64 SystemSize = 19 [(DefaultField) = true]; + optional uint64 LogUsedSize = 20 [(DefaultField) = true]; + optional uint64 LogTotalSize = 21 [(DefaultField) = true]; + optional uint32 ExpectedSlotCount = 22 
[(DefaultField) = true]; + optional uint64 EnforcedDynamicSlotSize = 23 [(DefaultField) = true]; + optional uint32 NumActiveSlots = 24 [(DefaultField) = true]; } message TEvPDiskStateRequest { optional uint64 ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; } message TEvPDiskStateResponse { @@ -171,13 +176,13 @@ message TVDiskSatisfactionRank { } message TVDiskStateInfo { - optional NKikimrBlobStorage.TVDiskID VDiskId = 1; + optional NKikimrBlobStorage.TVDiskID VDiskId = 1 [(DefaultField) = true]; optional uint64 CreateTime = 2; - optional uint64 ChangeTime = 3; - optional uint32 PDiskId = 4; - optional uint32 VDiskSlotId = 5; - optional uint64 Guid = 6; - optional uint64 Kind = 7; + optional uint64 ChangeTime = 3 [(DefaultField) = true]; + optional uint32 PDiskId = 4 [(DefaultField) = true]; + optional uint32 VDiskSlotId = 5 [(DefaultField) = true]; + optional uint64 Guid = 6 [(DefaultField) = true]; + optional uint64 Kind = 7 [(DefaultField) = true]; optional uint32 NodeId = 9; // filled during merge optional uint32 Count = 17; // filled during group count @@ -185,48 +190,49 @@ message TVDiskStateInfo { optional EFlag Overall = 10; // Current state of VDisk - optional EVDiskState VDiskState = 11; + optional EVDiskState VDiskState = 11 [(DefaultField) = true]; // Disk space flags - optional EFlag DiskSpace = 12; + optional EFlag DiskSpace = 12 [(DefaultField) = true]; // Compaction satisfaction rank - optional TVDiskSatisfactionRank SatisfactionRank = 13; + optional TVDiskSatisfactionRank SatisfactionRank = 13 [(DefaultField) = true]; // Is VDisk replicated? (i.e. contains all blobs it must have) - optional bool Replicated = 14; + optional bool Replicated = 14 [(DefaultField) = true]; // Does this VDisk has any yet unreplicated phantom-like blobs? - optional bool UnreplicatedPhantoms = 20 [default = false]; + optional bool UnreplicatedPhantoms = 20 [default = false, (DefaultField) = true]; // The same for the non-phantom-like blobs. 
- optional bool UnreplicatedNonPhantoms = 21 [default = false]; + optional bool UnreplicatedNonPhantoms = 21 [default = false, (DefaultField) = true]; // Replication progress (0 to 1). Only for replication, not blob scrubbing. - optional float ReplicationProgress = 30; + optional float ReplicationProgress = 30 [(DefaultField) = true]; // Replication ETA. - optional uint32 ReplicationSecondsRemaining = 31; + optional uint32 ReplicationSecondsRemaining = 31 [(DefaultField) = true]; // How many unsynced VDisks from current BlobStorage group we see - optional uint64 UnsyncedVDisks = 15 [default = 0]; + optional uint64 UnsyncedVDisks = 15 [default = 0, (DefaultField) = true]; // How much this VDisk have allocated on corresponding PDisk - optional uint64 AllocatedSize = 16 [(InsignificantChangeAmount) = 536870912]; // 512MiB + optional uint64 AllocatedSize = 16 [(InsignificantChangeAmount) = 536870912, (DefaultField) = true]; // 512MiB // How much space is available for VDisk corresponding to PDisk's hard space limits - optional uint64 AvailableSize = 28 [(InsignificantChangeAmount) = 536870912]; // 512MiB + optional uint64 AvailableSize = 28 [(InsignificantChangeAmount) = 536870912, (DefaultField) = true]; // 512MiB // Does this disk has some unreadable but not yet restored blobs? 
- optional bool HasUnreadableBlobs = 24; - optional fixed64 IncarnationGuid = 25; - optional bool DonorMode = 26; - optional fixed64 InstanceGuid = 27; // VDisk actor instance guid - repeated NKikimrBlobStorage.TVSlotId Donors = 29; + optional bool HasUnreadableBlobs = 24 [(DefaultField) = true]; + optional fixed64 IncarnationGuid = 25 [(DefaultField) = true]; + optional bool DonorMode = 26 [(DefaultField) = true]; + optional fixed64 InstanceGuid = 27 [(DefaultField) = true]; // VDisk actor instance guid + repeated NKikimrBlobStorage.TVSlotId Donors = 29 [(DefaultField) = true]; // VDisk (Skeleton) Front Queue Status - optional EFlag FrontQueues = 18; + optional EFlag FrontQueues = 18 [(DefaultField) = true]; // VDisk storage pool label - optional string StoragePoolName = 19; + optional string StoragePoolName = 19 [(DefaultField) = true]; // Read bytes per second from PDisk for TEvVGet blobs only - optional uint64 ReadThroughput = 22; + optional uint64 ReadThroughput = 22 [(DefaultField) = true]; // Write bytes per second to PDisk for TEvVPut blobs and replication bytes only - optional uint64 WriteThroughput = 23; + optional uint64 WriteThroughput = 23 [(DefaultField) = true]; } message TEvVDiskStateRequest { optional uint64 ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; } message TEvVDiskStateResponse { @@ -236,29 +242,30 @@ message TEvVDiskStateResponse { } message TBSGroupStateInfo { - optional uint32 GroupID = 1; - optional string ErasureSpecies = 2; - repeated NKikimrBlobStorage.TVDiskID VDiskIds = 3; + optional uint32 GroupID = 1 [(DefaultField) = true]; + optional string ErasureSpecies = 2 [(DefaultField) = true]; + repeated NKikimrBlobStorage.TVDiskID VDiskIds = 3 [(DefaultField) = true]; optional uint64 ChangeTime = 4; optional uint32 NodeId = 5; // filled during merge - optional uint32 GroupGeneration = 6; + optional uint32 GroupGeneration = 6 [(DefaultField) = true]; optional EFlag Overall = 7; - optional EFlag Latency = 8; + 
optional EFlag Latency = 8 [(DefaultField) = true]; optional uint32 Count = 13; // filled during group count - optional string StoragePoolName = 14; // from BS_CONTROLLER - optional uint64 AllocatedSize = 15 [(InsignificantChangeAmount) = 100000000]; - optional uint64 AvailableSize = 16 [(InsignificantChangeAmount) = 100000000]; - optional uint64 ReadThroughput = 17; - optional uint64 WriteThroughput = 18; - optional bool Encryption = 19; - repeated uint32 VDiskNodeIds = 20; - optional uint64 BlobDepotId = 21; // if set, then this is virtual group - optional bool NoVDisksInGroup = 22; - optional uint64 BlobDepotOnlineTime = 23; + optional string StoragePoolName = 14 [(DefaultField) = true]; // from BS_CONTROLLER + optional uint64 AllocatedSize = 15 [(InsignificantChangeAmount) = 100000000, (DefaultField) = true]; + optional uint64 AvailableSize = 16 [(InsignificantChangeAmount) = 100000000, (DefaultField) = true]; + optional uint64 ReadThroughput = 17 [(DefaultField) = true]; + optional uint64 WriteThroughput = 18 [(DefaultField) = true]; + optional bool Encryption = 19 [(DefaultField) = true]; + repeated uint32 VDiskNodeIds = 20 [(DefaultField) = true]; + optional uint64 BlobDepotId = 21 [(DefaultField) = true]; // if set, then this is virtual group + optional bool NoVDisksInGroup = 22 [(DefaultField) = true]; + optional uint64 BlobDepotOnlineTime = 23 [(DefaultField) = true]; } message TEvBSGroupStateRequest { optional uint64 ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; } message TEvBSGroupStateResponse { @@ -277,6 +284,7 @@ message TSystemStateInfo { optional string Name = 1; optional double Usage = 2 [(InsignificantChangePercent) = 30]; optional uint32 Threads = 3; + optional uint32 Limit = 4; } message TEndpoint { @@ -296,12 +304,12 @@ message TSystemStateInfo { optional uint64 LimitBytes = 2; } - optional uint64 StartTime = 1; - optional uint64 ChangeTime = 2; + optional uint64 StartTime = 1 [(DefaultField) = true]; + optional uint64 
ChangeTime = 2 [(DefaultField) = true]; optional TLegacyNodeLocation SystemLocation = 3; - repeated double LoadAverage = 4; - optional uint32 NumberOfCpus = 5; - optional EFlag SystemState = 6; + repeated double LoadAverage = 4 [(DefaultField) = true]; + optional uint32 NumberOfCpus = 5 [(DefaultField) = true]; + optional EFlag SystemState = 6 [(DefaultField) = true]; optional EFlag MessageBusState = 7; optional EFlag GRpcState = 8; optional uint32 NodeId = 9; // filled during merge @@ -311,29 +319,32 @@ message TSystemStateInfo { optional string DataCenter = 16; optional uint32 RackId = 17; optional string Rack = 18; - optional string Host = 19; - optional string Version = 20; - repeated TPoolStats PoolStats = 21; - repeated TEndpoint Endpoints = 22; - repeated string Roles = 23; - repeated string Tenants = 24; - optional string ClusterName = 25; - optional uint64 MemoryUsed = 26; - optional uint64 MemoryLimit = 27; + optional string Host = 19 [(DefaultField) = true]; + optional string Version = 20 [(DefaultField) = true]; + repeated TPoolStats PoolStats = 21 [(DefaultField) = true]; + repeated TEndpoint Endpoints = 22 [(DefaultField) = true]; + repeated string Roles = 23 [(DefaultField) = true]; + repeated string Tenants = 24 [(DefaultField) = true]; + optional string ClusterName = 25 [(DefaultField) = true]; + optional uint64 MemoryUsed = 26 [(DefaultField) = true]; + optional uint64 MemoryLimit = 27 [(DefaultField) = true]; optional EConfigState ConfigState = 28 [default = Consistent]; optional uint64 MemoryUsedInAlloc = 29; - optional double MaxDiskUsage = 30; - optional NActorsInterconnect.TNodeLocation Location = 31; + optional double MaxDiskUsage = 30 [(DefaultField) = true]; + optional NActorsInterconnect.TNodeLocation Location = 31 [(DefaultField) = true]; optional int64 MaxClockSkewWithPeerUs = 32; // a positive value means the peer is ahead in time; a negative value means it's behind optional uint32 MaxClockSkewPeerId = 33; optional uint64 
DisconnectTime = 34; - optional TNodeSharedCache SharedCacheStats = 35; - optional uint32 TotalSessions = 36; - optional string NodeName = 37; + optional TNodeSharedCache SharedCacheStats = 35; // TODO: use memory stats + optional uint32 TotalSessions = 36 [(DefaultField) = true]; + optional string NodeName = 37 [(DefaultField) = true]; + optional double CoresUsed = 39; + optional uint32 CoresTotal = 40; } message TEvSystemStateRequest { optional uint64 ChangedSince = 1; + repeated int32 FieldsRequired = 2 [packed = true]; } message TEvSystemStateResponse { diff --git a/ydb/core/protos/out/out.cpp b/ydb/core/protos/out/out.cpp index eee0c8d61b22..72bb4d1e22ba 100644 --- a/ydb/core/protos/out/out.cpp +++ b/ydb/core/protos/out/out.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -238,3 +239,15 @@ Y_DECLARE_OUT_SPEC(, NKikimrDataEvents::TEvWrite::TOperation::EOperationType, st Y_DECLARE_OUT_SPEC(, NKikimrDataEvents::TEvWrite::ETxMode, stream, value) { stream << NKikimrDataEvents::TEvWrite::ETxMode_Name(value); } + +Y_DECLARE_OUT_SPEC(, NKikimrStat::TEvAnalyzeResponse_EStatus, stream, value) { + stream << NKikimrStat::TEvAnalyzeResponse_EStatus_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrStat::TEvAnalyzeStatusResponse_EStatus, stream, value) { + stream << NKikimrStat::TEvAnalyzeStatusResponse_EStatus_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrStat::TEvStatisticsResponse::EStatus, stream, value) { + stream << NKikimrStat::TEvStatisticsResponse::EStatus_Name(value); +} diff --git a/ydb/core/protos/out/out_cms.cpp b/ydb/core/protos/out/out_cms.cpp index 71012587da8b..868e4d651608 100644 --- a/ydb/core/protos/out/out_cms.cpp +++ b/ydb/core/protos/out/out_cms.cpp @@ -29,3 +29,7 @@ Y_DECLARE_OUT_SPEC(, NKikimrCms::TLogRecordData::EType, stream, value) { Y_DECLARE_OUT_SPEC(, NKikimrCms::TAction::EType, stream, value) { stream << NKikimrCms::TAction::EType_Name(value); } + +Y_DECLARE_OUT_SPEC(, NKikimrCms::TAction::TIssue::EType, stream, value) { + 
stream << NKikimrCms::TAction::TIssue::EType_Name(value); +} diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto index 9c7be1e9d6f1..71d32d7c4eb7 100644 --- a/ydb/core/protos/pqconfig.proto +++ b/ydb/core/protos/pqconfig.proto @@ -404,6 +404,7 @@ message TPQTabletConfig { CAN_SPLIT = 1; // The autoscaling algorithm will both increase and decrease partitions count depending on the load characteristics. CAN_SPLIT_AND_MERGE = 2; + PAUSED = 3; } // Strategy for automatically changing the number of topic partitions depending on the load @@ -420,6 +421,7 @@ message TPQTabletConfig { } optional TPartitionStrategy PartitionStrategy = 35; + // The field is filled in only for the PQ tablet. Contains information about linked partitions for constructing a partial PartitionGraph. repeated TPartition AllPartitions = 36; // filled by schemeshard optional TOffloadConfig OffloadConfig = 38; @@ -1084,16 +1086,23 @@ message TTransaction { enum EState { UNKNOWN = 0; PREPARING = 1; - PREPARED = 2; // хранится + PREPARED = 2; // persist PLANNING = 3; - PLANNED = 4; // хранится. хотим выкинуть и не персистить + PLANNED = 4; // persist CALCULATING = 5; CALCULATED = 6; - WAIT_RS = 7; // хранится + WAIT_RS = 7; // persist EXECUTING = 8; - EXECUTED = 9; + EXECUTED = 9; // persist + WAIT_RS_ACKS = 11; + DELETING = 10; }; + message TPredicateReceived { + optional uint64 TabletId = 1; + optional bool Predicate = 2; + } + optional EKind Kind = 11; optional uint64 Step = 8; optional uint64 TxId = 1; @@ -1101,14 +1110,14 @@ message TTransaction { optional uint64 MinStep = 3; optional uint64 MaxStep = 4; + repeated TPredicateReceived PredicatesReceived = 5; + repeated uint64 PredicateRecipients = 6; + optional bool Predicate = 9; + // // TDataTransaction // - repeated uint64 Senders = 5; - repeated uint64 Receivers = 6; repeated TPartitionOperation Operations = 7; - optional bool SelfPredicate = 9; // только предикаты партиций. 
предикаты коллег отдельно - optional bool AggrPredicate = 10; // // TConfigTransaction diff --git a/ydb/core/protos/replication.proto b/ydb/core/protos/replication.proto index 31b2e2eaba28..41baff85764f 100644 --- a/ydb/core/protos/replication.proto +++ b/ydb/core/protos/replication.proto @@ -5,6 +5,16 @@ import "ydb/public/api/protos/ydb_issue_message.proto"; package NKikimrReplication; option java_package = "ru.yandex.kikimr.proto"; +message TReplicationDefaults { + message TSchemeOperationLimits { + optional uint32 InflightCreateStreamLimit = 1 [default = 1]; + optional uint32 InflightDropStreamLimit = 2 [default = 1]; + } + + optional int32 RetentionPeriodSeconds = 1 [default = 86400]; // 1d + optional TSchemeOperationLimits SchemeOperationLimits = 2; +} + message TStaticCredentials { optional string User = 1; optional string Password = 2 [(Ydb.sensitive) = true]; @@ -19,6 +29,7 @@ message TOAuthToken { message TConnectionParams { optional string Endpoint = 1; optional string Database = 2; + optional bool EnableSsl = 5; // credentials oneof Credentials { TStaticCredentials StaticCredentials = 3; diff --git a/ydb/core/protos/shared_cache.proto b/ydb/core/protos/shared_cache.proto index 33758ea09807..8e5d2ee34473 100644 --- a/ydb/core/protos/shared_cache.proto +++ b/ydb/core/protos/shared_cache.proto @@ -1,10 +1,18 @@ package NKikimrSharedCache; option java_package = "ru.yandex.kikimr.proto"; +enum TReplacementPolicy { + ThreeLeveledLRU = 0; + S3FIFO = 1; + ClockPro = 2; +} + message TSharedCacheConfig { optional uint64 MemoryLimit = 1 [default = 536870912]; optional uint64 ScanQueueInFlyLimit = 2 [default = 536870912]; optional uint64 AsyncQueueInFlyLimit = 3 [default = 536870912]; optional uint32 ActivePagesReservationPercent = 4; optional uint32 MemTableReservationPercent = 5; + optional TReplacementPolicy ReplacementPolicy = 6 [default = ThreeLeveledLRU]; + reserved 7; } diff --git a/ydb/core/protos/statistics.proto b/ydb/core/protos/statistics.proto index 
d6ab30157c03..5ecebcaa32c2 100644 --- a/ydb/core/protos/statistics.proto +++ b/ydb/core/protos/statistics.proto @@ -13,6 +13,7 @@ message TPathEntry { optional NKikimrProto.TPathID PathId = 1; optional uint64 RowCount = 2; optional uint64 BytesSize = 3; + optional bool IsColumnTable = 4; } message TSchemeShardStats { @@ -67,56 +68,131 @@ message TEvPropagateStatisticsResponse { message TEvStatisticsIsDisabled { } -message TEvScanTable { - optional NKikimrProto.TPathID PathId = 1; +enum EColumnStatisticType { + TYPE_UNSPECIFIED = 0; + TYPE_COUNT_MIN_SKETCH = 1; } -message TEvScanTableAccepted { - optional uint64 OperationId = 1; +// table to gather statistics from +message TTable { + optional NKikimrProto.TPathID PathId = 1; // table path + repeated uint32 ColumnTags = 2; // list of columns to gather statistics from. Empty means asking for every column. } -message TEvScanTableResponse { +// KQP -> SA +message TEvAnalyze { + optional bytes OperationId = 1; // unique identifier to match response item + repeated TTable Tables = 2; // list of analyzed tables and columns + repeated EColumnStatisticType Types = 3; // list of statistics types requested. Empty means asking for all available. 
} -message TEvGetScanStatus { - optional NKikimrProto.TPathID PathId = 1; +// SA -> KQP +message TEvAnalyzeResponse { + optional bytes OperationId = 1; + + enum EStatus { + STATUS_UNSPECIFIED = 0; + STATUS_SUCCESS = 1; + STATUS_ERROR = 2; + } + optional EStatus Status = 2; } -message TEvGetScanStatusResponse { +// KQP -> SA +message TEvAnalyzeStatus { + optional bytes OperationId = 1; // unique identifier to match response item +} + +// SA -> KQP +message TEvAnalyzeStatusResponse { + optional bytes OperationId = 1; + enum EStatus { - NO_OPERATION = 0; - ENQUEUED = 1; - IN_PROGRESS = 2; + STATUS_UNSPECIFIED = 0; + STATUS_NO_OPERATION = 1; + STATUS_ENQUEUED = 2; + STATUS_IN_PROGRESS = 3; } - optional EStatus Status = 1; + optional EStatus Status = 2; } +// SA -> Shard +message TEvAnalyzeTable { + optional bytes OperationId = 1; // unique identifier to match response item + optional TTable Table = 2; // analyzed table + repeated EColumnStatisticType Types = 3; // list of statistics types requested. Empty means asking for all available. +} + +// Shard -> SA +message TEvAnalyzeTableResponse { + optional bytes OperationId = 1; + optional NKikimrProto.TPathID PathId = 2; + optional fixed64 ShardTabletId = 3; +} + + message TEvStatisticsRequest { - optional NKikimrDataEvents.TTableId TableId = 1; + optional TTable Table = 1; + optional bytes StartKey = 2; - // list of columns to gather statistics from. Empty means asking for every column. - repeated uint32 ColumnTags = 3; - // list of statistics types requested. Empty means asking for all available. 
- repeated uint32 Types = 4; + + repeated EColumnStatisticType Types = 3; +} + +message TStatistic { + optional uint32 Type = 1; + optional bytes Data = 2; +} + +message TColumnStatistics { + optional uint32 Tag = 1; + repeated TStatistic Statistics = 2; } message TEvStatisticsResponse { - message TStatistic { - optional uint32 Type = 1; - optional bytes Data = 2; - } - message TColumn { - optional uint32 Tag = 1; - repeated TStatistic Statistics = 2; - } - repeated TColumn Columns = 1; + repeated TColumnStatistics Columns = 1; enum EStatus { - SUCCESS = 1; - ABORTED = 2; - ERROR = 3; + STATUS_UNSPECIFIED = 0; + STATUS_SUCCESS = 1; + STATUS_ABORTED = 2; + STATUS_ERROR = 3; } optional EStatus Status = 2; - optional fixed64 ShardTabletId = 3; } + +message TEvAggregateStatistics { + optional uint64 Round = 1; + message TNodeMap { + optional uint32 NodeId = 1; + repeated fixed64 TabletIds = 2; + } + repeated TNodeMap Nodes = 2; + optional NKikimrProto.TPathID PathId = 3; + repeated uint32 ColumnTags = 4; +} + +message TEvAggregateKeepAlive { + optional uint64 Round = 1; +} + +message TEvAggregateKeepAliveAck { + optional uint64 Round = 1; +} + +message TEvAggregateStatisticsResponse { + optional uint64 Round = 1; + repeated TColumnStatistics Columns = 2; + enum EErrorType { + TYPE_UNSPECIFIED = 0; + TYPE_UNAVAILABLE_NODE = 1; + TYPE_NON_LOCAL_TABLET = 2; + } + message TFailedTablet { + optional EErrorType Error = 1; + optional fixed64 TabletId = 2; + optional uint32 NodeId = 3; + } + repeated TFailedTablet FailedTablets = 3; +} diff --git a/ydb/core/protos/table_service_config.proto b/ydb/core/protos/table_service_config.proto index ad87e0afe42f..16ff1f3a176f 100644 --- a/ydb/core/protos/table_service_config.proto +++ b/ydb/core/protos/table_service_config.proto @@ -23,7 +23,7 @@ message TTableServiceConfig { optional uint32 ComputeActorsCount = 1 [default = 10000]; optional uint64 ChannelBufferSize = 2 [default = 8388608]; // 8 MB reserved 3; - optional uint64 
MkqlLightProgramMemoryLimit = 4 [default = 524288]; // 512 KB + optional uint64 MkqlLightProgramMemoryLimit = 4 [default = 1048576]; // 1 MiB optional uint64 MkqlHeavyProgramMemoryLimit = 5 [default = 31457280]; // 30 MB optional uint64 QueryMemoryLimit = 6 [default = 32212254720]; // 30 GB optional uint32 PublishStatisticsIntervalSec = 7 [default = 2]; @@ -42,13 +42,21 @@ message TTableServiceConfig { optional TInfoExchangerSettings InfoExchangerSettings = 19; optional uint64 KqpPatternCachePatternAccessTimesBeforeTryToCompile = 20 [default = 5]; optional uint64 KqpPatternCacheCompiledCapacityBytes = 21 [default = 104857600]; // 100 MiB - optional uint64 ReasonableSpillingTreshold = 22 [default = 104857600]; // 100 MiB + optional double SpillingPercent = 22 [default = 80]; // 100 MiB + + optional uint64 MinMemAllocSize = 23 [default = 8388608]; // 8 MiB + optional uint64 MinMemFreeSize = 24 [default = 33554432]; // 32 MiB + + optional uint64 MaxNonParallelTasksExecutionLimit = 25 [default = 8]; + optional uint64 MaxNonParallelTopStageExecutionLimit = 26 [default = 1]; + optional bool PreferLocalDatacenterExecution = 27 [ default = true ]; + optional uint64 MaxNonParallelDataQueryTasksLimit = 28 [default = 1000]; } message TSpillingServiceConfig { message TLocalFileConfig { optional bool Enable = 1 [default = false]; - optional string Root = 2 [default = "/tmp/kikimr_spilling/"]; + optional string Root = 2 [default = ""]; optional uint64 MaxTotalSize = 3 [default = 21474836480]; // 20 GiB optional uint64 MaxFileSize = 4 [default = 5368709120]; // 5 GiB optional uint64 MaxFilePartSize = 5 [default = 104857600]; // 100 MB @@ -225,7 +233,7 @@ message TTableServiceConfig { optional uint64 SessionIdleDurationSeconds = 28 [default = 600]; optional TAggregationConfig AggregationConfig = 29; optional bool EnableKqpScanQueryStreamLookup = 30 [default = true]; - optional bool EnableKqpDataQueryStreamLookup = 31 [default = true]; + optional bool EnableKqpDataQueryStreamLookup 
= 31 [default = false]; optional TExecuterRetriesConfig ExecuterRetriesConfig = 32; reserved 33; // optional bool EnableKqpDataQueryStreamPointLookup = 33 [default = false]; optional bool EnablePublishKqpProxyByRM = 34 [default = true]; @@ -274,12 +282,12 @@ message TTableServiceConfig { optional uint64 ExtractPredicateRangesLimit = 54 [default = 10000]; - optional bool EnableOlapSink = 55 [default = true]; + optional bool EnableOlapSink = 55 [default = false]; optional bool EnablePerStatementQueryExecution = 56 [default = false]; - optional bool EnableCreateTableAs = 57 [default = true]; + optional bool EnableCreateTableAs = 57 [default = false]; - optional uint64 IdxLookupJoinPointsLimit = 58 [default = 1]; + optional uint64 IdxLookupJoinPointsLimit = 58 [default = 3]; optional bool OldLookupJoinBehaviour = 59 [default = false]; optional bool EnableOltpSink = 60 [default = false]; @@ -289,9 +297,28 @@ message TTableServiceConfig { BLOCK_CHANNELS_AUTO = 1; BLOCK_CHANNELS_FORCE = 2; } - optional EBlockChannelsMode BlockChannelsMode = 61 [default = BLOCK_CHANNELS_SCALAR]; + optional EBlockChannelsMode BlockChannelsMode = 61 [default = BLOCK_CHANNELS_AUTO]; optional uint64 QueryReplayCacheUploadTTLSec = 62 [default = 36000]; optional bool EnableQueryServiceSpilling = 63 [ default = true ]; + + optional uint32 DefaultCostBasedOptimizationLevel = 64 [default = 2]; + + optional bool EnableConstantFolding = 65 [ default = true ]; + + optional bool EnableImplicitQueryParameterTypes = 66 [ default = false ]; + + optional string EnableSpillingNodes = 67 [ default = "None" ]; + + message TComputeSchedulerSettings { + optional uint64 AdvanceTimeIntervalUsec = 1 [default = 50000]; + optional uint64 ForgetOverflowTimeoutUsec = 2 [default = 2000000]; + optional uint64 ActivePoolPollingSec = 3 [default = 10]; + } + + reserved 68; + optional TComputeSchedulerSettings ComputeSchedulerSettings = 70; + + optional bool EnableRowsDuplicationCheck = 69 [ default = false ]; }; diff --git 
a/ydb/core/protos/tx_columnshard.proto b/ydb/core/protos/tx_columnshard.proto index 4cfd8c34cb0b..9cd849fd61be 100644 --- a/ydb/core/protos/tx_columnshard.proto +++ b/ydb/core/protos/tx_columnshard.proto @@ -4,6 +4,7 @@ import "ydb/core/protos/long_tx_service.proto"; import "ydb/core/protos/statistics.proto"; import "ydb/core/protos/subdomains.proto"; import "ydb/core/protos/tx.proto"; +import "ydb/library/formats/arrow/protos/fields.proto"; package NKikimrTxColumnShard; option java_package = "ru.yandex.kikimr.proto"; @@ -88,6 +89,8 @@ message TLogicalMetadata { optional uint64 DirtyWriteTimeSeconds = 5; optional string SpecialKeysRawData = 6; optional TEvWrite.EModificationType ModificationType = 7; + optional NKikimrArrowSchema.TSchemaSubset SchemaSubset = 8; + optional string SpecialKeysPayloadData = 9; } message TEvWriteResult { @@ -142,6 +145,8 @@ enum ETransactionKind { TX_KIND_COMMIT_WRITE = 5; TX_KIND_BACKUP = 6; TX_KIND_SHARING = 7; + TX_KIND_COMMIT_WRITE_PRIMARY = 8; + TX_KIND_COMMIT_WRITE_SECONDARY = 9; } enum ETransactionFlag { @@ -202,6 +207,29 @@ message TBackupTxBody { message TCommitWriteTxBody { optional uint64 LockId = 1; + + message TPrimary { + repeated uint64 SendingShards = 1; + repeated uint64 ReceivingShards = 2; + repeated uint64 WaitShardsBrokenFlags = 3; + repeated uint64 WaitShardsResultAck = 4; + optional bool TxBroken = 5; + } + + message TSecondary { + optional uint64 ArbiterTabletId = 1; + optional bool NeedReceiveBroken = 2; + optional bool ReceiveAck = 3; + optional bool SelfBroken = 4; + optional bool TxBroken = 5; + } + + oneof Implementation { + TPrimary PrimaryTabletData = 5; + TSecondary SecondaryTabletData = 6; + } + + optional bool Broken = 4 [default = false]; } message TSchemaPresetVersionInfo { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index cf528ba91081..682b2373f736 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -7,7 +7,7 @@ import 
"ydb/core/scheme/protos/key_range.proto"; import "ydb/core/scheme/protos/pathid.proto"; import "ydb/core/protos/data_events.proto"; import "ydb/core/protos/kqp.proto"; -import "ydb/core/formats/arrow/protos/ssa.proto"; +import "ydb/library/formats/arrow/protos/ssa.proto"; import "ydb/core/protos/tablet.proto"; import "ydb/core/protos/tx.proto"; import "ydb/core/protos/flat_scheme_op.proto"; @@ -1523,6 +1523,8 @@ message TEvKqpScan { optional bytes OlapProgram = 19; optional NKikimrSchemeOp.EOlapProgramType OlapProgramType = 20; optional TComputeShardingPolicy ComputeShardingPolicy = 23; + optional uint64 LockTxId = 24; + optional uint32 LockNodeId = 25; } message TEvCompactTable { diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make index f4301f6202d6..8d953b0604d2 100644 --- a/ydb/core/protos/ya.make +++ b/ydb/core/protos/ya.make @@ -51,6 +51,7 @@ SRCS( counters_kesus.proto counters_keyvalue.proto counters_mediator.proto + counters_node_broker.proto counters_pq.proto counters_replication.proto counters_schemeshard.proto @@ -143,10 +144,12 @@ SRCS( tx_sequenceshard.proto ydb_result_set_old.proto ydb_table_impl.proto + yql_translation_settings.proto ) GENERATE_ENUM_SERIALIZATION(blobstorage_pdisk_config.pb.h) GENERATE_ENUM_SERIALIZATION(datashard_load.pb.h) +GENERATE_ENUM_SERIALIZATION(shared_cache.pb.h) PEERDIR( ydb/library/actors/protos @@ -165,15 +168,18 @@ PEERDIR( ydb/library/yql/public/types ydb/library/services ydb/library/ydb_issue/proto - ydb/core/tx/columnshard/engines/scheme/statistics/protos ydb/core/tx/columnshard/engines/scheme/defaults/protos ydb/core/tx/columnshard/engines/protos - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ydb/core/tx/columnshard/common/protos ) CPP_PROTO_PLUGIN0(config_proto_plugin ydb/core/config/tools/protobuf_plugin) -EXCLUDE_TAGS(GO_PROTO) +ONLY_TAGS( + CPP_PROTO + PY_PROTO + PY3_PROTO +) END() diff --git a/ydb/core/protos/yql_translation_settings.proto 
b/ydb/core/protos/yql_translation_settings.proto new file mode 100644 index 000000000000..515900b14d29 --- /dev/null +++ b/ydb/core/protos/yql_translation_settings.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package NYql.NProto; + +option java_package = "ru.yandex.kikimr.proto"; + +message TTranslationSettings { + optional string PathPrefix = 1; + optional uint32 SyntaxVersion = 2; + optional bool AnsiLexer = 3; + optional bool PgParser = 4; + + repeated string Pragmas = 10; +} diff --git a/ydb/core/quoter/kesus_quoter_ut.cpp b/ydb/core/quoter/kesus_quoter_ut.cpp index 1e2e20846f03..3f79f4bef98a 100644 --- a/ydb/core/quoter/kesus_quoter_ut.cpp +++ b/ydb/core/quoter/kesus_quoter_ut.cpp @@ -237,6 +237,9 @@ Y_UNIT_TEST_SUITE(KesusProxyTest) { return setup.GetPipeFactory().GetPipesCreatedCount() >= 2; }; setup.GetRuntime().DispatchEvents(reconnected); + + // Dispatch some events to let poison pill reach the mock + setup.GetRuntime().SimulateSleep(TDuration::Zero()); } Y_UNIT_TEST(ReconnectsWithKesusWhenPipeDestroyed) { @@ -249,6 +252,9 @@ Y_UNIT_TEST_SUITE(KesusProxyTest) { setup.SendDestroyed(pipeMock); setup.WaitPipesCreated(2); + + // Dispatch some events to let poison pill reach the mock + setup.GetRuntime().SimulateSleep(TDuration::Zero()); } Y_UNIT_TEST(RejectsNotCanonizedResourceName) { @@ -391,6 +397,9 @@ Y_UNIT_TEST_SUITE(KesusProxyTest) { setup.WaitEvent(); setup.SendCloseSession("res", 42); setup.WaitEvent(); + + // Dispatch some events to let pending events reach their destinations + setup.GetRuntime().SimulateSleep(TDuration::Zero()); } void SendsProxySessionOnce(bool onSuccess) { diff --git a/ydb/core/resource_pools/resource_pool_classifier_settings.cpp b/ydb/core/resource_pools/resource_pool_classifier_settings.cpp new file mode 100644 index 000000000000..fe2c7586d9a0 --- /dev/null +++ b/ydb/core/resource_pools/resource_pool_classifier_settings.cpp @@ -0,0 +1,49 @@ +#include "resource_pool_classifier_settings.h" + +#include + + +namespace 
NKikimr::NResourcePool { + +//// TClassifierSettings::TParser + +void TClassifierSettings::TParser::operator()(i64* setting) const { + *setting = FromString(Value); + if (*setting < -1) { + throw yexception() << "Invalid integer value " << *setting << ", it is should be greater or equal -1"; + } +} + +void TClassifierSettings::TParser::operator()(TString* setting) const { + *setting = Value; +} + +//// TClassifierSettings::TExtractor + +TString TClassifierSettings::TExtractor::operator()(i64* setting) const { + return ToString(*setting); +} + +TString TClassifierSettings::TExtractor::operator()(TString* setting) const { + return *setting; +} + +//// TPoolSettings + +std::unordered_map TClassifierSettings::GetPropertiesMap() { + std::unordered_map properties = { + {"rank", &Rank}, + {"resource_pool", &ResourcePool}, + {"member_name", &MemberName} + }; + return properties; +} + +void TClassifierSettings::Validate() const { + NACLib::TUserToken token(MemberName, TVector{}); + if (token.IsSystemUser()) { + throw yexception() << "Invalid resource pool classifier configuration, cannot create classifier for system user " << MemberName; + } +} + +} // namespace NKikimr::NResourcePool diff --git a/ydb/core/resource_pools/resource_pool_classifier_settings.h b/ydb/core/resource_pools/resource_pool_classifier_settings.h new file mode 100644 index 000000000000..3e1311d9ac8c --- /dev/null +++ b/ydb/core/resource_pools/resource_pool_classifier_settings.h @@ -0,0 +1,37 @@ +#pragma once + +#include "resource_pool_settings.h" + +#include + + +namespace NKikimr::NResourcePool { + +inline constexpr i64 CLASSIFIER_RANK_OFFSET = 1000; +inline constexpr i64 CLASSIFIER_COUNT_LIMIT = 1000; + +struct TClassifierSettings : public TSettingsBase { + using TBase = TSettingsBase; + using TProperty = std::variant; + + struct TParser : public TBase::TParser { + void operator()(i64* setting) const; + void operator()(TString* setting) const; + }; + + struct TExtractor : public TBase::TExtractor { + 
TString operator()(i64* setting) const; + TString operator()(TString* setting) const; + }; + + bool operator==(const TClassifierSettings& other) const = default; + + std::unordered_map GetPropertiesMap(); + void Validate() const; + + i64 Rank = -1; // -1 = max rank + CLASSIFIER_RANK_OFFSET + TString ResourcePool = DEFAULT_POOL_ID; + TString MemberName = ""; +}; + +} // namespace NKikimr::NResourcePool diff --git a/ydb/core/resource_pools/resource_pool_classifier_settings_ut.cpp b/ydb/core/resource_pools/resource_pool_classifier_settings_ut.cpp new file mode 100644 index 000000000000..fb9b15bd6872 --- /dev/null +++ b/ydb/core/resource_pools/resource_pool_classifier_settings_ut.cpp @@ -0,0 +1,60 @@ +#include "resource_pool_classifier_settings.h" + +#include + +#include + + +namespace NKikimr { + +using namespace NResourcePool; + + +Y_UNIT_TEST_SUITE(ResourcePoolClassifierTest) { + Y_UNIT_TEST(IntSettingsParsing) { + TClassifierSettings settings; + auto propertiesMap = settings.GetPropertiesMap(); + + std::visit(TClassifierSettings::TParser{"0"}, propertiesMap["rank"]); + UNIT_ASSERT_VALUES_EQUAL(settings.Rank, 0); + + std::visit(TClassifierSettings::TParser{"123"}, propertiesMap["rank"]); + UNIT_ASSERT_VALUES_EQUAL(settings.Rank, 123); + + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TClassifierSettings::TParser{"string_value"}, propertiesMap["rank"]), TFromStringException, "Unexpected symbol \"s\" at pos 0 in string \"string_value\"."); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TClassifierSettings::TParser{"9223372036854775808"}, propertiesMap["rank"]), TFromStringException, "Integer overflow in string \"9223372036854775808\"."); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TClassifierSettings::TParser{"-2"}, propertiesMap["rank"]), yexception, "Invalid integer value -2, it is should be greater or equal -1"); + } + + Y_UNIT_TEST(StringSettingsParsing) { + TClassifierSettings settings; + auto propertiesMap = settings.GetPropertiesMap(); + + 
std::visit(TClassifierSettings::TParser{"test_pool"}, propertiesMap["resource_pool"]); + UNIT_ASSERT_VALUES_EQUAL(settings.ResourcePool, "test_pool"); + + std::visit(TClassifierSettings::TParser{"test@user"}, propertiesMap["member_name"]); + UNIT_ASSERT_VALUES_EQUAL(settings.MemberName, "test@user"); + } + + Y_UNIT_TEST(SettingsExtracting) { + TClassifierSettings settings; + settings.Rank = 123; + settings.ResourcePool = "test_pool"; + settings.MemberName = "test@user"; + auto propertiesMap = settings.GetPropertiesMap(); + + TClassifierSettings::TExtractor extractor; + UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["rank"]), "123"); + UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["resource_pool"]), "test_pool"); + UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["member_name"]), "test@user"); + } + + Y_UNIT_TEST(SettingsValidation) { + TClassifierSettings settings; + settings.MemberName = BUILTIN_ACL_METADATA; + UNIT_ASSERT_EXCEPTION_CONTAINS(settings.Validate(), yexception, TStringBuilder() << "Invalid resource pool classifier configuration, cannot create classifier for system user " << settings.MemberName); + } +} + +} // namespace NKikimr diff --git a/ydb/core/resource_pools/resource_pool_settings.cpp b/ydb/core/resource_pools/resource_pool_settings.cpp index 1a5c39644a54..e6d9d1dfedcc 100644 --- a/ydb/core/resource_pools/resource_pool_settings.cpp +++ b/ydb/core/resource_pools/resource_pool_settings.cpp @@ -3,16 +3,76 @@ namespace NKikimr::NResourcePool { -std::unordered_map GetPropertiesMap(TPoolSettings& settings, bool restricted) { +//// TPoolSettings::TParser + +void TPoolSettings::TParser::operator()(i32* setting) const { + *setting = FromString(Value); + if (*setting < -1) { + throw yexception() << "Invalid integer value " << *setting << ", it is should be greater or equal -1"; + } +} + +void TPoolSettings::TParser::operator()(TDuration* setting) const { + ui64 seconds = FromString(Value); + if (seconds > 
std::numeric_limits::max() / 1000) { + throw yexception() << "Invalid seconds value " << seconds << ", it is should be less or equal than " << std::numeric_limits::max() / 1000; + } + *setting = TDuration::Seconds(seconds); +} + +void TPoolSettings::TParser::operator()(TPercent* setting) const { + *setting = FromString(Value); + if (*setting != -1 && (*setting < 0 || 100 < *setting)) { + throw yexception() << "Invalid percent value " << *setting << ", it is should be between 0 and 100 or -1"; + } +} + +//// TPoolSettings::TExtractor + +TString TPoolSettings::TExtractor::operator()(i32* setting) const { + return ToString(*setting); +} + +TString TPoolSettings::TExtractor::operator()(double* setting) const { + return ToString(*setting); +} + +TString TPoolSettings::TExtractor::operator()(TDuration* setting) const { + return ToString(setting->Seconds()); +} + +//// TPoolSettings + +TPoolSettings::TPoolSettings(const google::protobuf::Map& properties) { + for (auto& [property, value] : GetPropertiesMap()) { + if (auto propertyIt = properties.find(property); propertyIt != properties.end()) { + std::visit(TPoolSettings::TParser{propertyIt->second}, value); + } + } +} + +std::unordered_map TPoolSettings::GetPropertiesMap(bool restricted) { std::unordered_map properties = { - {"concurrent_query_limit", &settings.ConcurrentQueryLimit}, - {"queue_size", &settings.QueueSize}, - {"query_memory_limit_percent_per_node", &settings.QueryMemoryLimitPercentPerNode} + {"concurrent_query_limit", &ConcurrentQueryLimit}, + {"queue_size", &QueueSize}, + {"query_memory_limit_percent_per_node", &QueryMemoryLimitPercentPerNode}, + {"database_load_cpu_threshold", &DatabaseLoadCpuThreshold}, + {"total_cpu_limit_percent_per_node", &TotalCpuLimitPercentPerNode}, + {"query_cpu_limit_percent_per_node", &QueryCpuLimitPercentPerNode}, }; if (!restricted) { - properties.insert({"query_cancel_after_seconds", &settings.QueryCancelAfter}); + properties.insert({"query_cancel_after_seconds", 
&QueryCancelAfter}); } return properties; } +void TPoolSettings::Validate() const { + if (ConcurrentQueryLimit > POOL_MAX_CONCURRENT_QUERY_LIMIT) { + throw yexception() << "Invalid resource pool configuration, concurrent_query_limit is " << ConcurrentQueryLimit << ", that exceeds limit in " << POOL_MAX_CONCURRENT_QUERY_LIMIT; + } + if (QueueSize != -1 && ConcurrentQueryLimit == -1 && DatabaseLoadCpuThreshold < 0.0) { + throw yexception() << "Invalid resource pool configuration, queue_size unsupported without concurrent_query_limit or database_load_cpu_threshold"; + } +} + } // namespace NKikimr::NResourcePool diff --git a/ydb/core/resource_pools/resource_pool_settings.h b/ydb/core/resource_pools/resource_pool_settings.h index c2dc319838e1..4d2052be586d 100644 --- a/ydb/core/resource_pools/resource_pool_settings.h +++ b/ydb/core/resource_pools/resource_pool_settings.h @@ -1,64 +1,51 @@ #pragma once +#include "settings_common.h" + +#include + #include -#include namespace NKikimr::NResourcePool { inline constexpr char DEFAULT_POOL_ID[] = "default"; -typedef double TPercent; +inline constexpr i64 POOL_MAX_CONCURRENT_QUERY_LIMIT = 1000; -struct TPoolSettings { - i32 ConcurrentQueryLimit = -1; // -1 = disabled - i32 QueueSize = -1; // -1 = disabled - TDuration QueryCancelAfter = TDuration::Zero(); // 0 = disabled +struct TPoolSettings : public TSettingsBase { + typedef double TPercent; - TPercent QueryMemoryLimitPercentPerNode = -1; // Percent from node memory capacity, -1 = disabled + using TBase = TSettingsBase; + using TProperty = std::variant; - bool operator==(const TPoolSettings& other) const = default; -}; + struct TParser : public TBase::TParser { + void operator()(i32* setting) const; + void operator()(TDuration* setting) const; + void operator()(TPercent* setting) const; + }; -struct TSettingsParser { - const TString& value; - - void operator()(i32* setting) const { - *setting = FromString(value); - if (*setting < -1) { - throw yexception() << "Invalid integer 
value " << *setting << ", it is should be greater or equal -1"; - } - } - - void operator()(TDuration* setting) const { - ui64 seconds = FromString(value); - if (seconds > std::numeric_limits::max() / 1000) { - throw yexception() << "Invalid seconds value " << seconds << ", it is should be less or equal than " << std::numeric_limits::max() / 1000; - } - *setting = TDuration::Seconds(seconds); - } - - void operator()(TPercent* setting) const { - *setting = FromString(value); - if (*setting != -1 && (*setting < 0 || 100 < *setting)) { - throw yexception() << "Invalid percent value " << *setting << ", it is should be between 0 and 100 or -1"; - } - } -}; + struct TExtractor : public TBase::TExtractor { + TString operator()(i32* setting) const; + TString operator()(double* setting) const; + TString operator()(TDuration* setting) const; + }; -struct TSettingsExtractor { - template - TString operator()(T* setting) const { - return ToString(*setting); - } + TPoolSettings() = default; + TPoolSettings(const google::protobuf::Map& properties); - template <> - TString operator()(TDuration* setting) const { - return ToString(setting->Seconds()); - } -}; + bool operator==(const TPoolSettings& other) const = default; -using TProperty = std::variant; -std::unordered_map GetPropertiesMap(TPoolSettings& settings, bool restricted = false); + std::unordered_map GetPropertiesMap(bool restricted = false); + void Validate() const; + + i32 ConcurrentQueryLimit = -1; // -1 = disabled + i32 QueueSize = -1; // -1 = disabled + TDuration QueryCancelAfter = TDuration::Zero(); // 0 = disabled + TPercent QueryMemoryLimitPercentPerNode = -1; // Percent from node memory capacity, -1 = disabled + TPercent DatabaseLoadCpuThreshold = -1; // -1 = disabled + TPercent TotalCpuLimitPercentPerNode = -1; // -1 = disabled + TPercent QueryCpuLimitPercentPerNode = -1; // -1 = disabled; +}; } // namespace NKikimr::NResourcePool diff --git a/ydb/core/resource_pools/resource_pool_settings_ut.cpp 
b/ydb/core/resource_pools/resource_pool_settings_ut.cpp index 2e4b2058bcf1..9b67b03afa4a 100644 --- a/ydb/core/resource_pools/resource_pool_settings_ut.cpp +++ b/ydb/core/resource_pools/resource_pool_settings_ut.cpp @@ -11,52 +11,52 @@ using namespace NResourcePool; Y_UNIT_TEST_SUITE(ResourcePoolTest) { Y_UNIT_TEST(IntSettingsParsing) { TPoolSettings settings; - auto propertiesMap = GetPropertiesMap(settings); + auto propertiesMap = settings.GetPropertiesMap(); - std::visit(TSettingsParser{"-1"}, propertiesMap["queue_size"]); + std::visit(TPoolSettings::TParser{"-1"}, propertiesMap["queue_size"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueueSize, -1); - std::visit(TSettingsParser{"10"}, propertiesMap["queue_size"]); + std::visit(TPoolSettings::TParser{"10"}, propertiesMap["queue_size"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueueSize, 10); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"string_value"}, propertiesMap["queue_size"]), TFromStringException, "Unexpected symbol \"s\" at pos 0 in string \"string_value\"."); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"2147483648"}, propertiesMap["queue_size"]), TFromStringException, "Integer overflow in string \"2147483648\"."); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"-2"}, propertiesMap["queue_size"]), yexception, "Invalid integer value -2, it is should be greater or equal -1"); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"string_value"}, propertiesMap["queue_size"]), TFromStringException, "Unexpected symbol \"s\" at pos 0 in string \"string_value\"."); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"2147483648"}, propertiesMap["queue_size"]), TFromStringException, "Integer overflow in string \"2147483648\"."); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"-2"}, propertiesMap["queue_size"]), yexception, "Invalid integer value -2, it is should be greater or equal -1"); } Y_UNIT_TEST(SecondsSettingsParsing) { 
TPoolSettings settings; - auto propertiesMap = GetPropertiesMap(settings); + auto propertiesMap = settings.GetPropertiesMap(); - std::visit(TSettingsParser{"0"}, propertiesMap["query_cancel_after_seconds"]); + std::visit(TPoolSettings::TParser{"0"}, propertiesMap["query_cancel_after_seconds"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryCancelAfter, TDuration::Zero()); - std::visit(TSettingsParser{"10"}, propertiesMap["query_cancel_after_seconds"]); + std::visit(TPoolSettings::TParser{"10"}, propertiesMap["query_cancel_after_seconds"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryCancelAfter, TDuration::Seconds(10)); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"-1"}, propertiesMap["query_cancel_after_seconds"]), TFromStringException, "Unexpected symbol \"-\" at pos 0 in string \"-1\"."); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"18446744073709552"}, propertiesMap["query_cancel_after_seconds"]), yexception, "Invalid seconds value 18446744073709552, it is should be less or equal than 18446744073709551"); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"-1"}, propertiesMap["query_cancel_after_seconds"]), TFromStringException, "Unexpected symbol \"-\" at pos 0 in string \"-1\"."); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"18446744073709552"}, propertiesMap["query_cancel_after_seconds"]), yexception, "Invalid seconds value 18446744073709552, it is should be less or equal than 18446744073709551"); } Y_UNIT_TEST(PercentSettingsParsing) { TPoolSettings settings; - auto propertiesMap = GetPropertiesMap(settings); + auto propertiesMap = settings.GetPropertiesMap(); - std::visit(TSettingsParser{"-1"}, propertiesMap["query_memory_limit_percent_per_node"]); + std::visit(TPoolSettings::TParser{"-1"}, propertiesMap["query_memory_limit_percent_per_node"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryMemoryLimitPercentPerNode, -1); - std::visit(TSettingsParser{"0"}, 
propertiesMap["query_memory_limit_percent_per_node"]); + std::visit(TPoolSettings::TParser{"0"}, propertiesMap["query_memory_limit_percent_per_node"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryMemoryLimitPercentPerNode, 0); - std::visit(TSettingsParser{"55.5"}, propertiesMap["query_memory_limit_percent_per_node"]); + std::visit(TPoolSettings::TParser{"55.5"}, propertiesMap["query_memory_limit_percent_per_node"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryMemoryLimitPercentPerNode, 55.5); - std::visit(TSettingsParser{"100"}, propertiesMap["query_memory_limit_percent_per_node"]); + std::visit(TPoolSettings::TParser{"100"}, propertiesMap["query_memory_limit_percent_per_node"]); UNIT_ASSERT_VALUES_EQUAL(settings.QueryMemoryLimitPercentPerNode, 100); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"-1.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value -1.5, it is should be between 0 and 100 or -1"); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"-0.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value -0.5, it is should be between 0 and 100 or -1"); - UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TSettingsParser{"101.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value 101.5, it is should be between 0 and 100 or -1"); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"-1.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value -1.5, it is should be between 0 and 100 or -1"); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"-0.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value -0.5, it is should be between 0 and 100 or -1"); + UNIT_ASSERT_EXCEPTION_CONTAINS(std::visit(TPoolSettings::TParser{"101.5"}, propertiesMap["query_memory_limit_percent_per_node"]), yexception, "Invalid percent value 101.5, it is should be 
between 0 and 100 or -1"); } Y_UNIT_TEST(SettingsExtracting) { @@ -65,14 +65,29 @@ Y_UNIT_TEST_SUITE(ResourcePoolTest) { settings.QueueSize = -1; settings.QueryCancelAfter = TDuration::Seconds(15); settings.QueryMemoryLimitPercentPerNode = 0.5; - auto propertiesMap = GetPropertiesMap(settings); + auto propertiesMap = settings.GetPropertiesMap(); - TSettingsExtractor extractor; + TPoolSettings::TExtractor extractor; UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["concurrent_query_limit"]), "10"); UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["queue_size"]), "-1"); UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["query_cancel_after_seconds"]), "15"); UNIT_ASSERT_VALUES_EQUAL(std::visit(extractor, propertiesMap["query_memory_limit_percent_per_node"]), "0.5"); } + + Y_UNIT_TEST(SettingsValidation) { + { // Max concurrent query limit validation + TPoolSettings settings; + settings.ConcurrentQueryLimit = POOL_MAX_CONCURRENT_QUERY_LIMIT + 1; + UNIT_ASSERT_EXCEPTION_CONTAINS(settings.Validate(), yexception, TStringBuilder() << "Invalid resource pool configuration, concurrent_query_limit is " << settings.ConcurrentQueryLimit << ", that exceeds limit in " << POOL_MAX_CONCURRENT_QUERY_LIMIT); + } + + { // Unused queue size validation + + TPoolSettings settings; + settings.QueueSize = 1; + UNIT_ASSERT_EXCEPTION_CONTAINS(settings.Validate(), yexception, TStringBuilder() << "Invalid resource pool configuration, queue_size unsupported without concurrent_query_limit or database_load_cpu_threshold"); + } + } } } // namespace NKikimr diff --git a/ydb/core/resource_pools/settings_common.h b/ydb/core/resource_pools/settings_common.h new file mode 100644 index 000000000000..e1f1ef486d38 --- /dev/null +++ b/ydb/core/resource_pools/settings_common.h @@ -0,0 +1,19 @@ +#pragma once + +#include + + +namespace NKikimr::NResourcePool { + +struct TSettingsBase { + struct TParser { + const TString& Value; + }; + + struct TExtractor { + }; + + bool 
operator==(const TSettingsBase& other) const = default; +}; + +} // namespace NKikimr::NResourcePool diff --git a/ydb/core/resource_pools/ut/ya.make b/ydb/core/resource_pools/ut/ya.make index 035ff36c85c8..9a95820a6887 100644 --- a/ydb/core/resource_pools/ut/ya.make +++ b/ydb/core/resource_pools/ut/ya.make @@ -7,6 +7,7 @@ PEERDIR( ) SRCS( + resource_pool_classifier_settings_ut.cpp resource_pool_settings_ut.cpp ) diff --git a/ydb/core/resource_pools/ya.make b/ydb/core/resource_pools/ya.make index 831bf5e53fb1..5fd79c0bf4e4 100644 --- a/ydb/core/resource_pools/ya.make +++ b/ydb/core/resource_pools/ya.make @@ -1,11 +1,14 @@ LIBRARY() SRCS( + resource_pool_classifier_settings.cpp resource_pool_settings.cpp ) PEERDIR( + contrib/libs/protobuf util + ydb/library/aclib ) END() diff --git a/ydb/core/scheme/scheme_tablecell.cpp b/ydb/core/scheme/scheme_tablecell.cpp index 2ee86265b1a4..c2b541bb7984 100644 --- a/ydb/core/scheme/scheme_tablecell.cpp +++ b/ydb/core/scheme/scheme_tablecell.cpp @@ -307,46 +307,6 @@ bool TSerializedCellMatrix::DoTryParse(const TString& data) { return TryDeserializeCellMatrix(data, Buf, Cells, RowCount, ColCount); } -TCellsBatcher::TCellsBatcher(ui16 colCount, ui64 maxBytesPerBatch) - : ColCount(colCount) - , MaxBytesPerBatch(maxBytesPerBatch) { -} - -bool TCellsBatcher::IsEmpty() const { - return Batches.empty(); -} - -TCellsBatcher::TBatch TCellsBatcher::Flush(bool force) { - TBatch res; - if ((!Batches.empty() && force) || Batches.size() > 1) { - res = std::move(Batches.front()); - Batches.pop_front(); - } - return res; -} - -ui64 TCellsBatcher::AddRow(TArrayRef cells) { - Y_ABORT_UNLESS(cells.size() == ColCount); - ui64 newMemory = 0; - for (const auto& cell : cells) { - newMemory += cell.Size(); - } - if (Batches.empty() || newMemory + sizeof(TCellHeader) * ColCount + Batches.back().MemorySerialized > MaxBytesPerBatch) { - Batches.emplace_back(); - Batches.back().Memory = 0; - Batches.back().MemorySerialized = CellMatrixHeaderSize; - } - - for 
(auto& cell : cells) { - Batches.back().Data.emplace_back(std::move(cell)); - } - - Batches.back().Memory += newMemory; - Batches.back().MemorySerialized += newMemory + sizeof(TCellHeader) * ColCount; - - return newMemory; -} - void TCellsStorage::Reset(TArrayRef cells) { size_t cellsSize = cells.size(); @@ -499,5 +459,13 @@ TString DbgPrintTuple(const TDbTupleRef& row, const NScheme::TTypeRegistry& type return res; } +size_t GetCellMatrixHeaderSize() { + return CellMatrixHeaderSize; +} + +size_t GetCellHeaderSize() { + return sizeof(TCellHeader); +} + } // namespace NKikimr diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h index b7635a424725..ed5f32fe1c9f 100644 --- a/ydb/core/scheme/scheme_tablecell.h +++ b/ydb/core/scheme/scheme_tablecell.h @@ -541,6 +541,14 @@ class TSerializedCellVec { return Cells; } + explicit operator bool() const + { + return !Cells.empty(); + } + + // read headers, assuming the buf is correct and append additional cells at the end + static bool UnsafeAppendCells(TConstArrayRef cells, TString& serializedCellVec); + static void Serialize(TString& res, TConstArrayRef cells); static TString Serialize(TConstArrayRef cells); @@ -653,29 +661,6 @@ class TSerializedCellMatrix { ui16 ColCount; }; -class TCellsBatcher { -public: - explicit TCellsBatcher(ui16 colCount, ui64 maxBytesPerBatch); - - bool IsEmpty() const; - - struct TBatch { - ui64 Memory = 0; - ui64 MemorySerialized = 0; - TVector Data; - }; - - TBatch Flush(bool force); - - ui64 AddRow(TArrayRef cells); - -private: - std::deque Batches; - - ui16 ColCount; - ui64 MaxBytesPerBatch; -}; - class TCellsStorage { public: @@ -760,4 +745,7 @@ void DbgPrintValue(TString&, const TCell&, NScheme::TTypeInfo typeInfo); TString DbgPrintCell(const TCell& r, NScheme::TTypeInfo typeInfo, const NScheme::TTypeRegistry& typeRegistry); TString DbgPrintTuple(const TDbTupleRef& row, const NScheme::TTypeRegistry& typeRegistry); +size_t GetCellMatrixHeaderSize(); +size_t 
GetCellHeaderSize(); + } diff --git a/ydb/core/scheme/scheme_tablecell_ut.cpp b/ydb/core/scheme/scheme_tablecell_ut.cpp index ae780b8cfc5f..a323a3e5d405 100644 --- a/ydb/core/scheme/scheme_tablecell_ut.cpp +++ b/ydb/core/scheme/scheme_tablecell_ut.cpp @@ -135,12 +135,15 @@ Y_UNIT_TEST_SUITE(Scheme) { 0); TSerializedCellVec vec3; + UNIT_ASSERT(!vec3); UNIT_ASSERT(vec3.GetCells().empty()); UNIT_ASSERT(vec3.GetBuffer().empty()); TString buf = vec.GetBuffer(); UNIT_ASSERT(buf.size() > cells.size()*2); vec3.Parse(buf); + UNIT_ASSERT(vec3); + UNIT_ASSERT_VALUES_EQUAL(CompareTypedCellVectors(vec3.GetCells().data(), cells.data(), types.data(), diff --git a/ydb/core/security/certificate_check/cert_auth_processor.cpp b/ydb/core/security/certificate_check/cert_auth_processor.cpp index f2effa29e9a7..f2275b35a441 100644 --- a/ydb/core/security/certificate_check/cert_auth_processor.cpp +++ b/ydb/core/security/certificate_check/cert_auth_processor.cpp @@ -1,6 +1,7 @@ #include "cert_auth_processor.h" #include +#include #include #include #include @@ -100,6 +101,56 @@ TVector> X509CertificateReader::ReadIssuerTerms(cons return ReadTerms(name); } +static void FreeList(GENERAL_NAMES* list) { + sk_GENERAL_NAME_pop_free(list, GENERAL_NAME_free); +} + +TVector X509CertificateReader::ReadSubjectDns(const X509Ptr& x509, const std::vector>& subjectTerms) { + TVector result; + // 1. Subject's common name (CN) must be a subject DNS name, so add it to DNS names of subject first + for (const auto& [k, v] : subjectTerms) { + if (k == "CN") { + result.emplace_back(v); + } + } + + using TGeneralNamesPtr = std::unique_ptr>; + TGeneralNamesPtr subjectAltNames((GENERAL_NAMES*)X509_get_ext_d2i(x509.get(), NID_subject_alt_name, NULL, NULL)); + if (!subjectAltNames) { + return result; + } + const int subjectAltNamesCount = sk_GENERAL_NAME_num(subjectAltNames.get()); + if (subjectAltNamesCount <= 0) { + return result; + } + + result.reserve(static_cast(subjectAltNamesCount) + result.size()); + // 2. 
Additionally find subject alternative names with type=DNS + for (int i = 0; i < subjectAltNamesCount; ++i) { + const GENERAL_NAME* name = sk_GENERAL_NAME_value(subjectAltNames.get(), i); + if (!name) { + continue; + } + if (name->type == GEN_DNS) { + const ASN1_STRING* value = name->d.dNSName; + if (!value) { + continue; + } + + const char* data = reinterpret_cast(ASN1_STRING_get0_data(value)); + if (!data) { + continue; + } + int size = ASN1_STRING_length(value); + if (size <= 0) { + continue; + } + result.emplace_back(data, static_cast(size)); + } + } + return result; +} + TString X509CertificateReader::GetFingerprint(const X509Ptr& x509) { static constexpr size_t FINGERPRINT_LENGTH = SHA_DIGEST_LENGTH; unsigned char fingerprint[FINGERPRINT_LENGTH]; @@ -109,14 +160,16 @@ TString X509CertificateReader::GetFingerprint(const X509Ptr& x509) { return HexEncode(fingerprint, FINGERPRINT_LENGTH); } -TCertificateAuthorizationParams::TCertificateAuthorizationParams(const TDN& dn, bool requireSameIssuer, const std::vector& groups) +TCertificateAuthorizationParams::TCertificateAuthorizationParams(const TDN& dn, const std::optional& subjectDns, bool requireSameIssuer, const std::vector& groups) : SubjectDn(dn) + , SubjectDns(subjectDns) , RequireSameIssuer(requireSameIssuer) , Groups(groups) {} -TCertificateAuthorizationParams::TCertificateAuthorizationParams(TDN&& dn, bool requireSameIssuer, std::vector&& groups) +TCertificateAuthorizationParams::TCertificateAuthorizationParams(TDN&& dn, std::optional&& subjectDns, bool requireSameIssuer, std::vector&& groups) : SubjectDn(std::move(dn)) + , SubjectDns(std::move(subjectDns)) , RequireSameIssuer(requireSameIssuer) , Groups(std::move(groups)) {} @@ -127,59 +180,44 @@ TCertificateAuthorizationParams::TDN& TCertificateAuthorizationParams::TDN::AddR } TCertificateAuthorizationParams::operator bool() const { - return SubjectDn; + return SubjectDn || SubjectDns; } -bool TCertificateAuthorizationParams::CheckSubject(const 
std::unordered_map>& subjectDescription) const { - bool isDescriptionMatched = false; - for (const auto& rdn: SubjectDn.RDNs) { - isDescriptionMatched = false; +bool TCertificateAuthorizationParams::CheckSubject(const std::unordered_map>& subjectDescription, const std::vector& subjectDns) const { + for (const TRDN& rdn: SubjectDn.RDNs) { auto fieldIt = subjectDescription.find(rdn.Attribute); if (fieldIt == subjectDescription.cend()) { - break; + return false; } const auto& attributeValues = fieldIt->second; - bool attributeMatched = false; - for (const auto& attributeValue : attributeValues) { - attributeMatched = false; - for (const auto& value: rdn.Values) { - if (value == attributeValue) { - attributeMatched = true; - break; - } - } - if (!attributeMatched) { - for (const auto& suffix: rdn.Suffixes) { - if (attributeValue.EndsWith(suffix)) { - attributeMatched = true; - break; - } - } - } - if (!attributeMatched) { + if (!rdn.Match(attributeValues)) { + return false; + } + } + + if (SubjectDns) { + bool dnsMatched = false; + for (const TString& dns : subjectDns) { + if (SubjectDns->Match(dns)) { + dnsMatched = true; break; } } - if (!attributeMatched) { - isDescriptionMatched = false; - break; + if (!dnsMatched) { + return false; } - isDescriptionMatched = true; } - if (isDescriptionMatched) { - return true; - } - return false; + return true; } TCertificateAuthorizationParams::TDN::operator bool() const { return !RDNs.empty(); } -TCertificateAuthorizationParams::TRDN::TRDN(const TString& Attribute) - :Attribute(Attribute) +TCertificateAuthorizationParams::TRDN::TRDN(const TString& attribute) + : Attribute(attribute) {} TCertificateAuthorizationParams::TRDN& TCertificateAuthorizationParams::TRDN::AddValue(const TString& val) @@ -194,4 +232,30 @@ TCertificateAuthorizationParams::TRDN& TCertificateAuthorizationParams::TRDN::Ad return *this; } +bool TCertificateAuthorizationParams::TRDN::Match(const TString& value) const +{ + for (const auto& v : Values) { + if 
(value == v) { + return true; + } + } + for (const auto& s : Suffixes) { + if (value.EndsWith(s)) { + return true; + } + } + + return false; +} + +bool TCertificateAuthorizationParams::TRDN::Match(const std::vector& values) const +{ + for (const auto& value : values) { + if (!Match(value)) { + return false; + } + } + return true; +} + } //namespace NKikimr { diff --git a/ydb/core/security/certificate_check/cert_auth_processor.h b/ydb/core/security/certificate_check/cert_auth_processor.h index 42ec4bf3981c..bcbc1bb7c8a1 100644 --- a/ydb/core/security/certificate_check/cert_auth_processor.h +++ b/ydb/core/security/certificate_check/cert_auth_processor.h @@ -15,9 +15,11 @@ struct TCertificateAuthorizationParams { TVector Values; TVector Suffixes; - TRDN(const TString& Attribute); + TRDN(const TString& attribute); TRDN& AddValue(const TString& val); TRDN& AddSuffix(const TString& suffix); + bool Match(const std::vector& values) const; + bool Match(const TString& value) const; }; struct TDN { @@ -27,11 +29,11 @@ struct TCertificateAuthorizationParams { operator bool () const; }; - TCertificateAuthorizationParams(const TDN& dn = TDN(), bool requireSameIssuer = true, const std::vector& groups = {}); - TCertificateAuthorizationParams(TDN&& dn, bool requireSameIssuer = true, std::vector&& groups = {}); + TCertificateAuthorizationParams(const TDN& dn = TDN(), const std::optional& subjectDns = std::nullopt, bool requireSameIssuer = true, const std::vector& groups = {}); + TCertificateAuthorizationParams(TDN&& dn, std::optional&& subjectDns, bool requireSameIssuer = true, std::vector&& groups = {}); operator bool () const; - bool CheckSubject(const std::unordered_map>& subjectDescription) const; + bool CheckSubject(const std::unordered_map>& subjectDescription, const std::vector& subjectDns) const; void SetSubjectDn(const TDN& subjectDn) { SubjectDn = subjectDn; } @@ -42,6 +44,7 @@ struct TCertificateAuthorizationParams { bool CanCheckNodeByAttributeCN = false; TDN SubjectDn; 
+ std::optional SubjectDns; bool RequireSameIssuer = true; std::vector Groups; }; @@ -61,6 +64,7 @@ struct X509CertificateReader { static X509Ptr ReadCertAsPEM(const TStringBuf& cert); static TVector> ReadSubjectTerms(const X509Ptr& x509); + static TVector ReadSubjectDns(const X509Ptr& x509, const std::vector>& subjectTerms); static TVector> ReadAllSubjectTerms(const X509Ptr& x509); static TVector> ReadIssuerTerms(const X509Ptr& x509); static TString GetFingerprint(const X509Ptr& x509); diff --git a/ydb/core/security/certificate_check/cert_auth_utils.cpp b/ydb/core/security/certificate_check/cert_auth_utils.cpp index a310c197d2f0..82c380d9df50 100644 --- a/ydb/core/security/certificate_check/cert_auth_utils.cpp +++ b/ydb/core/security/certificate_check/cert_auth_utils.cpp @@ -17,7 +17,7 @@ namespace NKikimr { -std::vector GetCertificateAuthorizationParams(const NKikimrConfig::TClientCertificateAuthorization &clientCertificateAuth) { +std::vector GetCertificateAuthorizationParams(const NKikimrConfig::TClientCertificateAuthorization& clientCertificateAuth) { std::vector certAuthParams; certAuthParams.reserve(clientCertificateAuth.ClientCertificateDefinitionsSize()); @@ -33,9 +33,19 @@ std::vector GetCertificateAuthorizationParams(c } dn.AddRDN(std::move(rdn)); } - if (dn) { + std::optional subjectDns; + if (const auto& subjectDnsCfg = clientCertificateDefinition.GetSubjectDns(); subjectDnsCfg.ValuesSize() || subjectDnsCfg.SuffixesSize()) { + TCertificateAuthorizationParams::TRDN& dns = subjectDns.emplace(TString()); + for (const auto& value: subjectDnsCfg.GetValues()) { + dns.AddValue(value); + } + for (const auto& suffix: subjectDnsCfg.GetSuffixes()) { + dns.AddSuffix(suffix); + } + } + if (dn || subjectDns) { std::vector groups(clientCertificateDefinition.GetMemberGroups().cbegin(), clientCertificateDefinition.GetMemberGroups().cend()); - certAuthParams.emplace_back(std::move(dn), clientCertificateDefinition.GetRequireSameIssuer(), std::move(groups)); + 
certAuthParams.emplace_back(std::move(dn), std::move(subjectDns), clientCertificateDefinition.GetRequireSameIssuer(), std::move(groups)); } } @@ -130,8 +140,8 @@ int FillNameFromProps(X509_NAME* name, const TProps& props) { return 1; } - if (!props.Coutry.empty()) { - X509_NAME_add_entry_by_txt(name, SN_countryName, MBSTRING_ASC, (const unsigned char*)props.Coutry.c_str(), -1, -1, 0); + if (!props.Country.empty()) { + X509_NAME_add_entry_by_txt(name, SN_countryName, MBSTRING_ASC, (const unsigned char*)props.Country.c_str(), -1, -1, 0); } if (!props.State.empty()) { @@ -377,7 +387,7 @@ X509REQPtr GenerateRequest(PKeyPtr& pkey, const TProps& props) { return std::move(request); } -X509Ptr SingRequest(X509REQPtr& request, X509Ptr& rootCert, PKeyPtr& rootKey, const TProps& props) { +X509Ptr SignRequest(X509REQPtr& request, X509Ptr& rootCert, PKeyPtr& rootKey, const TProps& props) { auto* pktmp = X509_REQ_get0_pubkey(request.get()); // X509_REQ_get0_pubkey returns the key, that shouldn't freed CHECK(pktmp, "Error unpacking public key from request."); @@ -455,7 +465,7 @@ TCertAndKey GenerateSignedCert(const TCertAndKey& rootCA, const TProps& props) { auto rootCert = ReadCertAsPEM(rootCA.Certificate); auto rootKey = ReadPrivateKeyAsPEM(rootCA.PrivateKey); - auto cert = SingRequest(request, rootCert, rootKey, props); // NID_authority_key_identifier must see ca + auto cert = SignRequest(request, rootCert, rootKey, props); // NID_authority_key_identifier must see ca TCertAndKey result; result.Certificate = WriteAsPEM(cert); @@ -475,7 +485,7 @@ TProps TProps::AsCA() { TProps props; props.SecondsValid = 3*365 * 24 * 60 *60; // 3 years - props.Coutry = "RU"; + props.Country = "RU"; props.State = "MSK"; props.Location = "MSK"; props.Organization = "YA"; diff --git a/ydb/core/security/certificate_check/cert_auth_utils.h b/ydb/core/security/certificate_check/cert_auth_utils.h index 9bffa64093db..f76a94665029 100644 --- a/ydb/core/security/certificate_check/cert_auth_utils.h +++ 
b/ydb/core/security/certificate_check/cert_auth_utils.h @@ -22,7 +22,7 @@ struct TCertAndKey { struct TProps { long SecondsValid = 0; - std::string Coutry; // C + std::string Country; // C std::string State; // ST std::string Location; // L std::string Organization; // O diff --git a/ydb/core/security/certificate_check/cert_check.cpp b/ydb/core/security/certificate_check/cert_check.cpp index 18808760c350..85b6c928c27e 100644 --- a/ydb/core/security/certificate_check/cert_check.cpp +++ b/ydb/core/security/certificate_check/cert_check.cpp @@ -71,6 +71,7 @@ TCertificateChecker::TReadClientSubjectResult TCertificateChecker::ReadSubjectFr result.Error = { .Message = "Cannot extract subject from client certificate", .Retryable = false }; return result; } + result.SubjectDns = X509CertificateReader::ReadSubjectDns(pemCertificates.ClientCertX509, result.SubjectDn); return result; } @@ -84,14 +85,14 @@ TString TCertificateChecker::CreateUserSidFromSubjectDn(const std::vector>& subjectDn, const TCertificateAuthorizationParams& authParams) const { +TEvTicketParser::TError TCertificateChecker::CheckClientSubject(const TReadClientSubjectResult& subjectInfo, const TCertificateAuthorizationParams& authParams) const { std::unordered_map> subjectDescription; - for (const auto& [attribute, value] : subjectDn) { + for (const auto& [attribute, value] : subjectInfo.SubjectDn) { auto& attributeValues = subjectDescription[attribute]; attributeValues.push_back(value); } - if (!authParams.CheckSubject(subjectDescription)) { + if (!authParams.CheckSubject(subjectDescription, subjectInfo.SubjectDns)) { return { .Message = "Client certificate failed verification", .Retryable = false }; } return {}; @@ -128,7 +129,7 @@ TCertificateChecker::TCertificateCheckResult TCertificateChecker::CheckClientCer continue; } - auto checkClientSubjectError = CheckClientSubject(readClientSubjectResult.SubjectDn, authParams); + auto checkClientSubjectError = CheckClientSubject(readClientSubjectResult, 
authParams); if (!checkClientSubjectError.empty()) { continue; } diff --git a/ydb/core/security/certificate_check/cert_check.h b/ydb/core/security/certificate_check/cert_check.h index 7525cc0fb600..84cb978e32fc 100644 --- a/ydb/core/security/certificate_check/cert_check.h +++ b/ydb/core/security/certificate_check/cert_check.h @@ -27,6 +27,7 @@ class TCertificateChecker { struct TReadClientSubjectResult { std::vector> SubjectDn; + std::vector SubjectDns; // Subject alternative names, DNS TEvTicketParser::TError Error; }; @@ -47,7 +48,7 @@ class TCertificateChecker { TEvTicketParser::TError CheckIssuers(const TPemCertificates& pemCertificates) const; TReadClientSubjectResult ReadSubjectFromClientCertificate(const TPemCertificates& pemCertificates) const; TString CreateUserSidFromSubjectDn(const std::vector>& subjectDn) const; - TEvTicketParser::TError CheckClientSubject(const std::vector>& subjectDn, const TCertificateAuthorizationParams& authParams) const; + TEvTicketParser::TError CheckClientSubject(const TReadClientSubjectResult& subjectInfo, const TCertificateAuthorizationParams& authParams) const; TCertificateCheckResult DefaultCheckClientCertificate(const TPemCertificates& pemCertificates) const; TCertificateCheckResult CheckClientCertificate(const TPemCertificates& pemCertificates) const; TString GetDefaultGroup() const; diff --git a/ydb/core/security/certificate_check/cert_check_ut.cpp b/ydb/core/security/certificate_check/cert_check_ut.cpp new file mode 100644 index 000000000000..d9d1d48c0912 --- /dev/null +++ b/ydb/core/security/certificate_check/cert_check_ut.cpp @@ -0,0 +1,282 @@ +#include "cert_check.h" +#include "cert_auth_utils.h" + +#include + +#include +#include + +namespace NKikimr { + +TTempFile SaveToTempFile(const std::string& content, const char* prefix = "cert") { + TTempFile file = MakeTempName(nullptr, prefix); + TUnbufferedFileOutput(file.Name()).Write(content); + return file; +} + +Y_UNIT_TEST_SUITE(TCertificateCheckerTest) { + 
Y_UNIT_TEST(CheckSubjectDns) { + using TTestSubjectTerm = std::pair>; + struct TTestSubjectDnsData { + TString CommonName = "localhost"; + std::vector AltNames; + std::vector DnsValues; + std::vector DnsSuffixes; + std::optional SubjectTerm; // one is enough, because we test DNS now + bool CheckResult = false; + }; + + std::vector tests = { + { + .AltNames = { + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".cluster.net", + }, + .CheckResult = true, + }, + { + .AltNames = { + "IP:1.2.3.4", // not DNS + }, + .DnsValues = { + "1.2.3.4", + }, + .CheckResult = false, + }, + { + .CommonName = "the.only.name.net", // CN is also FQDN + .DnsValues = { + "the.only.name.net", + }, + .CheckResult = true, + }, + { + .CommonName = "the.only.name.net", // CN is also FQDN + .DnsSuffixes = { + ".name.net", + ".some.other.domain.net", + }, + .CheckResult = true, + }, + { + .CommonName = "", // no DNS in cert + .DnsSuffixes = { + ".cluster.net", + }, + .CheckResult = false, + }, + { + .CommonName = "", // no DNS in cert + .DnsValues = { + "node-1.cluster.net", + }, + .CheckResult = false, + }, + { + // Complex matching + .AltNames = { + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:my-host.us", + }, + .DnsValues = { + "hello.su", + "balancer.cluster.net", + }, + .DnsSuffixes = { + ".123.us", + ".cluster-0.net", + ".cluster-1.net", + }, + .CheckResult = true, + }, + { + // Complex matching + .AltNames = { + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:my-host.us", + }, + .DnsValues = { + "hello.su", + "no-name", + }, + .DnsSuffixes = { + ".123.us", + ".cluster-0.net", + ".cluster-1.net", + "my-host.us", + }, + .CheckResult = true, + }, + { + // Additional conditions + // No DNS + // Subject OK + .AltNames = { + 
"DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = true, + }, + { + // Additional conditions + // No DNS + // Subject not OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS OK + // Subject OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsValues = { + "node-1.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = true, + }, + { + // Additional conditions + // DNS not OK + // Subject OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS not OK + // Subject not OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS OK + // Subject not OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:balancer.my-cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + }; + + TCertAndKey ca = GenerateCA(TProps::AsCA()); + + for (size_t testNumber = 0; testNumber < tests.size(); ++testNumber) { + const TTestSubjectDnsData& 
test = tests[testNumber]; + TProps props = TProps::AsClientServer(); + props.CommonName = test.CommonName; + props.AltNames = test.AltNames; + TCertAndKey clientServer = GenerateSignedCert(ca, props); + VerifyCert(clientServer.Certificate, ca.Certificate); + + TCertificateAuthValues opts; + opts.Domain = "cert"; + TTempFile serverCert = SaveToTempFile(clientServer.Certificate); + opts.ServerCertificateFilePath = serverCert.Name(); + auto* defs = opts.ClientCertificateAuthorization.AddClientCertificateDefinitions(); + defs->AddMemberGroups("ClusterNodeGroup@cert"); + + if (!test.DnsValues.empty() || !test.DnsSuffixes.empty()) { + auto* dnsCondition = defs->MutableSubjectDns(); + for (const TString& v : test.DnsValues) { + dnsCondition->AddValues(v); + } + for (const TString& s : test.DnsSuffixes) { + dnsCondition->AddSuffixes(s); + } + } + if (test.SubjectTerm) { + auto* t = defs->AddSubjectTerms(); + t->SetShortName(test.SubjectTerm->first); + for (const TString& v : test.SubjectTerm->second) { + t->AddValues(v); + } + } + TCertificateChecker checker(opts); + + TCertificateChecker::TCertificateCheckResult result = checker.Check(TString(clientServer.Certificate)); + if (test.CheckResult) { + UNIT_ASSERT_C(result.Error.empty(), "Test number: " << testNumber << ". 
Error: " << result.Error); + UNIT_ASSERT_VALUES_EQUAL_C(result.Groups.size(), 1, "Test number: " << testNumber); + UNIT_ASSERT_VALUES_EQUAL_C(result.Groups[0], "ClusterNodeGroup@cert", "Test number: " << testNumber); + } else { + UNIT_ASSERT_C(!result.Error.empty(), "Test number: " << testNumber); + } + } + } +} + +} // namespace NKikimr diff --git a/ydb/core/security/certificate_check/cert_utils_ut.cpp b/ydb/core/security/certificate_check/cert_utils_ut.cpp index 583182da7360..9dfce0ebb59a 100644 --- a/ydb/core/security/certificate_check/cert_utils_ut.cpp +++ b/ydb/core/security/certificate_check/cert_utils_ut.cpp @@ -29,7 +29,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { .AddRDN(TCertificateAuthorizationParams::TRDN("O").AddValue("YA")) .AddRDN(TCertificateAuthorizationParams::TRDN("OU").AddValue("UtTest").AddValue("OtherUnit")) .AddRDN(TCertificateAuthorizationParams::TRDN("CN").AddValue("localhost").AddSuffix(".yandex.ru")); - TCertificateAuthorizationParams authParams(std::move(dn)); + TCertificateAuthorizationParams authParams(std::move(dn), std::nullopt); { std::unordered_map> subjectTerms; @@ -40,7 +40,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -53,7 +53,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("OtherUnit"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -66,7 +66,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("WrongUnit"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -78,7 +78,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { 
subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -91,7 +91,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["CN"].push_back("test.yandex.ru"); subjectTerms["ELSE"].push_back("WhatEver"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -103,7 +103,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -115,7 +115,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.not-yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -127,7 +127,19 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); + } + + { + std::unordered_map> subjectTerms; + //subjectTerms["C"] = "RU"; + subjectTerms["ST"].push_back("MSK"); + subjectTerms["L"].push_back("MSK"); + subjectTerms["O"].push_back("YA"); + subjectTerms["OU"].push_back("UtTest"); + subjectTerms["CN"].push_back("test.yandex.ru"); + + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {"test.yandex.ru"})); } } } diff --git a/ydb/core/security/certificate_check/ut/ya.make b/ydb/core/security/certificate_check/ut/ya.make index 228fa7162126..5e0c281d3093 100644 --- a/ydb/core/security/certificate_check/ut/ya.make +++ b/ydb/core/security/certificate_check/ut/ya.make @@ -13,6 +13,7 @@ PEERDIR( YQL_LAST_ABI_VERSION() SRCS( + cert_check_ut.cpp 
cert_utils_ut.cpp ) diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp index a3ec33e9adf5..857015d85c8e 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include "ldap_auth_provider.h" #include "ldap_utils.h" @@ -69,6 +71,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped TLdapAuthProvider(const NKikimrProto::TLdapAuthentication& settings) : Settings(settings) , FilterCreator(Settings) + , UrisCreator(Settings, Settings.GetPort() != 0 ? Settings.GetPort() : NKikimrLdap::GetPort(Settings.GetScheme())) { const TString& requestedGroupAttribute = Settings.GetRequestedGroupAttribute(); RequestedAttributes[0] = const_cast(requestedGroupAttribute.empty() ? "memberOf" : requestedGroupAttribute.c_str()); @@ -135,18 +138,33 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped } LDAPMessage* entry = NKikimrLdap::FirstEntry(ld, searchUserResponse.SearchMessage); BerElement* ber = nullptr; - std::vector groupsDn; + std::vector directUserGroups; char* attribute = NKikimrLdap::FirstAttribute(ld, entry, &ber); if (attribute != nullptr) { - groupsDn = NKikimrLdap::GetAllValuesOfAttribute(ld, entry, attribute); + directUserGroups = NKikimrLdap::GetAllValuesOfAttribute(ld, entry, attribute); NKikimrLdap::MemFree(attribute); } if (ber) { NKikimrLdap::BerFree(ber, 0); } + std::vector allUserGroups; + auto& extendedSettings = Settings.GetExtendedSettings(); + if (extendedSettings.GetEnableNestedGroupsSearch() && !directUserGroups.empty()) { + // Active Directory has special matching rule to fetch nested groups in one request it is MatchingRuleInChain + // We don`t know what is ldap server. Is it Active Directory or OpenLdap or other server? 
+ // If using MatchingRuleInChain return empty list of groups it means that ldap server isn`t Active Directory + // but it is known that there are groups and we are trying to do tree traversal + allUserGroups = TryToGetGroupsUseMatchingRuleInChain(ld, entry); + if (allUserGroups.empty()) { + allUserGroups = std::move(directUserGroups); + GetNestedGroups(ld, &allUserGroups); + } + } else { + allUserGroups = std::move(directUserGroups); + } NKikimrLdap::MsgFree(entry); NKikimrLdap::Unbind(ld); - Send(ev->Sender, new TEvLdapAuthProvider::TEvEnrichGroupsResponse(request->Key, request->User, groupsDn)); + Send(ev->Sender, new TEvLdapAuthProvider::TEvEnrichGroupsResponse(request->Key, request->User, allUserGroups)); } TInitAndBindResponse InitAndBind(LDAP** ld, std::function(const TEvLdapAuthProvider::EStatus&, const TEvLdapAuthProvider::TError&)> eventFabric) { @@ -173,7 +191,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped result = NKikimrLdap::Bind(*ld, Settings.GetBindDn(), Settings.GetBindPassword()); if (!NKikimrLdap::IsSuccess(result)) { TEvLdapAuthProvider::TError error { - .Message = "Could not perform initial LDAP bind for dn " + Settings.GetBindDn() + " on server " + UrisList + "\n" + .Message = "Could not perform initial LDAP bind for dn " + Settings.GetBindDn() + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString(result), .Retryable = NKikimrLdap::IsRetryableError(result) }; @@ -202,12 +220,10 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped } } - const ui32 port = Settings.GetPort() != 0 ? 
Settings.GetPort() : NKikimrLdap::GetPort(Settings.GetScheme()); - UrisList = GetUris(port); - result = NKikimrLdap::Init(ld, Settings.GetScheme(), UrisList, port); + result = NKikimrLdap::Init(ld, Settings.GetScheme(), UrisCreator.GetUris(), UrisCreator.GetConfiguredPort()); if (!NKikimrLdap::IsSuccess(result)) { return {{TEvLdapAuthProvider::EStatus::UNAVAILABLE, - {.Message = "Could not initialize LDAP connection for uris: " + UrisList + ". " + NKikimrLdap::LdapError(*ld), + {.Message = "Could not initialize LDAP connection for uris: " + UrisCreator.GetUris() + ". " + NKikimrLdap::LdapError(*ld), .Retryable = false}}}; } @@ -237,14 +253,20 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped char* dn = NKikimrLdap::GetDn(*request.Ld, request.Entry); if (dn == nullptr) { return {{TEvLdapAuthProvider::EStatus::UNAUTHORIZED, - {.Message = "Could not get dn for the first entry matching " + FilterCreator.GetFilter(request.Login) + " on server " + UrisList + "\n" + {.Message = "Could not get dn for the first entry matching " + FilterCreator.GetFilter(request.Login) + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::LdapError(*request.Ld), .Retryable = false}}}; } + if (request.Password.Empty()) { + NKikimrLdap::MemFree(dn); + return {{TEvLdapAuthProvider::EStatus::UNAUTHORIZED, + {.Message = "LDAP login failed. 
Empty password", + .Retryable = false}}}; + } TEvLdapAuthProvider::TError error; int result = NKikimrLdap::Bind(*request.Ld, dn, request.Password); if (!NKikimrLdap::IsSuccess(result)) { - error.Message = "LDAP login failed for user " + TString(dn) + " on server " + UrisList + "\n" + error.Message = "LDAP login failed for user " + TString(dn) + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString((result)); error.Retryable = NKikimrLdap::IsRetryableError(result); } @@ -266,7 +288,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped TSearchUserResponse response; if (!NKikimrLdap::IsSuccess(result)) { response.Status = NKikimrLdap::ErrorToStatus(result); - response.Error = {.Message = "Could not search for filter " + searchFilter + " on server " + UrisList + "\n" + response.Error = {.Message = "Could not search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString(result), .Retryable = NKikimrLdap::IsRetryableError(result)}; return response; @@ -275,11 +297,11 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped if (countEntries != 1) { if (countEntries == 0) { response.Error = {.Message = "LDAP user " + request.User + " does not exist. " - "LDAP search for filter " + searchFilter + " on server " + UrisList + " return no entries", + "LDAP search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + " return no entries", .Retryable = false}; } else { response.Error = {.Message = "LDAP user " + request.User + " is not unique. 
" - "LDAP search for filter " + searchFilter + " on server " + UrisList + " return " + countEntries + " entries", + "LDAP search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + " return " + countEntries + " entries", .Retryable = false}; } response.Status = TEvLdapAuthProvider::EStatus::UNAUTHORIZED; @@ -290,6 +312,85 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped return response; } + std::vector TryToGetGroupsUseMatchingRuleInChain(LDAP* ld, LDAPMessage* entry) const { + static const TString matchingRuleInChain = "1.2.840.113556.1.4.1941"; // Only Active Directory supports + TStringBuilder filter; + char* dn = NKikimrLdap::GetDn(ld, entry); + filter << "(member:" << matchingRuleInChain << ":=" << dn << ')'; + NKikimrLdap::MemFree(dn); + dn = nullptr; + LDAPMessage* searchMessage = nullptr; + int result = NKikimrLdap::Search(ld, Settings.GetBaseDn(), NKikimrLdap::EScope::SUBTREE, filter, NKikimrLdap::noAttributes, 0, &searchMessage); + if (!NKikimrLdap::IsSuccess(result)) { + return {}; + } + const int countEntries = NKikimrLdap::CountEntries(ld, searchMessage); + if (countEntries == 0) { + NKikimrLdap::MsgFree(searchMessage); + return {}; + } + std::vector groups; + groups.reserve(countEntries); + for (LDAPMessage* groupEntry = NKikimrLdap::FirstEntry(ld, searchMessage); groupEntry != nullptr; groupEntry = NKikimrLdap::NextEntry(ld, groupEntry)) { + dn = NKikimrLdap::GetDn(ld, groupEntry); + groups.push_back(dn); + NKikimrLdap::MemFree(dn); + dn = nullptr; + } + NKikimrLdap::MsgFree(searchMessage); + return groups; + } + + void GetNestedGroups(LDAP* ld, std::vector* groups) { + std::unordered_set viewedGroups(groups->cbegin(), groups->cend()); + std::queue queue; + for (const auto& group : *groups) { + queue.push(group); + } + while (!queue.empty()) { + TStringBuilder filter; + filter << "(|"; + filter << "(entryDn=" << queue.front() << ')'; + queue.pop(); + //should filter string is separated into several batches + while 
(!queue.empty()) { + // entryDn specific for OpenLdap, may get this value from config + filter << "(entryDn=" << queue.front() << ')'; + queue.pop(); + } + filter << ')'; + LDAPMessage* searchMessage = nullptr; + int result = NKikimrLdap::Search(ld, Settings.GetBaseDn(), NKikimrLdap::EScope::SUBTREE, filter, RequestedAttributes, 0, &searchMessage); + if (!NKikimrLdap::IsSuccess(result)) { + return; + } + if (NKikimrLdap::CountEntries(ld, searchMessage) == 0) { + NKikimrLdap::MsgFree(searchMessage); + return; + } + for (LDAPMessage* groupEntry = NKikimrLdap::FirstEntry(ld, searchMessage); groupEntry != nullptr; groupEntry = NKikimrLdap::NextEntry(ld, groupEntry)) { + BerElement* ber = nullptr; + std::vector foundGroups; + char* attribute = NKikimrLdap::FirstAttribute(ld, groupEntry, &ber); + if (attribute != nullptr) { + foundGroups = NKikimrLdap::GetAllValuesOfAttribute(ld, groupEntry, attribute); + NKikimrLdap::MemFree(attribute); + } + if (ber) { + NKikimrLdap::BerFree(ber, 0); + } + for (const auto& newGroup : foundGroups) { + if (!viewedGroups.contains(newGroup)) { + viewedGroups.insert(newGroup); + queue.push(newGroup); + groups->push_back(newGroup); + } + } + } + NKikimrLdap::MsgFree(searchMessage); + } + } + TInitializeLdapConnectionResponse CheckRequiredSettingsParameters() const { if (Settings.GetHosts().empty() && Settings.GetHost().empty()) { return {TEvLdapAuthProvider::EStatus::UNAVAILABLE, {.Message = "List of ldap server hosts is empty", .Retryable = false}}; @@ -306,42 +407,11 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped return {TEvLdapAuthProvider::EStatus::SUCCESS, {}}; } - TString GetUris(ui32 port) const { - TStringBuilder uris; - if (Settings.HostsSize() > 0) { - for (const auto& host : Settings.GetHosts()) { - uris << CreateUri(host, port) << " "; - } - uris.remove(uris.size() - 1); - } else { - uris << CreateUri(Settings.GetHost(), port); - } - return uris; - } - - TString CreateUri(const TString& endpoint, ui32 port) const 
{ - TStringBuilder uri; - uri << Settings.GetScheme() << "://" << endpoint; - if (!HasEndpointPort(endpoint)) { - uri << ':' << port; - } - return uri; - } - - static bool HasEndpointPort(const TString& endpoint) { - size_t colonPos = endpoint.rfind(':'); - if (colonPos == TString::npos) { - return false; - } - ++colonPos; - return (endpoint.size() - colonPos) > 0; - } - private: const NKikimrProto::TLdapAuthentication Settings; const TSearchFilterCreator FilterCreator; + const TLdapUrisCreator UrisCreator; char* RequestedAttributes[2]; - TString UrisList; }; IActor* CreateLdapAuthProvider(const NKikimrProto::TLdapAuthentication& settings) { diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp index f7826f019a44..4125182f3d74 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp @@ -77,6 +77,10 @@ LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain) { return ldap_first_entry(ld, chain); } +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry) { + return ldap_next_entry(ld, entry); +} + char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout) { return ldap_first_attribute(ld, entry, berout); } diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp index 5c5ac98a27ae..6ceddc0c5749 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp @@ -47,6 +47,8 @@ void InitLdapSettings(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldap ldapSettings->SetBindDn("cn=robouser,dc=search,dc=yandex,dc=net"); ldapSettings->SetBindPassword("robouserPassword"); ldapSettings->SetSearchFilter("uid=$username"); + auto extendedSettings = ldapSettings->MutableExtendedSettings(); + 
extendedSettings->SetEnableNestedGroupsSearch(true); const auto setCertificate = [&ldapSettings] (bool useStartTls, TTempFileHandle& certificateFile) { auto useTls = ldapSettings->MutableUseTls(); @@ -64,6 +66,12 @@ void InitLdapSettings(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldap } } +void InitLdapSettingsDisableSearchNestedGroups(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldapPort, TTempFileHandle& certificateFile, const ESecurityConnectionType& securityConnectionType) { + InitLdapSettings(ldapSettings, ldapPort, certificateFile, securityConnectionType); + auto extendedSettings = ldapSettings->MutableExtendedSettings(); + extendedSettings->SetEnableNestedGroupsSearch(false); +} + void InitLdapSettingsWithInvalidRobotUserLogin(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldapPort, TTempFileHandle& certificateFile, const ESecurityConnectionType& securityConnectionType) { InitLdapSettings(ldapSettings, ldapPort, certificateFile, securityConnectionType); ldapSettings->SetBindDn("cn=invalidRobouser,dc=search,dc=yandex,dc=net"); @@ -184,23 +192,424 @@ TAutoPtr LdapAuthenticate(TLdapKikimrServer& server, const TString return handle; } +// Scheme of groups +// *-> cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | | +// | |*-> cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | | +// | |*-> uid=ldapuser,dc=search,dc=yandex,dc=net +// | +// |*-> cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> uid=ldapuser,dc=search,dc=yandex,dc=net + class TCorrectLdapResponse { public: - static std::vector Groups; - static LdapMock::TLdapMockResponses GetResponses(const TString& login, const TString& groupAttribute = "memberOf"); + static std::vector DirectGroups; + static std::vector UpdatedDirectGroups; + static std::vector 
ManagerGroups; + static std::vector DevelopersGroups; + static std::vector PeopleGroups; + static LdapMock::TLdapMockResponses GetResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static LdapMock::TLdapMockResponses GetAdResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static LdapMock::TLdapMockResponses GetUpdatedResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static THashSet GetAllGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(DirectGroups); + AddGroups(ManagerGroups); + AddGroups(DevelopersGroups); + AddGroups(PeopleGroups); + return result; + } + + static THashSet GetDirectedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(DirectGroups); + return result; + } + + static THashSet GetAllUpdatedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(UpdatedDirectGroups); + AddGroups(DevelopersGroups); + AddGroups(PeopleGroups); + return result; + } + + static THashSet GetUpdatedDirectedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), 
groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(UpdatedDirectGroups); + return result; + } +}; + +std::vector TCorrectLdapResponse::DirectGroups { + "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net" +}; + +std::vector TCorrectLdapResponse::UpdatedDirectGroups { + "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net" +}; + +std::vector TCorrectLdapResponse::ManagerGroups { + "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", +}; + +std::vector TCorrectLdapResponse::DevelopersGroups { + "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", }; -std::vector TCorrectLdapResponse::Groups { - "ou=groups,dc=search,dc=yandex,dc=net", +std::vector TCorrectLdapResponse::PeopleGroups { "cn=people,ou=groups,dc=search,dc=yandex,dc=net", - "cn=developers,ou=groups,dc=search,dc=yandex,dc=net" }; -LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& login, const TString& groupAttribute) { +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { + LdapMock::TLdapMockResponses responses; + responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); + + LdapMock::TSearchRequestInfo requestDirectedUserGroups { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY, .Attribute = "uid", .Value = login}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseDirectedUserGroupsEntries { + { + .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DirectGroups} + } 
+ }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + std::shared_ptr filterToGetGroupOfManagers = std::make_shared(); + filterToGetGroupOfManagers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfManagers->Attribute = "entryDn"; + filterToGetGroupOfManagers->Value = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::shared_ptr filterToGetGroupOfDevelopers = std::make_shared(); + filterToGetGroupOfDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfDevelopers->Attribute = "entryDn"; + filterToGetGroupOfDevelopers->Value = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupsOfManagersAndDevelopers = { + filterToGetGroupOfManagers, + filterToGetGroupOfDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupsOfManagersAndDevelopers { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupsOfManagersAndDevelopers}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseEntriesWithGroupsOfManagersAndDevelopers { + { + .Dn = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::ManagerGroups} + } + }, + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DevelopersGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupsOfManagersAndDevelopers { + .ResponseEntries = 
responseEntriesWithGroupsOfManagersAndDevelopers, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupsOfManagersAndDevelopers, responseWithGroupsOfManagersAndDevelopers}); + + std::shared_ptr filterToGetGroupPeopleFromManagers = std::make_shared(); + filterToGetGroupPeopleFromManagers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromManagers->Attribute = "entryDn"; + filterToGetGroupPeopleFromManagers->Value = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::shared_ptr filterToGetGroupPeopleFromDevelopers = std::make_shared(); + filterToGetGroupPeopleFromDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromDevelopers->Attribute = "entryDn"; + filterToGetGroupPeopleFromDevelopers->Value = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupOfPeople = { + filterToGetGroupPeopleFromManagers, filterToGetGroupPeopleFromDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseWithGroupOfPeopleEntries { + { + .Dn = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupOfPeople { + .ResponseEntries = responseWithGroupOfPeopleEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupOfPeople, responseWithGroupOfPeople}); + 
+ std::shared_ptr filterToGetParentGroupOfPeople = std::make_shared(); + filterToGetParentGroupOfPeople->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetParentGroupOfPeople->Attribute = "entryDn"; + filterToGetParentGroupOfPeople->Value = "cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetParentGroupOfPeople = { + filterToGetParentGroupOfPeople + }; + LdapMock::TSearchRequestInfo requestToGetParentGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetParentGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + LdapMock::TSearchResponseInfo responseWithParentGroupOfPeople { + .ResponseEntries = {}, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetParentGroupOfPeople, responseWithParentGroupOfPeople}); + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} + } + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = {}, // LDAP server is not Active Directory. 
Return empty entries + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + + return responses; +} + +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetUpdatedResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { + LdapMock::TLdapMockResponses responses; + responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); + + LdapMock::TSearchRequestInfo requestDirectedUserGroups { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY, .Attribute = "uid", .Value = login}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseDirectedUserGroupsEntries { + { + .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, UpdatedDirectGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + std::shared_ptr filterToGetGroupOfDevelopers = std::make_shared(); + filterToGetGroupOfDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfDevelopers->Attribute = "entryDn"; + filterToGetGroupOfDevelopers->Value = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupsOfDevelopers = { + filterToGetGroupOfDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupsOfDevelopers { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = 
LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupsOfDevelopers}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseEntriesWithGroupsOfDevelopers { + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DevelopersGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupsOfDevelopers { + .ResponseEntries = responseEntriesWithGroupsOfDevelopers, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupsOfDevelopers, responseWithGroupsOfDevelopers}); + + std::shared_ptr filterToGetGroupPeopleFromDevelopers = std::make_shared(); + filterToGetGroupPeopleFromDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromDevelopers->Attribute = "entryDn"; + filterToGetGroupPeopleFromDevelopers->Value = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupOfPeople = { + filterToGetGroupPeopleFromDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseWithGroupOfPeopleEntries { + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupOfPeople { + .ResponseEntries = responseWithGroupOfPeopleEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupOfPeople, responseWithGroupOfPeople}); + + std::shared_ptr filterToGetParentGroupOfPeople = std::make_shared(); + 
filterToGetParentGroupOfPeople->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetParentGroupOfPeople->Attribute = "entryDn"; + filterToGetParentGroupOfPeople->Value = "cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetParentGroupOfPeople = { + filterToGetParentGroupOfPeople + }; + LdapMock::TSearchRequestInfo requestToGetParentGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetParentGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + LdapMock::TSearchResponseInfo responseWithParentGroupOfPeople { + .ResponseEntries = {}, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetParentGroupOfPeople, responseWithParentGroupOfPeople}); + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} + } + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = {}, // LDAP server is not Active Directory. 
Return empty entries + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + + return responses; +} + +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetAdResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { LdapMock::TLdapMockResponses responses; responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); - LdapMock::TSearchRequestInfo fetchGroupsSearchRequestInfo { + LdapMock::TSearchRequestInfo requestDirectedUserGroups { { .BaseDn = "dc=search,dc=yandex,dc=net", .Scope = 2, @@ -210,20 +619,69 @@ LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& l } }; - std::vector fetchGroupsSearchResponseEntries { + std::vector responseDirectedUserGroupsEntries { { .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", .AttributeList = { - {groupAttribute, TCorrectLdapResponse::Groups} + {groupAttribute, TCorrectLdapResponse::DirectGroups} } + }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} } }; - LdapMock::TSearchResponseInfo fetchGroupsSearchResponseInfo { - .ResponseEntries = 
fetchGroupsSearchResponseEntries, + std::vector responseWithAllNestedGroupsFromAdEntries { + { + .Dn = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = responseWithAllNestedGroupsFromAdEntries, .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} }; - responses.SearchResponses.push_back({fetchGroupsSearchRequestInfo, fetchGroupsSearchResponseInfo}); + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + return responses; } @@ -248,10 +706,7 @@ void CheckRequiredLdapSettings(std::functionToken->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -281,24 +733,12 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_nonSecure) { - LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_StartTls) { - 
LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_LdapsScheme) { - LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); - } - - void LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(const ESecurityConnectionType& secureType) { + void LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; - TLdapKikimrServer server(InitLdapSettingsWithListOfHosts, secureType); - LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, true), secureType == ESecurityConnectionType::LDAPS_SCHEME); TAutoPtr handle = LdapAuthenticate(server, login, password); TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); @@ -309,10 +749,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -323,24 +760,12 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - 
Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_nonSecure) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_StartTls) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_LdapsScheme) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::LDAPS_SCHEME); - } - - void LdapFetchGroupsWithCustomGroupAttributeGood(const ESecurityConnectionType& secureType) { + void LdapFetchGroupsFromAdLdapServer(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; - TLdapKikimrServer server(InitLdapSettingsWithCustomGroupAttribute, secureType); - LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, "groupDN"), secureType == ESecurityConnectionType::LDAPS_SCHEME); + TLdapKikimrServer server(InitLdapSettings, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetAdResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); TAutoPtr handle = LdapAuthenticate(server, login, password); TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); @@ -351,10 +776,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); 
expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -365,17 +787,86 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_nonSecure) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_StartTls) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::START_TLS); - } + void LdapFetchGroupsDisableRequestToAD(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_LdapsScheme) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); - } + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetAdResponses(login, true), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + 
expectedGroup); + } + + ldapServer.Stop(); + } + + void LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; + + TLdapKikimrServer server(InitLdapSettingsWithListOfHosts, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } + + ldapServer.Stop(); + } + + void LdapFetchGroupsWithCustomGroupAttributeGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; + + TLdapKikimrServer server(InitLdapSettingsWithCustomGroupAttribute, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, false, "groupDN"), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + 
UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } + + ldapServer.Stop(); + } void LdapFetchGroupsWithDontExistGroupAttribute(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; @@ -424,18 +915,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_nonSecure) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_StartTls) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_LdapsScheme) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithInvalidRobotUserLoginBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -459,18 +938,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_nonSecure) { - LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_StartTls) { 
- LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_LdapsScheme) { - LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithInvalidRobotUserPasswordBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -494,18 +961,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_nonSecure) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_StartTls) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_LdapsScheme) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithRemovedUserCredentialsBad(const ESecurityConnectionType& secureType) { TString removedUserLogin = "ldapuser"; TString removedUserPassword = "ldapUserPassword"; @@ -544,18 +999,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_nonSecure) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_StartTls) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_LdapsScheme) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsUseInvalidSearchFilterBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; 
@@ -577,76 +1020,70 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_nonSecure) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::NON_SECURE); - } + void LdapRefreshGroupsInfoGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_StartTls) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::START_TLS); - } + auto responses = TCorrectLdapResponse::GetResponses(login); + LdapMock::TLdapMockResponses updatedResponses = TCorrectLdapResponse::GetUpdatedResponses(login); + const TString ldapDomain = "@ldap"; - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_LdapsScheme) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::LDAPS_SCHEME); - } + TLdapKikimrServer server(InitLdapSettings, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), {responses, updatedResponses}, secureType == ESecurityConnectionType::LDAPS_SCHEME); - Y_UNIT_TEST(LdapServerIsUnavailable) { - CheckRequiredLdapSettings(InitLdapSettingsWithUnavailableHost, "Could not start TLS\nCan't contact LDAP server", ESecurityConnectionType::START_TLS); - } + auto loginResponse = GetLoginResponse(server, login, password); + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); + runtime->Send(new IEventHandle(MakeTicketParserID(), sender, new TEvTicketParser::TEvAuthorizeTicket(loginResponse.Token)), 0); + TAutoPtr handle; + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = runtime->GrabEdgeEvent(handle); - Y_UNIT_TEST(LdapRequestWithEmptyHost) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyHost, "List of ldap server hosts is empty"); - } + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + 
UNIT_ASSERT(ticketParserResult->Token != nullptr); + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - Y_UNIT_TEST(LdapRequestWithEmptyBaseDn) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBaseDn, "Parameter BaseDn is empty"); - } + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); - Y_UNIT_TEST(LdapRequestWithEmptyBindDn) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindDn, "Parameter BindDn is empty"); - } + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } - Y_UNIT_TEST(LdapRequestWithEmptyBindPassword) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindPassword, "Parameter BindPassword is empty"); - } + ldapServer.UpdateResponses(); + Sleep(TDuration::Seconds(10)); - void LdapRefreshGroupsInfoGood(const ESecurityConnectionType& secureType) { - TString login = "ldapuser"; - TString password = "ldapUserPassword"; + runtime->Send(new IEventHandle(MakeTicketParserID(), sender, new TEvTicketParser::TEvAuthorizeTicket(loginResponse.Token)), 0); + ticketParserResult = runtime->GrabEdgeEvent(handle); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + "@ldap"); + const auto& newFetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet newGroups(newFetchedGroups.begin(), newFetchedGroups.end()); - auto responses = TCorrectLdapResponse::GetResponses(login); - LdapMock::TLdapMockResponses updatedResponses = responses; + THashSet newExpectedGroups = 
TCorrectLdapResponse::GetAllUpdatedGroups(ldapDomain); + newExpectedGroups.insert("all-users@well-known"); - std::vector newLdapGroups { - "ou=groups,dc=search,dc=yandex,dc=net", - "cn=people,ou=groups,dc=search,dc=yandex,dc=net", - "cn=designers,ou=groups,dc=search,dc=yandex,dc=net" - }; - std::vector newFetchGroupsSearchResponseEntries { - { - .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", - .AttributeList = { - {"memberOf", newLdapGroups} - } - } - }; + UNIT_ASSERT_VALUES_EQUAL(newFetchedGroups.size(), newExpectedGroups.size()); + for (const auto& expectedGroup : newExpectedGroups) { + UNIT_ASSERT_C(newGroups.contains(expectedGroup), "Can not find " + expectedGroup); + } - const TString ldapDomain = "@ldap"; - THashSet newExpectedGroups; - std::transform(newLdapGroups.begin(), newLdapGroups.end(), std::inserter(newExpectedGroups, newExpectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); - newExpectedGroups.insert("all-users@well-known"); + ldapServer.Stop(); + } - LdapMock::TSearchResponseInfo newFetchGroupsSearchResponseInfo { - .ResponseEntries = newFetchGroupsSearchResponseEntries, - .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} - }; + void LdapRefreshGroupsInfoDisableNestedGroupsGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - auto& searchResponse = updatedResponses.SearchResponses.front(); - searchResponse.second = newFetchGroupsSearchResponseInfo; + auto responses = TCorrectLdapResponse::GetResponses(login, true); + LdapMock::TLdapMockResponses updatedResponses = TCorrectLdapResponse::GetUpdatedResponses(login, true); + const TString ldapDomain = "@ldap"; - TLdapKikimrServer server(InitLdapSettings, secureType); + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), {responses, updatedResponses}, secureType == 
ESecurityConnectionType::LDAPS_SCHEME); auto loginResponse = GetLoginResponse(server, login, password); @@ -662,10 +1099,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -684,6 +1118,10 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + "@ldap"); const auto& newFetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet newGroups(newFetchedGroups.begin(), newFetchedGroups.end()); + + THashSet newExpectedGroups = TCorrectLdapResponse::GetUpdatedDirectedGroups(ldapDomain); + newExpectedGroups.insert("all-users@well-known"); + UNIT_ASSERT_VALUES_EQUAL(newFetchedGroups.size(), newExpectedGroups.size()); for (const auto& expectedGroup : newExpectedGroups) { UNIT_ASSERT_C(newGroups.contains(expectedGroup), "Can not find " + expectedGroup); @@ -692,18 +1130,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_nonSecure) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_StartTls) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_LdapsScheme) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::LDAPS_SCHEME); - } - void 
LdapRefreshRemoveUserBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -734,10 +1160,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -763,18 +1186,200 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_nonSecure) { - LdapRefreshRemoveUserBad(ESecurityConnectionType::NON_SECURE); +Y_UNIT_TEST_SUITE(LdapAuthProviderTest) { + Y_UNIT_TEST(LdapServerIsUnavailable) { + CheckRequiredLdapSettings(InitLdapSettingsWithUnavailableHost, "Could not start TLS\nCan't contact LDAP server", ESecurityConnectionType::START_TLS); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_StartTls) { - LdapRefreshRemoveUserBad(ESecurityConnectionType::START_TLS); + Y_UNIT_TEST(LdapRequestWithEmptyHost) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyHost, "List of ldap server hosts is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBaseDn) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBaseDn, "Parameter BaseDn is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBindDn) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindDn, "Parameter BindDn is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBindPassword) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindPassword, "Parameter BindPassword is empty"); + } +} + 
+Y_UNIT_TEST_SUITE(LdapAuthProviderTest_LdapsScheme) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::LDAPS_SCHEME); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_LdapsScheme) { + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + LdapRefreshGroupsInfoGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + 
LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { LdapRefreshRemoveUserBad(ESecurityConnectionType::LDAPS_SCHEME); } +} + +Y_UNIT_TEST_SUITE(LdapAuthProviderTest_StartTls) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + 
LdapRefreshGroupsInfoGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::START_TLS); + } + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { + LdapRefreshRemoveUserBad(ESecurityConnectionType::START_TLS); + } +} + +Y_UNIT_TEST_SUITE(LdapAuthProviderTest_nonSecure) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + 
LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + LdapRefreshGroupsInfoGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { + LdapRefreshRemoveUserBad(ESecurityConnectionType::NON_SECURE); + } } } // NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp index 399a1f2916cd..d1fc38a449ce 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "ticket_parser_log.h" +#include #include "ldap_auth_provider.h" #include @@ -66,6 +66,10 @@ LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain) { return ldap_first_entry(ld, chain); } +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry) { + return ldap_next_entry(ld, entry); +} + char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout) { return ldap_first_attribute(ld, entry, berout); } diff --git a/ydb/core/security/ldap_auth_provider/ldap_compat.h b/ydb/core/security/ldap_auth_provider/ldap_compat.h index 15aee21e9c7f..48e04441f082 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_compat.h +++ b/ydb/core/security/ldap_auth_provider/ldap_compat.h @@ -37,6 +37,7 @@ int Search(LDAP* ld, TString LdapError(LDAP* ld); TString ErrorToString(int err); LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain); +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry); char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout); void MemFree(char* p); void BerFree(BerElement* ber, int freebuf); diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils.cpp 
b/ydb/core/security/ldap_auth_provider/ldap_utils.cpp index 3087cbaf137a..355c77aba9f7 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_utils.cpp @@ -1,11 +1,13 @@ #include +#include +#include #include "ldap_utils.h" namespace NKikimr { TSearchFilterCreator::TSearchFilterCreator(const NKikimrProto::TLdapAuthentication& settings) : Settings(settings) - {} +{} TString TSearchFilterCreator::GetFilter(const TString& userName) const { if (!Settings.GetSearchFilter().empty()) { @@ -34,4 +36,70 @@ TString TSearchFilterCreator::GetFormatSearchFilter(const TString& userName) con return result.Str(); } +TLdapUrisCreator::TLdapUrisCreator(const NKikimrProto::TLdapAuthentication& settings, ui32 configuredPort) + : Settings(settings) + , Scheme(Settings.GetScheme() == "ldaps" ? Settings.GetScheme() : "ldap") + , ConfiguredPort(configuredPort) +{} + +TString TLdapUrisCreator::GetUris() const { + if (Uris.empty()) { + Uris = CreateUrisList(); + } + return Uris; +} + +ui32 TLdapUrisCreator::GetConfiguredPort() const { + return ConfiguredPort; +} + +TString TLdapUrisCreator::CreateUrisList() const { + TStringBuilder uris; + if (Settings.HostsSize() > 0) { + for (const auto& host : Settings.GetHosts()) { + uris << CreateUri(host) << " "; + } + uris.remove(uris.size() - 1); + } else { + uris << CreateUri(Settings.GetHost()); + } + return uris; +} + +TString TLdapUrisCreator::CreateUri(const TString& address) const { + TString hostname; + ui32 port = 0; + size_t first_colon_pos = address.find(':'); + if (first_colon_pos != TString::npos) { + size_t last_colon_pos = address.rfind(':'); + if (last_colon_pos == first_colon_pos) { + // only one colon, simple case + try { + port = FromString(address.substr(first_colon_pos + 1)); + } catch (TFromStringException& ex) { + port = 0; + } + hostname = address.substr(0, first_colon_pos); + } else { + // ipv6? 
+ size_t closing_bracket_pos = address.rfind(']'); + if (closing_bracket_pos == TString::npos || closing_bracket_pos > last_colon_pos) { + // whole address is ipv6 host + hostname = address; + } else { + try { + port = FromString(address.substr(last_colon_pos + 1)); + } catch (TFromStringException& ex) { + port = 0; + } + hostname = address.substr(0, last_colon_pos); + } + } + } else { + hostname = address; + } + port = (port != 0) ? port : ConfiguredPort; + return TStringBuilder() << Scheme << "://" << hostname << ':' << port; +} + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils.h b/ydb/core/security/ldap_auth_provider/ldap_utils.h index 62fd188d825e..5cfb3f957251 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils.h +++ b/ydb/core/security/ldap_auth_provider/ldap_utils.h @@ -16,4 +16,22 @@ class TSearchFilterCreator { const NKikimrProto::TLdapAuthentication& Settings; }; +class TLdapUrisCreator { +public: + TLdapUrisCreator(const NKikimrProto::TLdapAuthentication& settings, ui32 configuredPort); + + TString GetUris() const; + ui32 GetConfiguredPort() const; + +private: + TString CreateUrisList() const; + TString CreateUri(const TString& address) const; + +private: + const NKikimrProto::TLdapAuthentication& Settings; + const TString Scheme; + const ui32 ConfiguredPort; + mutable TString Uris; +}; + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp b/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp index 26a0cb5a5caf..c615e18ab164 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp @@ -3,7 +3,7 @@ namespace NKikimr { -Y_UNIT_TEST_SUITE(TLdapUtilsTest) { +Y_UNIT_TEST_SUITE(TLdapUtilsSearchFilterCreatorTest) { Y_UNIT_TEST(GetDefaultFilter) { NKikimrProto::TLdapAuthentication settings; TSearchFilterCreator filterCreator(settings); @@ -62,4 +62,58 @@ Y_UNIT_TEST_SUITE(TLdapUtilsTest) { } } 
+Y_UNIT_TEST_SUITE(TLdapUtilsUrisCreatorTest) { + Y_UNIT_TEST(CreateUrisFromHostnames) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = "test.hostname-003:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://test.hostname-001:389 ldap://test.hostname-002:1234 ldap://test.hostname-003:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromIpV4List) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "192.168.0.1"; + *settings.AddHosts() = "192.168.0.2:1234"; + *settings.AddHosts() = "192.168.0.3:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://192.168.0.1:389 ldap://192.168.0.2:1234 ldap://192.168.0.3:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromIpV6List) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "[2a02:6b8:bf00::]"; + *settings.AddHosts() = "[2a02:6b8:bf01::]:1234"; + *settings.AddHosts() = "[2a02:6b8:bf02::]:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://[2a02:6b8:bf00::]:389 ldap://[2a02:6b8:bf01::]:1234 ldap://[2a02:6b8:bf02::]:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromHostnamesLdapsScheme) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = "test.hostname-003:"; + settings.SetScheme("ldaps"); + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldaps://test.hostname-001:389 ldaps://test.hostname-002:1234 ldaps://test.hostname-003:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromHostnamesUnknownScheme) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = 
"test.hostname-003:"; + settings.SetScheme("http"); + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://test.hostname-001:389 ldap://test.hostname-002:1234 ldap://test.hostname-003:389", urisCreator.GetUris()); + } +} + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ya.make b/ydb/core/security/ldap_auth_provider/ya.make index abdc3f210725..a0c9671eda1e 100644 --- a/ydb/core/security/ldap_auth_provider/ya.make +++ b/ydb/core/security/ldap_auth_provider/ya.make @@ -25,6 +25,7 @@ ENDIF() PEERDIR( ydb/core/base ydb/core/protos + ydb/core/util ) END() diff --git a/ydb/core/security/login_page.cpp b/ydb/core/security/login_page.cpp index cb6db091f421..6f5f53029773 100644 --- a/ydb/core/security/login_page.cpp +++ b/ydb/core/security/login_page.cpp @@ -96,17 +96,7 @@ class TLoginRequest : public NActors::TActorBootstrapped { ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting LDAP provider for user " << AuthCredentials.Login); Send(MakeLdapAuthProviderID(), new TEvLdapAuthProvider::TEvAuthenticateRequest(AuthCredentials.Login, AuthCredentials.Password)); } else { - auto *domain = AppData()->DomainsInfo->GetDomain(); - TString rootDatabase = "/" + domain->Name; - ui64 rootSchemeShardTabletId = domain->SchemeRoot; - if (!Database.empty() && Database != rootDatabase) { - Database = rootDatabase; - ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting schemecache for database " << Database); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(CreateNavigateKeySetRequest(Database).Release())); - } else { - Database = rootDatabase; - RequestSchemeShard(rootSchemeShardTabletId); - } + RequestLoginProvider(); } Become(&TThis::StateWork, Timeout, new TEvents::TEvWakeup()); } @@ -123,6 +113,7 @@ class TLoginRequest : public NActors::TActorBootstrapped { PipeClient = RegisterWithSameMailbox(pipe); THolder request = MakeHolder(); request.Get()->Record = CreateLoginRequest(AuthCredentials, 
AppData()->AuthConfig); + request.Get()->Record.SetPeerName(Request->Address->ToString()); NTabletPipe::SendData(SelfId(), PipeClient, request.Release()); } @@ -146,10 +137,23 @@ class TLoginRequest : public NActors::TActorBootstrapped { void Handle(TEvLdapAuthProvider::TEvAuthenticateResponse::TPtr& ev) { TEvLdapAuthProvider::TEvAuthenticateResponse* response = ev->Get(); if (response->Status == TEvLdapAuthProvider::EStatus::SUCCESS) { + RequestLoginProvider(); + } else { + ReplyErrorAndPassAway("403", "Forbidden", response->Error.Message); + } + } + + void RequestLoginProvider() { + auto *domain = AppData()->DomainsInfo->GetDomain(); + TString rootDatabase = "/" + domain->Name; + ui64 rootSchemeShardTabletId = domain->SchemeRoot; + if (!Database.empty() && Database != rootDatabase) { + Database = rootDatabase; ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting schemecache for database " << Database); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(CreateNavigateKeySetRequest(Database).Release())); } else { - ReplyErrorAndPassAway("403", "Forbidden", response->Error.Message); + Database = rootDatabase; + RequestSchemeShard(rootSchemeShardTabletId); } } diff --git a/ydb/core/security/login_shared_func.cpp b/ydb/core/security/login_shared_func.cpp index 95ea9ee15914..f013534bf821 100644 --- a/ydb/core/security/login_shared_func.cpp +++ b/ydb/core/security/login_shared_func.cpp @@ -19,9 +19,9 @@ THolder CreateNavigateKeySetRequest(const TS TAuthCredentials PrepareCredentials(const TString& login, const TString& password, const NKikimrProto::TAuthConfig& config) { if (config.HasLdapAuthentication() && !config.GetLdapAuthenticationDomain().empty()) { - size_t n = login.find("@" + config.GetLdapAuthenticationDomain()); - if (n != TString::npos) { - return {.AuthType = TAuthCredentials::EAuthType::Ldap, .Login = login.substr(0, n), .Password = password}; + const TString domain = "@" + config.GetLdapAuthenticationDomain(); + if 
(login.EndsWith(domain)) { + return {.AuthType = TAuthCredentials::EAuthType::Ldap, .Login = login.substr(0, login.size() - domain.size()), .Password = password}; } } return {.AuthType = TAuthCredentials::EAuthType::Internal, .Login = login, .Password = password}; @@ -38,6 +38,9 @@ NKikimrScheme::TEvLogin CreateLoginRequest(const TAuthCredentials& credentials, } default: {} } + if (config.HasLoginTokenExpireTime()) { + record.SetExpiresAfterMs(TDuration::Parse(config.GetLoginTokenExpireTime()).MilliSeconds()); + } return record; } diff --git a/ydb/core/security/ticket_parser_impl.h b/ydb/core/security/ticket_parser_impl.h index b6c842470fbd..8fa41b6086f9 100644 --- a/ydb/core/security/ticket_parser_impl.h +++ b/ydb/core/security/ticket_parser_impl.h @@ -1721,6 +1721,8 @@ class TTicketParserImpl : public TActorBootstrapped { record.RefreshRetryableErrorImmediately = false; GetDerived()->CanRefreshTicket(key, record); Respond(record); + CounterTicketsErrors->Inc(); + return; } } else { record.UnsetToken(); diff --git a/ydb/core/statistics/aggregator/aggregator_impl.cpp b/ydb/core/statistics/aggregator/aggregator_impl.cpp index 734070165b9e..9becddab5a75 100644 --- a/ydb/core/statistics/aggregator/aggregator_impl.cpp +++ b/ydb/core/statistics/aggregator/aggregator_impl.cpp @@ -1,9 +1,12 @@ #include "aggregator_impl.h" +#include +#include + #include -#include -#include #include +#include +#include #include #include @@ -20,6 +23,14 @@ TStatisticsAggregator::TStatisticsAggregator(const NActors::TActorId& tablet, TT auto seed = std::random_device{}(); RandomGenerator.seed(seed); + + TabletCountersPtr.Reset(new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor + >()); + TabletCounters = TabletCountersPtr.Get(); } void TStatisticsAggregator::OnDetach(const TActorContext& ctx) { @@ -33,6 +44,7 @@ void TStatisticsAggregator::OnTabletDead(TEvTablet::TEvTabletDead::TPtr&, const void 
TStatisticsAggregator::OnActivateExecutor(const TActorContext& ctx) { SA_LOG_I("[" << TabletID() << "] OnActivateExecutor"); + Executor()->RegisterExternalTabletCounters(TabletCountersPtr); Execute(CreateTxInitSchema(), ctx); } @@ -217,7 +229,7 @@ void TStatisticsAggregator::Handle(TEvPrivate::TEvFastPropagateCheck::TPtr&) { } void TStatisticsAggregator::Handle(TEvPrivate::TEvPropagate::TPtr&) { - SA_LOG_D("[" << TabletID() << "] EvPropagate"); + SA_LOG_T("[" << TabletID() << "] EvPropagate"); if (EnableStatistics) { PropagateStatistics(); @@ -308,14 +320,15 @@ void TStatisticsAggregator::SendStatisticsToNode(TNodeId nodeId, const std::vect } void TStatisticsAggregator::PropagateStatistics() { - SA_LOG_D("[" << TabletID() << "] PropagateStatistics()" - << ", node count = " << Nodes.size() - << ", schemeshard count = " << RequestedSchemeShards.size()); - if (Nodes.empty() || RequestedSchemeShards.empty()) { + SA_LOG_T("[" << TabletID() << "] PropagateStatistics() No data"); return; } + SA_LOG_D("[" << TabletID() << "] PropagateStatistics()" + << ", node count = " << Nodes.size() + << ", schemeshard count = " << RequestedSchemeShards.size()); + std::vector nodeIds; nodeIds.reserve(Nodes.size()); for (const auto& [nodeId, _] : Nodes) { @@ -381,8 +394,8 @@ size_t TStatisticsAggregator::PropagatePart(const std::vector& nodeIds, auto ssId = ssIds[index]; auto* entry = record->AddEntries(); entry->SetSchemeShardId(ssId); - auto itStats = BaseStats.find(ssId); - if (itStats != BaseStats.end()) { + auto itStats = BaseStatistics.find(ssId); + if (itStats != BaseStatistics.end()) { entry->SetStats(itStats->second); size += itStats->second.size(); } else { @@ -396,15 +409,39 @@ size_t TStatisticsAggregator::PropagatePart(const std::vector& nodeIds, } void TStatisticsAggregator::Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { - auto tabletId = ev->Get()->TabletId; - if (ShardRanges.empty()) { + if (!TraversalPathId) { return; } - auto& range = ShardRanges.front(); - if 
(tabletId != range.DataShardId) { - return; + auto tabletId = ev->Get()->TabletId; + if (TraversalIsColumnTable) { + if (tabletId == HiveId) { + SA_LOG_E("[" << TabletID() << "] TEvDeliveryProblem with HiveId=" << tabletId); + Schedule(HiveRetryInterval, new TEvPrivate::TEvRequestDistribution); + } else { + for (TForceTraversalOperation& operation : ForceTraversals) { + for (TForceTraversalTable& operationTable : operation.Tables) { + for (TAnalyzedShard& shard : operationTable.AnalyzedShards) { + if (shard.ShardTabletId == tabletId) { + SA_LOG_E("[" << TabletID() << "] TEvDeliveryProblem with ColumnShard=" << tabletId); + shard.Status = TAnalyzedShard::EStatus::DeliveryProblem; + return; + } + } + } + } + SA_LOG_CRIT("[" << TabletID() << "] TEvDeliveryProblem with unexpected tablet " << tabletId); + } + } else { + SA_LOG_E("[" << TabletID() << "] TEvDeliveryProblem with DataShard=" << tabletId); + if (DatashardRanges.empty()) { + return; + } + auto& range = DatashardRanges.front(); + if (tabletId != range.DataShardId) { + return; + } + Resolve(); } - Resolve(); } void TStatisticsAggregator::Handle(TEvStatistics::TEvStatTableCreationResponse::TPtr&) { @@ -419,37 +456,76 @@ void TStatisticsAggregator::Handle(TEvStatistics::TEvStatTableCreationResponse:: } } -void TStatisticsAggregator::Handle(TEvStatistics::TEvGetScanStatus::TPtr& ev) { - auto& inRecord = ev->Get()->Record; - auto pathId = PathIdFromPathId(inRecord.GetPathId()); +void TStatisticsAggregator::Handle(TEvStatistics::TEvAnalyzeStatus::TPtr& ev) { + const auto& inRecord = ev->Get()->Record; + const TString operationId = inRecord.GetOperationId(); - auto response = std::make_unique(); + auto response = std::make_unique(); auto& outRecord = response->Record; + outRecord.SetOperationId(operationId); - if (ScanTableId.PathId == pathId) { - outRecord.SetStatus(NKikimrStat::TEvGetScanStatusResponse::IN_PROGRESS); + if (ForceTraversalOperationId == operationId) { + 
outRecord.SetStatus(NKikimrStat::TEvAnalyzeStatusResponse::STATUS_IN_PROGRESS); } else { - auto it = ScanOperationsByPathId.find(pathId); - if (it != ScanOperationsByPathId.end()) { - outRecord.SetStatus(NKikimrStat::TEvGetScanStatusResponse::ENQUEUED); + auto forceTraversalOperation = ForceTraversalOperation(operationId); + if (forceTraversalOperation) { + outRecord.SetStatus(NKikimrStat::TEvAnalyzeStatusResponse::STATUS_ENQUEUED); } else { - outRecord.SetStatus(NKikimrStat::TEvGetScanStatusResponse::NO_OPERATION); + outRecord.SetStatus(NKikimrStat::TEvAnalyzeStatusResponse::STATUS_NO_OPERATION); } } + + SA_LOG_D("[" << TabletID() << "] Send TEvStatistics::TEvAnalyzeStatusResponse. Status " << outRecord.GetStatus()); + Send(ev->Sender, response.release(), 0, ev->Cookie); } +void TStatisticsAggregator::Handle(TEvPrivate::TEvResolve::TPtr&) { + Resolve(); +} + +void TStatisticsAggregator::Handle(TEvPrivate::TEvRequestDistribution::TPtr&) { + ++HiveRequestRound; + + auto reqDistribution = std::make_unique(); + reqDistribution->Record.MutableTabletIds()->Add(TabletsForReqDistribution.begin(), TabletsForReqDistribution.end()); + Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(reqDistribution.release(), HiveId, true)); +} + +void TStatisticsAggregator::Handle(TEvStatistics::TEvAggregateKeepAlive::TPtr& ev) { + const auto round = ev->Get()->Record.GetRound(); + if (round == GlobalTraversalRound && AggregationRequestBeginTime) { + TInstant now = AppData(TlsActivationContext->AsActorContext())->TimeProvider->Now(); + TDuration time = now - AggregationRequestBeginTime; + TabletCounters->Simple()[COUNTER_AGGREGATION_TIME].Set(time.MicroSeconds()); + } + + auto ack = std::make_unique(); + ack->Record.SetRound(round); + Send(ev->Sender, ack.release()); + Schedule(KeepAliveTimeout, new TEvPrivate::TEvAckTimeout(++KeepAliveSeqNo)); +} + void TStatisticsAggregator::InitializeStatisticsTable() { if (!EnableColumnStatistics) { return; } - 
Register(CreateStatisticsTableCreator(std::make_unique())); + if (!Database) { + return; + } + Register(CreateStatisticsTableCreator( + std::make_unique(), Database)); } void TStatisticsAggregator::Navigate() { + Y_ABORT_UNLESS(NavigateType == ENavigateType::Traversal && !NavigateAnalyzeOperationId + || NavigateType == ENavigateType::Analyze && NavigateAnalyzeOperationId); + Y_ABORT_UNLESS(NavigatePathId); + using TNavigate = NSchemeCache::TSchemeCacheNavigate; TNavigate::TEntry entry; - entry.TableId = ScanTableId; + entry.TableId = NavigatePathId; entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; entry.Operation = TNavigate::OpTable; @@ -460,10 +536,16 @@ void TStatisticsAggregator::Navigate() { } void TStatisticsAggregator::Resolve() { + Y_ABORT_UNLESS(NavigateType == ENavigateType::Traversal && !NavigateAnalyzeOperationId + || NavigateType == ENavigateType::Analyze && NavigateAnalyzeOperationId); + Y_ABORT_UNLESS(NavigatePathId); + + ++ResolveRound; + TVector plusInf; - TTableRange range(StartKey.GetCells(), true, plusInf, true, false); + TTableRange range(TraversalStartKey.GetCells(), true, plusInf, true, false); auto keyDesc = MakeHolder( - ScanTableId, range, TKeyDesc::ERowOperation::Read, KeyColumnTypes, Columns); + NavigatePathId, range, TKeyDesc::ERowOperation::Read, KeyColumnTypes, Columns); auto request = std::make_unique(); request->ResultSet.emplace_back(std::move(keyDesc)); @@ -471,18 +553,22 @@ void TStatisticsAggregator::Resolve() { Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvResolveKeySet(request.release())); } -void TStatisticsAggregator::NextRange() { - if (ShardRanges.empty()) { +void TStatisticsAggregator::ScanNextDatashardRange() { + // Datashard traversal is temporary disabled + Y_FAIL(); + + if (DatashardRanges.empty()) { SaveStatisticsToTable(); return; } - auto& range = ShardRanges.front(); + auto& range = DatashardRanges.front(); auto request = std::make_unique(); auto& record = request->Record; - 
record.MutableTableId()->SetOwnerId(ScanTableId.PathId.OwnerId); - record.MutableTableId()->SetTableId(ScanTableId.PathId.LocalPathId); - record.SetStartKey(StartKey.GetBuffer()); + auto* path = record.MutableTable()->MutablePathId(); + path->SetOwnerId(TraversalPathId.OwnerId); + path->SetLocalId(TraversalPathId.LocalPathId); + record.SetStartKey(TraversalStartKey.GetBuffer()); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(request.release(), range.DataShardId, true), @@ -500,6 +586,9 @@ void TStatisticsAggregator::SaveStatisticsToTable() { std::vector columnTags; std::vector data; auto count = CountMinSketches.size(); + if (count == 0) { + return; + } columnTags.reserve(count); data.reserve(count); @@ -513,8 +602,8 @@ void TStatisticsAggregator::SaveStatisticsToTable() { data.push_back(strSketch); } - Register(CreateSaveStatisticsQuery(ScanTableId.PathId, EStatType::COUNT_MIN_SKETCH, - std::move(columnTags), std::move(data))); + Register(CreateSaveStatisticsQuery(SelfId(), Database, + TraversalPathId, EStatType::COUNT_MIN_SKETCH, std::move(columnTags), std::move(data))); } void TStatisticsAggregator::DeleteStatisticsFromTable() { @@ -525,106 +614,308 @@ void TStatisticsAggregator::DeleteStatisticsFromTable() { PendingDeleteStatistics = false; - Register(CreateDeleteStatisticsQuery(ScanTableId.PathId)); + Register(CreateDeleteStatisticsQuery(SelfId(), Database, TraversalPathId)); } -void TStatisticsAggregator::ScheduleNextScan(NIceDb::TNiceDb& db) { - if (!ScanOperations.Empty()) { - auto* operation = ScanOperations.Front(); - ReplyToActorIds.swap(operation->ReplyToActorIds); +void TStatisticsAggregator::ScheduleNextAnalyze(NIceDb::TNiceDb& db) { + Y_UNUSED(db); + if (ForceTraversals.empty()) { + SA_LOG_T("[" << TabletID() << "] ScheduleNextAnalyze. 
Empty ForceTraversals"); + return; + } + SA_LOG_D("[" << TabletID() << "] ScheduleNextAnalyze"); + + for (TForceTraversalOperation& operation : ForceTraversals) { + for (TForceTraversalTable& operationTable : operation.Tables) { + if (operationTable.Status == TForceTraversalTable::EStatus::None) { + std::optional isColumnTable = IsColumnTable(operationTable.PathId); + if (!isColumnTable) { + ForceTraversalOperationId = operation.OperationId; + TraversalPathId = operationTable.PathId; + DeleteStatisticsFromTable(); + return; + } + + if (*isColumnTable) { + NavigateAnalyzeOperationId = operation.OperationId; + NavigatePathId = operationTable.PathId; + Navigate(); + return; + } else { + SA_LOG_D("[" << TabletID() << "] ScheduleNextAnalyze. Skip analyze for datashard table " << operationTable.PathId); + UpdateForceTraversalTableStatus(TForceTraversalTable::EStatus::AnalyzeFinished, operation.OperationId, operationTable, db); + return; + } + } + } + + SA_LOG_D("[" << TabletID() << "] ScheduleNextAnalyze. All the force traversal tables sent the requests. OperationId=" << operation.OperationId); + continue; + } - StartScan(db, operation->PathId); + SA_LOG_D("[" << TabletID() << "] ScheduleNextAnalyze. All the force traversal operations sent the requests."); +} - db.Table().Key(operation->OperationId).Delete(); - ScanOperations.PopFront(); - ScanOperationsByPathId.erase(operation->PathId); - return; +void TStatisticsAggregator::ScheduleNextTraversal(NIceDb::TNiceDb& db) { + SA_LOG_D("[" << TabletID() << "] ScheduleNextTraversal"); + + TPathId pathId; + + if (!LastTraversalWasForce) { + LastTraversalWasForce = true; + + if (ForceTraversals.empty()) { + SA_LOG_D("[" << TabletID() << "] ScheduleNextTraversal. 
No force traversals."); + } else { + for (TForceTraversalOperation& operation : ForceTraversals) { + for (TForceTraversalTable& operationTable : operation.Tables) { + if (operationTable.Status == TForceTraversalTable::EStatus::AnalyzeFinished) { + UpdateForceTraversalTableStatus(TForceTraversalTable::EStatus::TraversalStarted, operation.OperationId, operationTable, db); + pathId = operationTable.PathId; + break; + } + } + + if (!pathId) { + SA_LOG_D("[" << TabletID() << "] ScheduleNextTraversal. All the force traversal tables sent the requests. OperationId=" << operation.OperationId); + continue; + } + + ForceTraversalOperationId = operation.OperationId; + break; + } + + if (!pathId) { + SA_LOG_D("[" << TabletID() << "] ScheduleNextTraversal. All the force traversal operations sent the requests."); + } + } } - if (ScanTablesByTime.Empty()) { + + if (!pathId && !ScheduleTraversalsByTime.Empty()){ + LastTraversalWasForce = false; + + auto* oldestTable = ScheduleTraversalsByTime.Top(); + if (TInstant::Now() < oldestTable->LastUpdateTime + ScheduleTraversalPeriod) { + SA_LOG_T("[" << TabletID() << "] A schedule traversal is skiped. " + << "The oldest table " << oldestTable->PathId << " update time " << oldestTable->LastUpdateTime << " is too fresh."); + return; + } + + pathId = oldestTable->PathId; + } + + if (!pathId) { + SA_LOG_E("[" << TabletID() << "] No traversal from schemeshard."); + return; + } + + TraversalPathId = pathId; + TraversalStartTime = TInstant::Now(); + + std::optional isColumnTable = IsColumnTable(pathId); + if (!isColumnTable){ + DeleteStatisticsFromTable(); return; } - auto* topTable = ScanTablesByTime.Top(); - auto now = TInstant::Now(); - auto updateTime = topTable->LastUpdateTime; - if (now - updateTime < ScanIntervalTime) { + + // Datashard traversal is temporary disabled + if (!*isColumnTable) { + SA_LOG_D("[" << TabletID() << "] ScheduleNextTraversal. 
Skip traversal for datashard table " << pathId); + DeleteStatisticsFromTable(); return; } - StartScan(db, topTable->PathId); + + TraversalIsColumnTable = *isColumnTable; + + SA_LOG_D("[" << TabletID() << "] Start " + << LastTraversalWasForceString() + << " traversal navigate for path " << pathId); + + StartTraversal(db); } -void TStatisticsAggregator::StartScan(NIceDb::TNiceDb& db, TPathId pathId) { - ScanTableId.PathId = pathId; - ScanStartTime = TInstant::Now(); - PersistCurrentScan(db); +void TStatisticsAggregator::StartTraversal(NIceDb::TNiceDb& db) { + PersistTraversal(db); - StartKey = TSerializedCellVec(); + TraversalStartKey = TSerializedCellVec(); PersistStartKey(db); + NavigatePathId = TraversalPathId; Navigate(); } -void TStatisticsAggregator::FinishScan(NIceDb::TNiceDb& db) { - auto pathId = ScanTableId.PathId; +void TStatisticsAggregator::FinishTraversal(NIceDb::TNiceDb& db) { + auto pathId = TraversalPathId; - auto pathIt = ScanTables.find(pathId); - if (pathIt != ScanTables.end()) { - auto& scanTable = pathIt->second; - scanTable.LastUpdateTime = ScanStartTime; - db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Update( - NIceDb::TUpdate(ScanStartTime.MicroSeconds())); + auto pathIt = ScheduleTraversals.find(pathId); + if (pathIt != ScheduleTraversals.end()) { + auto& traversalTable = pathIt->second; + traversalTable.LastUpdateTime = TraversalStartTime; + db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Update( + NIceDb::TUpdate(TraversalStartTime.MicroSeconds())); - if (ScanTablesByTime.Has(&scanTable)) { - ScanTablesByTime.Update(&scanTable); + if (ScheduleTraversalsByTime.Has(&traversalTable)) { + ScheduleTraversalsByTime.Update(&traversalTable); } } - ResetScanState(db); + auto forceTraversalOperation = CurrentForceTraversalOperation(); + if (forceTraversalOperation) { + auto operationTable = CurrentForceTraversalTable(); + + UpdateForceTraversalTableStatus(TForceTraversalTable::EStatus::TraversalFinished, forceTraversalOperation->OperationId, 
*operationTable, db); + + bool tablesRemained = std::any_of(forceTraversalOperation->Tables.begin(), forceTraversalOperation->Tables.end(), + [](const TForceTraversalTable& elem) { return elem.Status != TForceTraversalTable::EStatus::TraversalFinished;}); + if (!tablesRemained) { + DeleteForceTraversalOperation(ForceTraversalOperationId, db); + } + } + + ResetTraversalState(db); } +TString TStatisticsAggregator::LastTraversalWasForceString() const { + return LastTraversalWasForce ? "force" : "schedule"; +} + +TStatisticsAggregator::TForceTraversalOperation* TStatisticsAggregator::CurrentForceTraversalOperation() { + return ForceTraversalOperation(ForceTraversalOperationId); +} + +TStatisticsAggregator::TForceTraversalOperation* TStatisticsAggregator::ForceTraversalOperation(const TString& operationId) { + auto forceTraversalOperation = std::find_if(ForceTraversals.begin(), ForceTraversals.end(), + [operationId](const TForceTraversalOperation& elem) { return elem.OperationId == operationId;}); + + if (forceTraversalOperation == ForceTraversals.end()) { + return nullptr; + } else { + return &*forceTraversalOperation; + } +} + +std::optional TStatisticsAggregator::IsColumnTable(const TPathId& pathId) const { + auto itPath = ScheduleTraversals.find(pathId); + if (itPath != ScheduleTraversals.end()) { + bool ret = itPath->second.IsColumnTable; + SA_LOG_D("[" << TabletID() << "] IsColumnTable. Path " << pathId << " is " + << (ret ? "column" : "data") << " table."); + return ret; + } else { + SA_LOG_E("[" << TabletID() << "] IsColumnTable. 
traversal path " << pathId << " is not known to schemeshard"); + return {}; + } +} + +void TStatisticsAggregator::DeleteForceTraversalOperation(const TString& operationId, NIceDb::TNiceDb& db) { + db.Table().Key(ForceTraversalOperationId).Delete(); + + auto operation = ForceTraversalOperation(operationId); + for(const TForceTraversalTable& table : operation->Tables) { + db.Table().Key(operationId, table.PathId.OwnerId, table.PathId.LocalPathId).Delete(); + } + + ForceTraversals.remove_if([operationId](const TForceTraversalOperation& elem) { return elem.OperationId == operationId;}); + TabletCounters->Simple()[COUNTER_FORCE_TRAVERSALS_INFLIGHT_SIZE].Set(ForceTraversals.size()); +} + +TStatisticsAggregator::TForceTraversalTable* TStatisticsAggregator::ForceTraversalTable(const TString& operationId, const TPathId& pathId) { + for (TForceTraversalOperation& operation : ForceTraversals) { + if (operation.OperationId == operationId) { + for (TForceTraversalTable& operationTable : operation.Tables) { + if (operationTable.PathId == pathId) { + return &operationTable; + } + } + } + } + + return nullptr; +} + +TStatisticsAggregator::TForceTraversalTable* TStatisticsAggregator::CurrentForceTraversalTable() { + return ForceTraversalTable(ForceTraversalOperationId, TraversalPathId); +} + +void TStatisticsAggregator::UpdateForceTraversalTableStatus(const TForceTraversalTable::EStatus status, const TString& operationId, TStatisticsAggregator::TForceTraversalTable& table, NIceDb::TNiceDb& db) { + table.Status = status; + db.Table().Key(operationId, table.PathId.OwnerId, table.PathId.LocalPathId) + .Update(NIceDb::TUpdate((ui64)status)); +} + + void TStatisticsAggregator::PersistSysParam(NIceDb::TNiceDb& db, ui64 id, const TString& value) { db.Table().Key(id).Update( NIceDb::TUpdate(value)); } -void TStatisticsAggregator::PersistCurrentScan(NIceDb::TNiceDb& db) { - PersistSysParam(db, Schema::SysParam_ScanTableOwnerId, ToString(ScanTableId.PathId.OwnerId)); - PersistSysParam(db, 
Schema::SysParam_ScanTableLocalPathId, ToString(ScanTableId.PathId.LocalPathId)); - PersistSysParam(db, Schema::SysParam_ScanStartTime, ToString(ScanStartTime.MicroSeconds())); +void TStatisticsAggregator::PersistTraversal(NIceDb::TNiceDb& db) { + PersistSysParam(db, Schema::SysParam_TraversalTableOwnerId, ToString(TraversalPathId.OwnerId)); + PersistSysParam(db, Schema::SysParam_TraversalTableLocalPathId, ToString(TraversalPathId.LocalPathId)); + PersistSysParam(db, Schema::SysParam_TraversalStartTime, ToString(TraversalStartTime.MicroSeconds())); + PersistSysParam(db, Schema::SysParam_TraversalIsColumnTable, ToString(TraversalIsColumnTable)); } void TStatisticsAggregator::PersistStartKey(NIceDb::TNiceDb& db) { - PersistSysParam(db, Schema::SysParam_StartKey, StartKey.GetBuffer()); + PersistSysParam(db, Schema::SysParam_TraversalStartKey, TraversalStartKey.GetBuffer()); } -void TStatisticsAggregator::PersistLastScanOperationId(NIceDb::TNiceDb& db) { - PersistSysParam(db, Schema::SysParam_LastScanOperationId, ToString(LastScanOperationId)); +void TStatisticsAggregator::PersistGlobalTraversalRound(NIceDb::TNiceDb& db) { + PersistSysParam(db, Schema::SysParam_GlobalTraversalRound, ToString(GlobalTraversalRound)); } -void TStatisticsAggregator::ResetScanState(NIceDb::TNiceDb& db) { - ScanTableId.PathId = TPathId(); - ScanStartTime = TInstant::MicroSeconds(0); - PersistCurrentScan(db); +void TStatisticsAggregator::ResetTraversalState(NIceDb::TNiceDb& db) { + ForceTraversalOperationId.clear(); + TraversalPathId = {}; + TraversalStartTime = TInstant::MicroSeconds(0); + PersistTraversal(db); - StartKey = TSerializedCellVec(); + TraversalStartKey = TSerializedCellVec(); PersistStartKey(db); - ReplyToActorIds.clear(); - for (auto& [tag, _] : CountMinSketches) { - db.Table().Key(tag).Delete(); + db.Table().Key(tag).Delete(); } CountMinSketches.clear(); - ShardRanges.clear(); + DatashardRanges.clear(); KeyColumnTypes.clear(); Columns.clear(); ColumnNames.clear(); + + 
TabletsForReqDistribution.clear(); + + ResolveRound = 0; + HiveRequestRound = 0; + TraversalRound = 0; } -template -void PrintContainerStart(const T& container, size_t count, TStringStream& str, - std::function extractor) +TString TStatisticsAggregator::GetNavigateTypeString() const { + switch (NavigateType) { + case Analyze: + return "Analyze"; + case Traversal: + return "Traversal"; + } +} + +TString TStatisticsAggregator::TForceTraversalTable::GetStatusString() const { + switch (Status) { + case EStatus::None: + return "None"; + case EStatus::AnalyzeStarted: + return "AnalyzeStarted"; + case EStatus::AnalyzeFinished: + return "AnalyzeFinished"; + case EStatus::TraversalStarted: + return "TraversalStarted"; + case EStatus::TraversalFinished: + return "TraversalFinished"; + } +} + +void PrintContainerStart(const auto& container, size_t count, TStringStream& str, auto extractor) { if (container.empty()) { return; @@ -658,70 +949,105 @@ bool TStatisticsAggregator::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev PRE() { str << "---- StatisticsAggregator ----" << Endl << Endl; str << "Database: " << Database << Endl; - str << "BaseStats: " << BaseStats.size() << Endl; + str << "BaseStatistics: " << BaseStatistics.size() << Endl; str << "SchemeShards: " << SchemeShards.size() << Endl; { - std::function&)> extr = - [](const auto& x) { return x.first; }; + auto extr = [](const auto& x) { return x.first; }; PrintContainerStart(SchemeShards, 4, str, extr); } str << "Nodes: " << Nodes.size() << Endl; { - std::function&)> extr = - [](const auto& x) { return x.first; }; + auto extr = [](const auto& x) { return x.first; }; PrintContainerStart(Nodes, 8, str, extr); } str << "RequestedSchemeShards: " << RequestedSchemeShards.size() << Endl; { - std::function extr = [](const auto& x) { return x; }; + auto extr = [](const auto& x) { return x; }; PrintContainerStart(RequestedSchemeShards, 4, str, extr); } str << "FastCounter: " << FastCounter << Endl; str << 
"FastCheckInFlight: " << FastCheckInFlight << Endl; str << "FastSchemeShards: " << FastSchemeShards.size() << Endl; { - std::function extr = [](const auto& x) { return x; }; + auto extr = [](const auto& x) { return x; }; PrintContainerStart(FastSchemeShards, 4, str, extr); } str << "FastNodes: " << FastNodes.size() << Endl; { - std::function extr = [](const auto& x) { return x; }; + auto extr = [](const auto& x) { return x; }; PrintContainerStart(FastNodes, 8, str, extr); } str << "PropagationInFlight: " << PropagationInFlight << Endl; str << "PropagationSchemeShards: " << PropagationSchemeShards.size() << Endl; { - std::function extr = [](const auto& x) { return x; }; + auto extr = [](const auto& x) { return x; }; PrintContainerStart(PropagationSchemeShards, 4, str, extr); } str << "PropagationNodes: " << PropagationNodes.size() << Endl; { - std::function extr = [](const auto& x) { return x; }; + auto extr = [](const auto& x) { return x; }; PrintContainerStart(FastNodes, 8, str, extr); } str << "LastSSIndex: " << LastSSIndex << Endl; str << "PendingRequests: " << PendingRequests.size() << Endl; str << "ProcessUrgentInFlight: " << ProcessUrgentInFlight << Endl << Endl; - str << "ScanTableId: " << ScanTableId << Endl; str << "Columns: " << Columns.size() << Endl; - str << "ShardRanges: " << ShardRanges.size() << Endl; + str << "DatashardRanges: " << DatashardRanges.size() << Endl; str << "CountMinSketches: " << CountMinSketches.size() << Endl << Endl; - str << "ScanTablesByTime: " << ScanTablesByTime.Size() << Endl; - if (!ScanTablesByTime.Empty()) { - auto* scanTable = ScanTablesByTime.Top(); - str << " top: " << scanTable->PathId - << ", last update time: " << scanTable->LastUpdateTime << Endl; + str << "ScheduleTraversalsByTime: " << ScheduleTraversalsByTime.Size() << Endl; + if (!ScheduleTraversalsByTime.Empty()) { + auto* oldestTable = ScheduleTraversalsByTime.Top(); + str << " oldest table: " << oldestTable->PathId + << ", update time: " << 
oldestTable->LastUpdateTime.ToStringUpToSeconds() << Endl; + } + str << "ScheduleTraversalsBySchemeShard: " << ScheduleTraversalsBySchemeShard.size() << Endl; + if (!ScheduleTraversalsBySchemeShard.empty()) { + str << " " << ScheduleTraversalsBySchemeShard.begin()->first << Endl; + auto extr = [](const auto& x) { return x; }; + PrintContainerStart(ScheduleTraversalsBySchemeShard.begin()->second, 2, str, extr); } - str << "ScanTablesBySchemeShard: " << ScanTablesBySchemeShard.size() << Endl; - if (!ScanTablesBySchemeShard.empty()) { - str << " " << ScanTablesBySchemeShard.begin()->first << Endl; - std::function extr = [](const auto& x) { return x; }; - PrintContainerStart(ScanTablesBySchemeShard.begin()->second, 2, str, extr); + str << "ForceTraversals: " << ForceTraversals.size() << Endl; + if (!ForceTraversals.empty()) { + auto extr = [](const auto& x) { return x.CreatedAt.ToStringUpToSeconds(); }; + PrintContainerStart(ForceTraversals, 2, str, extr); + } + + str << Endl; + str << "NavigateType: " << GetNavigateTypeString() << Endl; + str << "NavigateAnalyzeOperationId: " << NavigateAnalyzeOperationId << Endl; + str << "NavigatePathId: " << NavigatePathId << Endl; + + str << Endl; + str << "ForceTraversalOperationId: " << ForceTraversalOperationId << Endl; + if (ForceTraversalOperationId) { + auto forceTraversal = CurrentForceTraversalOperation(); + str << " CreatedAt: " << forceTraversal->CreatedAt << Endl; + str << ", ReplyToActorId: " << forceTraversal->ReplyToActorId << Endl; + str << ", Types: " << forceTraversal->Types << Endl; + str << ", Tables size: " << forceTraversal->Tables.size() << Endl; + str << ", Tables: " << Endl; + + for (size_t i = 0; i < forceTraversal->Tables.size(); ++i) { + const TForceTraversalTable& table = forceTraversal->Tables[i]; + str << " Table[" << i << "] PathId: " << table.PathId << Endl; + str << " Status: " << table.GetStatusString() << Endl; + str << " AnalyzedShards size: " << table.AnalyzedShards.size() << Endl; + str << " 
ColumnTags: " << table.ColumnTags << Endl; + } } - str << "ScanStartTime: " << ScanStartTime << Endl; + str << Endl; + str << "TraversalStartTime: " << TraversalStartTime.ToStringUpToSeconds() << Endl; + str << "TraversalPathId: " << TraversalPathId << Endl; + str << "TraversalIsColumnTable: " << TraversalIsColumnTable << Endl; + str << "TraversalStartKey: " << TraversalStartKey.GetBuffer() << Endl; + + str << Endl; + str << "GlobalTraversalRound: " << GlobalTraversalRound << Endl; + str << "TraversalRound: " << TraversalRound << Endl; + str << "HiveRequestRound: " << HiveRequestRound << Endl; } } diff --git a/ydb/core/statistics/aggregator/aggregator_impl.h b/ydb/core/statistics/aggregator/aggregator_impl.h index 314a7051a446..1d75e08b6c01 100644 --- a/ydb/core/statistics/aggregator/aggregator_impl.h +++ b/ydb/core/statistics/aggregator/aggregator_impl.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include #include #include #include @@ -44,13 +46,19 @@ class TStatisticsAggregator : public TActor, public NTabl struct TTxInit; struct TTxConfigure; struct TTxSchemeShardStats; - struct TTxScanTable; + struct TTxAnalyze; + struct TTxAnalyzeTableRequest; + struct TTxAnalyzeTableResponse; + struct TTxAnalyzeTableDeliveryProblem; + struct TTxAnalyzeDeadline; struct TTxNavigate; struct TTxResolve; - struct TTxStatisticsScanResponse; - struct TTxSaveQueryResponse; - struct TTxScheduleScan; - struct TTxDeleteQueryResponse; + struct TTxDatashardScanResponse; + struct TTxFinishTraversal; + struct TTxScheduleTrasersal; + struct TTxAggregateStatisticsResponse; + struct TTxResponseTabletDistribution; + struct TTxAckTimeout; struct TEvPrivate { enum EEv { @@ -58,7 +66,13 @@ class TStatisticsAggregator : public TActor, public NTabl EvFastPropagateCheck, EvProcessUrgent, EvPropagateTimeout, - EvScheduleScan, + EvScheduleTraversal, + EvRequestDistribution, + EvResolve, + EvAckTimeout, + EvSendAnalyze, + 
EvAnalyzeDeliveryProblem, + EvAnalyzeDeadline, EvEnd }; @@ -67,7 +81,20 @@ class TStatisticsAggregator : public TActor, public NTabl struct TEvFastPropagateCheck : public TEventLocal {}; struct TEvProcessUrgent : public TEventLocal {}; struct TEvPropagateTimeout : public TEventLocal {}; - struct TEvScheduleScan : public TEventLocal {}; + struct TEvScheduleTraversal : public TEventLocal {}; + struct TEvRequestDistribution : public TEventLocal {}; + struct TEvResolve : public TEventLocal {}; + struct TEvSendAnalyze : public TEventLocal {}; + struct TEvAnalyzeDeliveryProblem : public TEventLocal {}; + struct TEvAnalyzeDeadline : public TEventLocal {}; + + struct TEvAckTimeout : public TEventLocal { + size_t SeqNo = 0; + explicit TEvAckTimeout(size_t seqNo) { + SeqNo = seqNo; + } + }; + }; private: @@ -94,6 +121,7 @@ class TStatisticsAggregator : public TActor, public NTabl void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev); void Handle(TEvPrivate::TEvFastPropagateCheck::TPtr& ev); void Handle(TEvStatistics::TEvPropagateStatisticsResponse::TPtr& ev); + void Handle(TEvStatistics::TEvAnalyzeTableResponse::TPtr& ev); void Handle(TEvPrivate::TEvProcessUrgent::TPtr& ev); void Handle(TEvPrivate::TEvPropagateTimeout::TPtr& ev); @@ -104,7 +132,7 @@ class TStatisticsAggregator : public TActor, public NTabl size_t PropagatePart(const std::vector& nodeIds, const std::vector& ssIds, size_t lastSSIndex, bool useSizeLimit); - void Handle(TEvStatistics::TEvScanTable::TPtr& ev); + void Handle(TEvStatistics::TEvAnalyze::TPtr& ev); void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); void Handle(TEvTxProxySchemeCache::TEvResolveKeySetResult::TPtr& ev); void Handle(NStat::TEvStatistics::TEvStatisticsResponse::TPtr& ev); @@ -112,26 +140,39 @@ class TStatisticsAggregator : public TActor, public NTabl void Handle(TEvStatistics::TEvStatTableCreationResponse::TPtr& ev); void Handle(TEvStatistics::TEvSaveStatisticsQueryResponse::TPtr& ev); void 
Handle(TEvStatistics::TEvDeleteStatisticsQueryResponse::TPtr& ev); - void Handle(TEvPrivate::TEvScheduleScan::TPtr& ev); - void Handle(TEvStatistics::TEvGetScanStatus::TPtr& ev); + void Handle(TEvPrivate::TEvScheduleTraversal::TPtr& ev); + void Handle(TEvStatistics::TEvAnalyzeStatus::TPtr& ev); + void Handle(TEvHive::TEvResponseTabletDistribution::TPtr& ev); + void Handle(TEvStatistics::TEvAggregateStatisticsResponse::TPtr& ev); + void Handle(TEvPrivate::TEvResolve::TPtr& ev); + void Handle(TEvPrivate::TEvRequestDistribution::TPtr& ev); + void Handle(TEvStatistics::TEvAggregateKeepAlive::TPtr& ev); + void Handle(TEvPrivate::TEvAckTimeout::TPtr& ev); + void Handle(TEvPrivate::TEvSendAnalyze::TPtr& ev); + void Handle(TEvPrivate::TEvAnalyzeDeliveryProblem::TPtr& ev); + void Handle(TEvPrivate::TEvAnalyzeDeadline::TPtr& ev); void InitializeStatisticsTable(); void Navigate(); void Resolve(); - void NextRange(); + void ScanNextDatashardRange(); void SaveStatisticsToTable(); void DeleteStatisticsFromTable(); void PersistSysParam(NIceDb::TNiceDb& db, ui64 id, const TString& value); - void PersistCurrentScan(NIceDb::TNiceDb& db); + void PersistTraversal(NIceDb::TNiceDb& db); void PersistStartKey(NIceDb::TNiceDb& db); - void PersistLastScanOperationId(NIceDb::TNiceDb& db); + void PersistGlobalTraversalRound(NIceDb::TNiceDb& db); + + void ResetTraversalState(NIceDb::TNiceDb& db); + void ScheduleNextAnalyze(NIceDb::TNiceDb& db); + void ScheduleNextTraversal(NIceDb::TNiceDb& db); + void StartTraversal(NIceDb::TNiceDb& db); + void FinishTraversal(NIceDb::TNiceDb& db); - void ResetScanState(NIceDb::TNiceDb& db); - void ScheduleNextScan(NIceDb::TNiceDb& db); - void StartScan(NIceDb::TNiceDb& db, TPathId pathId); - void FinishScan(NIceDb::TNiceDb& db); + std::optional IsColumnTable(const TPathId& pathId) const; + TString LastTraversalWasForceString() const; STFUNC(StateInit) { StateInitImpl(ev, SelfId()); @@ -151,10 +192,11 @@ class TStatisticsAggregator : public TActor, public 
NTabl hFunc(TEvTabletPipe::TEvServerDisconnected, Handle); hFunc(TEvPrivate::TEvFastPropagateCheck, Handle); hFunc(TEvStatistics::TEvPropagateStatisticsResponse, Handle); + hFunc(TEvStatistics::TEvAnalyzeTableResponse, Handle); hFunc(TEvPrivate::TEvProcessUrgent, Handle); hFunc(TEvPrivate::TEvPropagateTimeout, Handle); - hFunc(TEvStatistics::TEvScanTable, Handle); + hFunc(TEvStatistics::TEvAnalyze, Handle); hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); hFunc(TEvTxProxySchemeCache::TEvResolveKeySetResult, Handle); hFunc(NStat::TEvStatistics::TEvStatisticsResponse, Handle); @@ -162,8 +204,17 @@ class TStatisticsAggregator : public TActor, public NTabl hFunc(TEvStatistics::TEvStatTableCreationResponse, Handle); hFunc(TEvStatistics::TEvSaveStatisticsQueryResponse, Handle); hFunc(TEvStatistics::TEvDeleteStatisticsQueryResponse, Handle); - hFunc(TEvPrivate::TEvScheduleScan, Handle); - hFunc(TEvStatistics::TEvGetScanStatus, Handle); + hFunc(TEvPrivate::TEvScheduleTraversal, Handle); + hFunc(TEvStatistics::TEvAnalyzeStatus, Handle); + hFunc(TEvHive::TEvResponseTabletDistribution, Handle); + hFunc(TEvStatistics::TEvAggregateStatisticsResponse, Handle); + hFunc(TEvPrivate::TEvResolve, Handle); + hFunc(TEvPrivate::TEvRequestDistribution, Handle); + hFunc(TEvStatistics::TEvAggregateKeepAlive, Handle); + hFunc(TEvPrivate::TEvAckTimeout, Handle); + hFunc(TEvPrivate::TEvSendAnalyze, Handle); + hFunc(TEvPrivate::TEvAnalyzeDeliveryProblem, Handle); + hFunc(TEvPrivate::TEvAnalyzeDeadline, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { @@ -178,6 +229,11 @@ class TStatisticsAggregator : public TActor, public NTabl std::mt19937_64 RandomGenerator; + TTabletCountersBase* TabletCounters; + TAutoPtr TabletCountersPtr; + + TInstant AggregationRequestBeginTime; + bool EnableStatistics = false; bool EnableColumnStatistics = false; @@ -188,7 +244,7 @@ class TStatisticsAggregator : public TActor, public NTabl TDuration PropagateTimeout; static constexpr TDuration 
FastCheckInterval = TDuration::MilliSeconds(50); - std::unordered_map BaseStats; // schemeshard id -> serialized stats for all paths + std::unordered_map BaseStatistics; // schemeshard id -> serialized stats for all paths std::unordered_map SchemeShards; // all connected schemeshards std::unordered_map SchemeShardPipes; // schemeshard pipe servers @@ -211,11 +267,6 @@ class TStatisticsAggregator : public TActor, public NTabl std::queue PendingRequests; bool ProcessUrgentInFlight = false; - // - - TTableId ScanTableId; // stored in local db - std::unordered_set ReplyToActorIds; - bool IsStatisticsTableCreated = false; bool PendingSaveStatistics = false; bool PendingDeleteStatistics = false; @@ -228,53 +279,135 @@ class TStatisticsAggregator : public TActor, public NTabl TSerializedCellVec EndKey; ui64 DataShardId = 0; }; - std::deque ShardRanges; + std::deque DatashardRanges; - TSerializedCellVec StartKey; // stored in local db + // period for both force and schedule traversals + static constexpr TDuration TraversalPeriod = TDuration::Seconds(1); + // if table traverse time is older, than traserse it on schedule + static constexpr TDuration ScheduleTraversalPeriod = TDuration::Hours(24); - std::unordered_map> CountMinSketches; // stored in local db - - static constexpr TDuration ScanIntervalTime = TDuration::Hours(24); - static constexpr TDuration ScheduleScanIntervalTime = TDuration::Seconds(1); - - struct TScanTable { + struct TScheduleTraversal { TPathId PathId; ui64 SchemeShardId = 0; TInstant LastUpdateTime; + bool IsColumnTable = false; size_t HeapIndexByTime = -1; struct THeapIndexByTime { - size_t& operator()(TScanTable& value) const { + size_t& operator()(TScheduleTraversal& value) const { return value.HeapIndexByTime; } }; struct TLessByTime { - bool operator()(const TScanTable& l, const TScanTable& r) const { + bool operator()(const TScheduleTraversal& l, const TScheduleTraversal& r) const { return l.LastUpdateTime < r.LastUpdateTime; } }; }; - 
std::unordered_map ScanTables; // stored in local db - std::unordered_map> ScanTablesBySchemeShard; + size_t ResolveRound = 0; + static constexpr size_t MaxResolveRoundCount = 5; + static constexpr TDuration ResolveRetryInterval = TDuration::Seconds(1); + + ui64 HiveId = 0; + std::unordered_set TabletsForReqDistribution; + + size_t HiveRequestRound = 0; + static constexpr size_t MaxHiveRequestRoundCount = 5; + static constexpr TDuration HiveRetryInterval = TDuration::Seconds(1); + + size_t TraversalRound = 0; + static constexpr size_t MaxTraversalRoundCount = 5; + + size_t KeepAliveSeqNo = 0; + static constexpr TDuration KeepAliveTimeout = TDuration::Seconds(3); + + static constexpr size_t SendAnalyzeCount = 100; + static constexpr TDuration SendAnalyzePeriod = TDuration::Seconds(1); + static constexpr TDuration AnalyzeDeliveryProblemPeriod = TDuration::Seconds(1); + static constexpr TDuration AnalyzeDeadline = TDuration::Days(1); + static constexpr TDuration AnalyzeDeadlinePeriod = TDuration::Seconds(1); + + enum ENavigateType { + Analyze, + Traversal + }; + ENavigateType NavigateType = Analyze; + TString GetNavigateTypeString() const; + + TString NavigateAnalyzeOperationId; + TPathId NavigatePathId; + + // alternate between forced and scheduled traversals + bool LastTraversalWasForce = false; + +private: // stored in local db + + TString ForceTraversalOperationId; + + TPathId TraversalPathId; + bool TraversalIsColumnTable = false; + TSerializedCellVec TraversalStartKey; + TInstant TraversalStartTime; + + size_t GlobalTraversalRound = 1; + + std::unordered_map> CountMinSketches; + + std::unordered_map ScheduleTraversals; + std::unordered_map> ScheduleTraversalsBySchemeShard; + typedef TIntrusiveHeap + TTraversalsByTime; + TTraversalsByTime ScheduleTraversalsByTime; - typedef TIntrusiveHeap - TScanTableQueueByTime; - TScanTableQueueByTime ScanTablesByTime; - struct TScanOperation : public TIntrusiveListItem { - ui64 OperationId = 0; + struct TAnalyzedShard { + ui64 
ShardTabletId; + + enum class EStatus : ui8 { + None, + DeliveryProblem, + AnalyzeStarted, + AnalyzeFinished, + }; + EStatus Status = EStatus::None; + }; + + struct TForceTraversalTable { TPathId PathId; - std::unordered_set ReplyToActorIds; + TString ColumnTags; + std::vector AnalyzedShards; + + enum class EStatus : ui8 { + None, + AnalyzeStarted, + AnalyzeFinished, + TraversalStarted, + TraversalFinished, + }; + EStatus Status = EStatus::None; + + TString GetStatusString() const; }; - TIntrusiveList ScanOperations; // stored in local db - std::unordered_map ScanOperationsByPathId; + struct TForceTraversalOperation { + TString OperationId; + std::vector Tables; + TString Types; + TActorId ReplyToActorId; + TInstant CreatedAt; + }; + std::list ForceTraversals; - ui64 LastScanOperationId = 0; // stored in local db +private: + TForceTraversalOperation* CurrentForceTraversalOperation(); + TForceTraversalOperation* ForceTraversalOperation(const TString& operationId); + void DeleteForceTraversalOperation(const TString& operationId, NIceDb::TNiceDb& db); - TInstant ScanStartTime; + TForceTraversalTable* ForceTraversalTable(const TString& operationId, const TPathId& pathId); + TForceTraversalTable* CurrentForceTraversalTable(); + void UpdateForceTraversalTableStatus(const TForceTraversalTable::EStatus status, const TString& operationId, TStatisticsAggregator::TForceTraversalTable& table, NIceDb::TNiceDb& db); }; } // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/schema.h b/ydb/core/statistics/aggregator/schema.h index fea591a932c4..592564ede6e3 100644 --- a/ydb/core/statistics/aggregator/schema.h +++ b/ydb/core/statistics/aggregator/schema.h @@ -13,7 +13,7 @@ struct TAggregatorSchema : NIceDb::Schema { using TColumns = TableColumns; }; - struct BaseStats : Table<2> { + struct BaseStatistics : Table<2> { struct SchemeShardId : Column<1, NScheme::NTypeIds::Uint64> {}; struct Stats : Column<2, NScheme::NTypeIds::String> {}; @@ -21,7 +21,7 @@ struct 
TAggregatorSchema : NIceDb::Schema { using TColumns = TableColumns; }; - struct Statistics : Table<3> { + struct ColumnStatistics : Table<3> { struct ColumnTag : Column<1, NScheme::NTypeIds::Uint32> {}; struct CountMinSketch : Column<2, NScheme::NTypeIds::String> {}; @@ -29,40 +29,63 @@ struct TAggregatorSchema : NIceDb::Schema { using TColumns = TableColumns; }; - struct ScanTables : Table<4> { + struct ScheduleTraversals : Table<4> { struct OwnerId : Column<1, NScheme::NTypeIds::Uint64> {}; struct LocalPathId : Column<2, NScheme::NTypeIds::Uint64> {}; struct LastUpdateTime : Column<3, NScheme::NTypeIds::Timestamp> {}; struct SchemeShardId : Column<4, NScheme::NTypeIds::Uint64> {}; + struct IsColumnTable : Column<5, NScheme::NTypeIds::Bool> {}; using TKey = TableKey; using TColumns = TableColumns< OwnerId, LocalPathId, LastUpdateTime, - SchemeShardId + SchemeShardId, + IsColumnTable >; }; - struct ScanOperations : Table<5> { - struct OperationId : Column<1, NScheme::NTypeIds::Uint64> {}; + // struct ForceTraversals : Table<5> + + struct ForceTraversalOperations : Table<6> { + struct OperationId : Column<1, NScheme::NTypeIds::String> {}; + struct Types : Column<2, NScheme::NTypeIds::String> {}; + struct CreatedAt : Column<3, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns< + OperationId, + Types, + CreatedAt + >; + }; + + struct ForceTraversalTables : Table<7> { + struct OperationId : Column<1, NScheme::NTypeIds::String> {}; struct OwnerId : Column<2, NScheme::NTypeIds::Uint64> {}; struct LocalPathId : Column<3, NScheme::NTypeIds::Uint64> {}; + struct ColumnTags : Column<4, NScheme::NTypeIds::String> {}; + struct Status : Column<5, NScheme::NTypeIds::Uint64> {}; - using TKey = TableKey; + using TKey = TableKey; using TColumns = TableColumns< OperationId, OwnerId, - LocalPathId + LocalPathId, + ColumnTags, + Status >; }; using TTables = SchemaTables< SysParams, - BaseStats, - Statistics, - ScanTables, - ScanOperations + 
BaseStatistics, + ColumnStatistics, + ScheduleTraversals, +// ForceTraversals, + ForceTraversalOperations, + ForceTraversalTables >; using TSettings = SchemaSettings< @@ -71,11 +94,17 @@ struct TAggregatorSchema : NIceDb::Schema { >; static constexpr ui64 SysParam_Database = 1; - static constexpr ui64 SysParam_StartKey = 2; - static constexpr ui64 SysParam_ScanTableOwnerId = 3; - static constexpr ui64 SysParam_ScanTableLocalPathId = 4; - static constexpr ui64 SysParam_ScanStartTime = 5; - static constexpr ui64 SysParam_LastScanOperationId = 6; + static constexpr ui64 SysParam_TraversalStartKey = 2; + // deprecated 3 + static constexpr ui64 SysParam_TraversalTableOwnerId = 4; + static constexpr ui64 SysParam_TraversalTableLocalPathId = 5; + // deprecated 6 + // deprecated 7 + // deprecated 8 + static constexpr ui64 SysParam_TraversalStartTime = 9; + // deprecated 10 + static constexpr ui64 SysParam_TraversalIsColumnTable = 11; + static constexpr ui64 SysParam_GlobalTraversalRound = 12; }; } // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_ack_timeout.cpp b/ydb/core/statistics/aggregator/tx_ack_timeout.cpp new file mode 100644 index 000000000000..d83ce5b10d57 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_ack_timeout.cpp @@ -0,0 +1,32 @@ +#include "aggregator_impl.h" + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAckTimeout : public TTxBase { + explicit TTxAckTimeout(TSelf* self) + : TTxBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_ACK_TIMEOUT; } + + bool Execute(TTransactionContext& /*txc*/, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAckTimeout::Execute"); + return true; + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAckTimeout::Complete"); + + ctx.Send(Self->SelfId(), new TEvPrivate::TEvRequestDistribution); + } +}; + +void TStatisticsAggregator::Handle(TEvPrivate::TEvAckTimeout::TPtr& ev) { + if 
(ev->Get()->SeqNo < KeepAliveSeqNo) { + return; + } + // timeout + Execute(new TTxAckTimeout(this), TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_aggr_stat_response.cpp b/ydb/core/statistics/aggregator/tx_aggr_stat_response.cpp new file mode 100644 index 000000000000..d6622dc623d2 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_aggr_stat_response.cpp @@ -0,0 +1,143 @@ +#include "aggregator_impl.h" + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAggregateStatisticsResponse : public TTxBase { + NKikimrStat::TEvAggregateStatisticsResponse Record; + + enum class EAction : ui8 { + None, + SendReqDistribution, + SendAggregate, + }; + EAction Action = EAction::None; + + std::unique_ptr Request; + + TTxAggregateStatisticsResponse(TSelf* self, NKikimrStat::TEvAggregateStatisticsResponse&& record) + : TTxBase(self) + , Record(std::move(record)) + {} + + TTxType GetTxType() const override { return TXTYPE_AGGR_STAT_RESPONSE; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAggregateStatisticsResponse::Execute"); + + ++Self->KeepAliveSeqNo; // cancel timeout events + + Self->TabletCounters->Simple()[COUNTER_AGGREGATION_TIME].Set(0); + Self->AggregationRequestBeginTime = TInstant::Zero(); + + NIceDb::TNiceDb db(txc.DB); + + for (auto& column : Record.GetColumns()) { + auto tag = column.GetTag(); + for (auto& statistic : column.GetStatistics()) { + if (statistic.GetType() == NKikimr::NStat::COUNT_MIN_SKETCH) { + if (!Self->ColumnNames.contains(tag)) { + continue; + } + + auto [currentIt, emplaced] = Self->CountMinSketches.try_emplace(tag); + if (emplaced) { + currentIt->second.reset(TCountMinSketch::Create()); + } + + auto* data = statistic.GetData().Data(); + auto* sketch = reinterpret_cast(data); + *(currentIt->second) += *sketch; + } + } + } + + if (Record.FailedTabletsSize() == 0 || + 
Self->TraversalRound >= Self->MaxTraversalRoundCount) + { + Self->SaveStatisticsToTable(); + return true; + } + + std::unordered_map> nonLocalTablets; + Self->TabletsForReqDistribution.clear(); + + for (auto& tablet : Record.GetFailedTablets()) { + auto error = tablet.GetError(); + switch (error) { + case NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNSPECIFIED: + SA_LOG_CRIT("[" << Self->TabletID() << "] Unspecified TEvAggregateStatisticsResponse status"); + return false; + + case NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNAVAILABLE_NODE: + Self->TabletsForReqDistribution.insert(tablet.GetTabletId()); + Action = EAction::SendReqDistribution; + break; + + case NKikimrStat::TEvAggregateStatisticsResponse::TYPE_NON_LOCAL_TABLET: + auto nodeId = tablet.GetNodeId(); + if (nodeId == 0) { + // we cannot reach this tablet + Self->TabletsForReqDistribution.insert(tablet.GetTabletId()); + Action = EAction::SendReqDistribution; + + } else if (Action != EAction::SendReqDistribution) { + nonLocalTablets[nodeId].push_back(tablet.GetTabletId()); + } + break; + } + } + + if (Action == EAction::SendReqDistribution) { + return true; + } + + Request = std::make_unique(); + auto& outRecord = Request->Record; + + for (auto& [nodeId, tabletIds] : nonLocalTablets) { + auto& outNode = *outRecord.AddNodes(); + outNode.SetNodeId(nodeId); + outNode.MutableTabletIds()->Reserve(tabletIds.size()); + for (auto tabletId : tabletIds) { + outNode.AddTabletIds(tabletId); + } + } + + ++Self->TraversalRound; + ++Self->GlobalTraversalRound; + Self->PersistGlobalTraversalRound(db); + outRecord.SetRound(Self->GlobalTraversalRound); + Action = EAction::SendAggregate; + + return true; + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAggregateStatisticsResponse::Complete"); + + switch (Action) { + case EAction::SendReqDistribution: + ctx.Send(Self->SelfId(), new TEvPrivate::TEvRequestDistribution); + break; + + case EAction::SendAggregate: 
+ ctx.Send(MakeStatServiceID(Self->SelfId().NodeId()), Request.release()); + ctx.Schedule(KeepAliveTimeout, new TEvPrivate::TEvAckTimeout(++Self->KeepAliveSeqNo)); + Self->AggregationRequestBeginTime = AppData(ctx)->TimeProvider->Now(); + break; + + default: + break; + } + } +}; + +void TStatisticsAggregator::Handle(TEvStatistics::TEvAggregateStatisticsResponse::TPtr& ev) { + auto& record = ev->Get()->Record; + Execute(new TTxAggregateStatisticsResponse(this, std::move(record)), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_analyze.cpp b/ydb/core/statistics/aggregator/tx_analyze.cpp new file mode 100644 index 000000000000..58548d41d47e --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_analyze.cpp @@ -0,0 +1,105 @@ +#include "aggregator_impl.h" + +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAnalyze : public TTxBase { + const NKikimrStat::TEvAnalyze& Record; + TActorId ReplyToActorId; + + TTxAnalyze(TSelf* self, const NKikimrStat::TEvAnalyze& record, TActorId replyToActorId) + : TTxBase(self) + , Record(record) + , ReplyToActorId(replyToActorId) + {} + + TTxType GetTxType() const override { return TXTYPE_ANALYZE_TABLE; } + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Execute. ReplyToActorId " << ReplyToActorId << " , Record " << Record); + + if (!Self->EnableColumnStatistics) { + return true; + } + + NIceDb::TNiceDb db(txc.DB); + + const TString operationId = Record.GetOperationId(); + + // check existing force traversal with the same OperationId + const auto existingOperation = Self->ForceTraversalOperation(operationId); + + // update existing force traversal + if (existingOperation) { + if (existingOperation->Tables.size() == Record.TablesSize()) { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Execute. Update existing force traversal. 
OperationId " << operationId << " , ReplyToActorId " << ReplyToActorId); + existingOperation->ReplyToActorId = ReplyToActorId; + return true; + } else { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Execute. Delete broken force traversal. OperationId " << operationId << " , ReplyToActorId " << ReplyToActorId); + Self->DeleteForceTraversalOperation(operationId, db); + } + } + + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Execute. Create new force traversal operation, OperationId=" << operationId); + const TString types = JoinVectorIntoString(TVector(Record.GetTypes().begin(), Record.GetTypes().end()), ","); + + // create new force trasersal + auto createdAt = ctx.Now(); + TForceTraversalOperation operation { + .OperationId = operationId, + .Tables = {}, + .Types = types, + .ReplyToActorId = ReplyToActorId, + .CreatedAt = createdAt + }; + + for (const auto& table : Record.GetTables()) { + const TPathId pathId = PathIdFromPathId(table.GetPathId()); + const TString columnTags = JoinVectorIntoString(TVector{table.GetColumnTags().begin(),table.GetColumnTags().end()},","); + const auto status = TForceTraversalTable::EStatus::None; + + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Execute. 
Create new force traversal table, OperationId=" << operationId << " , PathId " << pathId); + + // create new force traversal + TForceTraversalTable operationTable { + .PathId = pathId, + .ColumnTags = columnTags, + .Status = status + }; + operation.Tables.emplace_back(operationTable); + + db.Table().Key(operationId, pathId.OwnerId, pathId.LocalPathId).Update( + NIceDb::TUpdate(operationId), + NIceDb::TUpdate(pathId.OwnerId), + NIceDb::TUpdate(pathId.LocalPathId), + NIceDb::TUpdate(columnTags), + NIceDb::TUpdate((ui64)status) + ); + } + + Self->ForceTraversals.emplace_back(operation); + Self->TabletCounters->Simple()[COUNTER_FORCE_TRAVERSALS_INFLIGHT_SIZE].Set(Self->ForceTraversals.size()); + + db.Table().Key(operationId).Update( + NIceDb::TUpdate(operationId), + NIceDb::TUpdate(types), + NIceDb::TUpdate(createdAt.GetValue()) + ); + + return true; + } + + void Complete(const TActorContext& /*ctx*/) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyze::Complete"); + } +}; + +void TStatisticsAggregator::Handle(TEvStatistics::TEvAnalyze::TPtr& ev) { + Execute(new TTxAnalyze(this, ev->Get()->Record, ev->Sender), TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_analyze_deadline.cpp b/ydb/core/statistics/aggregator/tx_analyze_deadline.cpp new file mode 100644 index 000000000000..1143faca4390 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_analyze_deadline.cpp @@ -0,0 +1,66 @@ +#include "aggregator_impl.h" + +#include +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAnalyzeDeadline : public TTxBase { + TString OperationId; + TActorId ReplyToActorId; + + TTxAnalyzeDeadline(TSelf* self) + : TTxBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_ANALYZE_DEADLINE; } + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeDeadline::Execute"); + + NIceDb::TNiceDb 
db(txc.DB); + auto now = ctx.Now(); + + for (TForceTraversalOperation& operation : Self->ForceTraversals) { + if (operation.CreatedAt + Self->AnalyzeDeadline < now) { + SA_LOG_E("[" << Self->TabletID() << "] Delete long analyze operation, OperationId=" << operation.OperationId); + + OperationId = operation.OperationId; + ReplyToActorId = operation.ReplyToActorId; + Self->DeleteForceTraversalOperation(operation.OperationId, db); + break; + } + } + + return true; + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeDeadline::Complete"); + + if (OperationId) { + if (ReplyToActorId) { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeDeadline::Complete. " << + "Send TEvAnalyzeResponse for deleted operation, OperationId=" << OperationId << ", ActorId=" << ReplyToActorId); + auto response = std::make_unique(); + response->Record.SetOperationId(OperationId); + response->Record.SetStatus(NKikimrStat::TEvAnalyzeResponse::STATUS_ERROR); + ctx.Send(ReplyToActorId, response.release()); + } else { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeDeadline::Complete. No ActorId to send reply. 
OperationId=" << OperationId); + } + ctx.Send(Self->SelfId(), new TEvPrivate::TEvAnalyzeDeadline()); + } else { + ctx.Schedule(AnalyzeDeadlinePeriod, new TEvPrivate::TEvAnalyzeDeadline()); + } + } +}; + +void TStatisticsAggregator::Handle(TEvPrivate::TEvAnalyzeDeadline::TPtr&) { + Execute(new TTxAnalyzeDeadline(this), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_analyze_table_delivery_problem.cpp b/ydb/core/statistics/aggregator/tx_analyze_table_delivery_problem.cpp new file mode 100644 index 000000000000..6e04a23e339e --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_analyze_table_delivery_problem.cpp @@ -0,0 +1,46 @@ +#include "aggregator_impl.h" + +#include +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAnalyzeTableDeliveryProblem : public TTxBase { + TTxAnalyzeTableDeliveryProblem(TSelf* self) + : TTxBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_ANALYZE_TABLE_DELIVERY_PROBLEM; } + + bool Execute(TTransactionContext&, const TActorContext&) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeTableDeliveryProblem::Execute"); + + for (TForceTraversalOperation& operation : Self->ForceTraversals) { + for (TForceTraversalTable& operationTable : operation.Tables) { + for(TAnalyzedShard& analyzedShard : operationTable.AnalyzedShards) { + if (analyzedShard.Status == TAnalyzedShard::EStatus::DeliveryProblem) { + SA_LOG_D("[" << Self->TabletID() << "] Reset DeliveryProblem to ColumnShard=" << analyzedShard.ShardTabletId); + analyzedShard.Status = TAnalyzedShard::EStatus::None; + } + } + } + } + + return true; + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeTableDeliveryProblem::Complete"); + + ctx.Schedule(AnalyzeDeliveryProblemPeriod, new TEvPrivate::TEvAnalyzeDeliveryProblem()); + } +}; + +void 
TStatisticsAggregator::Handle(TEvPrivate::TEvAnalyzeDeliveryProblem::TPtr&) { + Execute(new TTxAnalyzeTableDeliveryProblem(this), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_analyze_table_request.cpp b/ydb/core/statistics/aggregator/tx_analyze_table_request.cpp new file mode 100644 index 000000000000..983a4761c78a --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_analyze_table_request.cpp @@ -0,0 +1,79 @@ +#include "aggregator_impl.h" + +#include +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAnalyzeTableRequest : public TTxBase { + std::vector> Events; + + TTxAnalyzeTableRequest(TSelf* self) + : TTxBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_ANALYZE_TABLE_REQUEST; } + + static std::unique_ptr MakeRequest(const TString& operationId, const TForceTraversalTable& operationTable) { + auto request = std::make_unique(); + auto& record = request->Record; + record.SetOperationId(operationId); + auto& table = *record.MutableTable(); + PathIdFromPathId(operationTable.PathId, table.MutablePathId()); + TVector columnTags = Scan(SplitString(operationTable.ColumnTags, ",")); + table.MutableColumnTags()->Add(columnTags.begin(), columnTags.end()); + return request; + } + + bool Execute(TTransactionContext&, const TActorContext&) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeTableRequest::Execute"); + + for (TForceTraversalOperation& operation : Self->ForceTraversals) { + for (TForceTraversalTable& operationTable : operation.Tables) { + if (operationTable.Status == TForceTraversalTable::EStatus::AnalyzeStarted) { + for(TAnalyzedShard& analyzedShard : operationTable.AnalyzedShards) { + if (analyzedShard.Status == TAnalyzedShard::EStatus::None) { + analyzedShard.Status = TAnalyzedShard::EStatus::AnalyzeStarted; + + auto request = MakeRequest(operation.OperationId, operationTable); + 
Events.push_back(std::make_unique(request.release(), analyzedShard.ShardTabletId, true)); + + if (Events.size() == SendAnalyzeCount) + return true; + } + } + } + } + } + + return true; + } + + void Complete(const TActorContext& ctx) override { + if (Events.size()) { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeTableRequest::Complete. Send " << Events.size() << " events."); + } + else { + SA_LOG_T("[" << Self->TabletID() << "] TTxAnalyzeTableRequest::Complete."); + } + + for (auto& ev : Events) { + Self->Send(MakePipePerNodeCacheID(false), ev.release() ); + } + + if (Events.size() == SendAnalyzeCount) { + ctx.Send(Self->SelfId(), new TEvPrivate::TEvSendAnalyze()); + } else { + ctx.Schedule(SendAnalyzePeriod, new TEvPrivate::TEvSendAnalyze()); + } + } +}; + +void TStatisticsAggregator::Handle(TEvPrivate::TEvSendAnalyze::TPtr&) { + Execute(new TTxAnalyzeTableRequest(this), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_analyze_table_response.cpp b/ydb/core/statistics/aggregator/tx_analyze_table_response.cpp new file mode 100644 index 000000000000..2bbade5ed146 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_analyze_table_response.cpp @@ -0,0 +1,67 @@ +#include "aggregator_impl.h" + +#include +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxAnalyzeTableResponse : public TTxBase { + NKikimrStat::TEvAnalyzeTableResponse Record; + + TTxAnalyzeTableResponse(TSelf* self, NKikimrStat::TEvAnalyzeTableResponse&& record) + : TTxBase(self) + , Record(std::move(record)) + {} + + TTxType GetTxType() const override { return TXTYPE_ANALYZE_TABLE_RESPONSE; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute"); + + const TString operationId = Record.GetOperationId(); + const TPathId pathId = PathIdFromPathId(Record.GetPathId()); + auto operationTable = 
Self->ForceTraversalTable(operationId, pathId); + if (!operationTable) { + SA_LOG_E("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute. Unknown OperationTable. Record: " << Record.ShortDebugString()); + return true; + } + + auto analyzedShard = std::find_if(operationTable->AnalyzedShards.begin(), operationTable->AnalyzedShards.end(), + [tabletId = Record.GetShardTabletId()] (TAnalyzedShard& analyzedShard) { return analyzedShard.ShardTabletId == tabletId;}); + if (analyzedShard == operationTable->AnalyzedShards.end()) { + SA_LOG_E("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute. Unknown AnalyzedShards. Record: " << Record.ShortDebugString() << ", ShardTabletId " << Record.GetShardTabletId()); + return true; + } + if (analyzedShard->Status != TAnalyzedShard::EStatus::AnalyzeStarted) { + SA_LOG_E("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute. Unknown AnalyzedShards Status. Record: " << Record.ShortDebugString() << ", ShardTabletId " << Record.GetShardTabletId()); + } + + analyzedShard->Status = TAnalyzedShard::EStatus::AnalyzeFinished; + + bool completeResponse = std::any_of(operationTable->AnalyzedShards.begin(), operationTable->AnalyzedShards.end(), + [] (const TAnalyzedShard& analyzedShard) { return analyzedShard.Status == TAnalyzedShard::EStatus::AnalyzeFinished;}); + + if (!completeResponse) { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute. There are shards which are not analyzed"); + return true; + } + NIceDb::TNiceDb db(txc.DB); + Self->UpdateForceTraversalTableStatus(TForceTraversalTable::EStatus::AnalyzeFinished, operationId, *operationTable, db); + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Execute. 
All shards are analyzed"); + return true; + } + + void Complete(const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxAnalyzeTableResponse::Complete."); + } +}; + +void TStatisticsAggregator::Handle(TEvStatistics::TEvAnalyzeTableResponse::TPtr& ev) { + auto& record = ev->Get()->Record; + Execute(new TTxAnalyzeTableResponse(this, std::move(record)), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_configure.cpp b/ydb/core/statistics/aggregator/tx_configure.cpp index 391324b0b01a..4f0790b02e1b 100644 --- a/ydb/core/statistics/aggregator/tx_configure.cpp +++ b/ydb/core/statistics/aggregator/tx_configure.cpp @@ -22,8 +22,13 @@ struct TStatisticsAggregator::TTxConfigure : public TTxBase { NIceDb::TNiceDb db(txc.DB); + bool needInitialize = !Self->Database; Self->Database = Record.GetDatabase(); Self->PersistSysParam(db, Schema::SysParam_Database, Self->Database); + + if (needInitialize) { + Self->InitializeStatisticsTable(); + } return true; } diff --git a/ydb/core/statistics/aggregator/tx_datashard_scan_response.cpp b/ydb/core/statistics/aggregator/tx_datashard_scan_response.cpp new file mode 100644 index 000000000000..79ea60b5bbc7 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_datashard_scan_response.cpp @@ -0,0 +1,84 @@ +#include "aggregator_impl.h" + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxDatashardScanResponse : public TTxBase { + NKikimrStat::TEvStatisticsResponse Record; + bool IsCorrectShardId = false; + + TTxDatashardScanResponse(TSelf* self, NKikimrStat::TEvStatisticsResponse&& record) + : TTxBase(self) + , Record(std::move(record)) + {} + + TTxType GetTxType() const override { return TXTYPE_SCAN_RESPONSE; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxDatashardScanResponse::Execute"); + + NIceDb::TNiceDb db(txc.DB); + + // TODO: handle scan errors + + if 
(Self->DatashardRanges.empty()) { + return true; + } + + auto& range = Self->DatashardRanges.front(); + auto replyShardId = Record.GetShardTabletId(); + + if (replyShardId != range.DataShardId) { + return true; + } + + IsCorrectShardId = true; + + for (auto& column : Record.GetColumns()) { + auto tag = column.GetTag(); + for (auto& statistic : column.GetStatistics()) { + if (statistic.GetType() == NKikimr::NStat::COUNT_MIN_SKETCH) { + auto* data = statistic.GetData().Data(); + auto* sketch = reinterpret_cast(data); + + if (Self->ColumnNames.find(tag) == Self->ColumnNames.end()) { + continue; + } + if (Self->CountMinSketches.find(tag) == Self->CountMinSketches.end()) { + Self->CountMinSketches[tag].reset(TCountMinSketch::Create()); + } + + auto& current = Self->CountMinSketches[tag]; + *current += *sketch; + + auto currentStr = TString(current->AsStringBuf()); + db.Table().Key(tag).Update( + NIceDb::TUpdate(currentStr)); + } + } + } + + Self->TraversalStartKey = range.EndKey; + Self->PersistStartKey(db); + + return true; + } + + void Complete(const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxDatashardScanResponse::Complete"); + + if (IsCorrectShardId && !Self->DatashardRanges.empty()) { + Self->DatashardRanges.pop_front(); + Self->ScanNextDatashardRange(); + } + } +}; + +void TStatisticsAggregator::Handle(NStat::TEvStatistics::TEvStatisticsResponse::TPtr& ev) { + auto& record = ev->Get()->Record; + Execute(new TTxDatashardScanResponse(this, std::move(record)), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_delete_query_response.cpp b/ydb/core/statistics/aggregator/tx_delete_query_response.cpp deleted file mode 100644 index 2c0487846a8f..000000000000 --- a/ydb/core/statistics/aggregator/tx_delete_query_response.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "aggregator_impl.h" - -#include - -namespace NKikimr::NStat { - -struct TStatisticsAggregator::TTxDeleteQueryResponse : 
public TTxBase { - std::unordered_set ReplyToActorIds; - - TTxDeleteQueryResponse(TSelf* self) - : TTxBase(self) - {} - - TTxType GetTxType() const override { return TXTYPE_DELETE_QUERY_RESPONSE; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxDeleteQueryResponse::Execute"); - - ReplyToActorIds.swap(Self->ReplyToActorIds); - - NIceDb::TNiceDb db(txc.DB); - Self->FinishScan(db); - - return true; - } - - void Complete(const TActorContext& ctx) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxDeleteQueryResponse::Complete"); - - for (auto& id : ReplyToActorIds) { - ctx.Send(id, new TEvStatistics::TEvScanTableResponse); - } - } -}; -void TStatisticsAggregator::Handle(TEvStatistics::TEvDeleteStatisticsQueryResponse::TPtr&) { - Execute(new TTxDeleteQueryResponse(this), TActivationContext::AsActorContext()); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_finish_trasersal.cpp b/ydb/core/statistics/aggregator/tx_finish_trasersal.cpp new file mode 100644 index 000000000000..cb94e4eac7a8 --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_finish_trasersal.cpp @@ -0,0 +1,65 @@ +#include "aggregator_impl.h" + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxFinishTraversal : public TTxBase { + TString OperationId; + TPathId PathId; + TActorId ReplyToActorId; + + TTxFinishTraversal(TSelf* self) + : TTxBase(self) + , OperationId(self->ForceTraversalOperationId) + , PathId(self->TraversalPathId) + { + auto forceTraversal = Self->CurrentForceTraversalOperation(); + if (forceTraversal) { + ReplyToActorId = forceTraversal->ReplyToActorId; + } + } + + TTxType GetTxType() const override { return TXTYPE_FINISH_TRAVERSAL; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxFinishTraversal::Execute"); + + NIceDb::TNiceDb db(txc.DB); + Self->FinishTraversal(db); + + return true; + } + + 
void Complete(const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxFinishTraversal::Complete " << + Self->LastTraversalWasForceString() << " traversal for path " << PathId); + + if (!ReplyToActorId) { + SA_LOG_D("[" << Self->TabletID() << "] TTxFinishTraversal::Complete. No ActorId to send reply."); + return; + } + + auto forceTraversalRemained = Self->ForceTraversalOperation(OperationId); + + if (forceTraversalRemained) { + SA_LOG_D("[" << Self->TabletID() << "] TTxFinishTraversal::Complete. Don't send TEvAnalyzeResponse. " << + "There are pending operations, OperationId " << OperationId << " , ActorId=" << ReplyToActorId); + } else { + SA_LOG_D("[" << Self->TabletID() << "] TTxFinishTraversal::Complete. " << + "Send TEvAnalyzeResponse, OperationId=" << OperationId << ", ActorId=" << ReplyToActorId); + auto response = std::make_unique(); + response->Record.SetOperationId(OperationId); + response->Record.SetStatus(NKikimrStat::TEvAnalyzeResponse::STATUS_SUCCESS); + ctx.Send(ReplyToActorId, response.release()); + } + } +}; +void TStatisticsAggregator::Handle(TEvStatistics::TEvSaveStatisticsQueryResponse::TPtr&) { + Execute(new TTxFinishTraversal(this), TActivationContext::AsActorContext()); +} +void TStatisticsAggregator::Handle(TEvStatistics::TEvDeleteStatisticsQueryResponse::TPtr&) { + Execute(new TTxFinishTraversal(this), TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_init.cpp b/ydb/core/statistics/aggregator/tx_init.cpp index 4dbd2295c015..fc53cf6cf761 100644 --- a/ydb/core/statistics/aggregator/tx_init.cpp +++ b/ydb/core/statistics/aggregator/tx_init.cpp @@ -3,6 +3,8 @@ #include #include +#include + namespace NKikimr::NStat { struct TStatisticsAggregator::TTxInit : public TTxBase { @@ -19,16 +21,18 @@ struct TStatisticsAggregator::TTxInit : public TTxBase { { // precharge auto sysParamsRowset = db.Table().Range().Select(); - auto baseStatsRowset = 
db.Table().Range().Select(); - auto statisticsRowset = db.Table().Range().Select(); - auto scanTablesRowset = db.Table().Range().Select(); - auto scanOperationsRowset = db.Table().Range().Select(); + auto baseStatisticsRowset = db.Table().Range().Select(); + auto statisticsRowset = db.Table().Range().Select(); + auto scheduleTraversalRowset = db.Table().Range().Select(); + auto forceTraversalOperationsRowset = db.Table().Range().Select(); + auto forceTraversalTablesRowset = db.Table().Range().Select(); if (!sysParamsRowset.IsReady() || - !baseStatsRowset.IsReady() || + !baseStatisticsRowset.IsReady() || !statisticsRowset.IsReady() || - !scanTablesRowset.IsReady() || - !scanOperationsRowset.IsReady()) + !scheduleTraversalRowset.IsReady() || + !forceTraversalOperationsRowset.IsReady() || + !forceTraversalTablesRowset.IsReady()) { return false; } @@ -48,35 +52,38 @@ struct TStatisticsAggregator::TTxInit : public TTxBase { switch (id) { case Schema::SysParam_Database: Self->Database = value; - SA_LOG_D("[" << Self->TabletID() << "] Loading database: " << Self->Database); + SA_LOG_D("[" << Self->TabletID() << "] Loaded database: " << Self->Database); break; - case Schema::SysParam_StartKey: - Self->StartKey = TSerializedCellVec(value); - SA_LOG_D("[" << Self->TabletID() << "] Loading start key"); + case Schema::SysParam_TraversalStartKey: + Self->TraversalStartKey = TSerializedCellVec(value); + SA_LOG_D("[" << Self->TabletID() << "] Loaded traversal start key"); break; - case Schema::SysParam_ScanTableOwnerId: - Self->ScanTableId.PathId.OwnerId = FromString(value); - SA_LOG_D("[" << Self->TabletID() << "] Loading scan table owner id: " - << Self->ScanTableId.PathId.OwnerId); + case Schema::SysParam_TraversalTableOwnerId: + Self->TraversalPathId.OwnerId = FromString(value); + SA_LOG_D("[" << Self->TabletID() << "] Loaded traversal table owner id: " + << Self->TraversalPathId.OwnerId); break; - case Schema::SysParam_ScanTableLocalPathId: - 
Self->ScanTableId.PathId.LocalPathId = FromString(value); - SA_LOG_D("[" << Self->TabletID() << "] Loading scan table local path id: " - << Self->ScanTableId.PathId.LocalPathId); + case Schema::SysParam_TraversalTableLocalPathId: + Self->TraversalPathId.LocalPathId = FromString(value); + SA_LOG_D("[" << Self->TabletID() << "] Loaded traversal table local path id: " + << Self->TraversalPathId.LocalPathId); break; - case Schema::SysParam_ScanStartTime: { + case Schema::SysParam_TraversalStartTime: { auto us = FromString(value); - Self->ScanStartTime = TInstant::MicroSeconds(us); - SA_LOG_D("[" << Self->TabletID() << "] Loading scan start time: " << us); + Self->TraversalStartTime = TInstant::MicroSeconds(us); + SA_LOG_D("[" << Self->TabletID() << "] Loaded traversal start time: " << us); break; } - case Schema::SysParam_LastScanOperationId: { - auto id = FromString(value); - Self->LastScanOperationId = id; - SA_LOG_D("[" << Self->TabletID() << "] Loading last scan operation id: " << id); + case Schema::SysParam_TraversalIsColumnTable: { + Self->TraversalIsColumnTable = FromString(value); + SA_LOG_D("[" << Self->TabletID() << "] Loaded traversal IsColumnTable: " << value); + break; + } + case Schema::SysParam_GlobalTraversalRound: { + Self->GlobalTraversalRound = FromString(value); + SA_LOG_D("[" << Self->TabletID() << "] Loaded global traversal round: " << value); break; } - default: SA_LOG_CRIT("[" << Self->TabletID() << "] Unexpected SysParam id: " << id); } @@ -87,42 +94,42 @@ struct TStatisticsAggregator::TTxInit : public TTxBase { } } - // BaseStats + // BaseStatistics { - Self->BaseStats.clear(); + Self->BaseStatistics.clear(); - auto rowset = db.Table().Range().Select(); + auto rowset = db.Table().Range().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - ui64 schemeShardId = rowset.GetValue(); - TString stats = rowset.GetValue(); + ui64 schemeShardId = rowset.GetValue(); + TString stats = rowset.GetValue(); - 
Self->BaseStats[schemeShardId] = stats; + Self->BaseStatistics[schemeShardId] = stats; if (!rowset.Next()) { return false; } } - SA_LOG_D("[" << Self->TabletID() << "] Loading base stats: " - << "schemeshard count# " << Self->BaseStats.size()); + SA_LOG_D("[" << Self->TabletID() << "] Loaded BaseStatistics: " + << "schemeshard count# " << Self->BaseStatistics.size()); } - // Statistics + // ColumnStatistics { Self->CountMinSketches.clear(); - auto rowset = db.Table().Range().Select(); + auto rowset = db.Table().Range().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - ui32 columnTag = rowset.GetValue(); - TString sketch = rowset.GetValue(); + ui32 columnTag = rowset.GetValue(); + TString sketch = rowset.GetValue(); Self->CountMinSketches[columnTag].reset( TCountMinSketch::FromString(sketch.data(), sketch.size())); @@ -132,76 +139,129 @@ struct TStatisticsAggregator::TTxInit : public TTxBase { } } - SA_LOG_D("[" << Self->TabletID() << "] Loading statistics: " + SA_LOG_D("[" << Self->TabletID() << "] Loaded ColumnStatistics: " << "column count# " << Self->CountMinSketches.size()); } - // ScanTables + // ScheduleTraversals { - Self->ScanTablesByTime.Clear(); - Self->ScanTablesBySchemeShard.clear(); - Self->ScanTables.clear(); + Self->ScheduleTraversalsByTime.Clear(); + Self->ScheduleTraversalsBySchemeShard.clear(); + Self->ScheduleTraversals.clear(); - auto rowset = db.Table().Range().Select(); + auto rowset = db.Table().Range().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - ui64 ownerId = rowset.GetValue(); - ui64 localPathId = rowset.GetValue(); - ui64 lastUpdateTime = rowset.GetValue(); - ui64 schemeShardId = rowset.GetValue(); + ui64 ownerId = rowset.GetValue(); + ui64 localPathId = rowset.GetValue(); + ui64 lastUpdateTime = rowset.GetValue(); + ui64 schemeShardId = rowset.GetValue(); + bool isColumnTable = rowset.GetValue(); auto pathId = TPathId(ownerId, localPathId); - TScanTable scanTable; - 
scanTable.PathId = pathId; - scanTable.SchemeShardId = schemeShardId; - scanTable.LastUpdateTime = TInstant::MicroSeconds(lastUpdateTime); + TScheduleTraversal scheduleTraversal; + scheduleTraversal.PathId = pathId; + scheduleTraversal.SchemeShardId = schemeShardId; + scheduleTraversal.LastUpdateTime = TInstant::MicroSeconds(lastUpdateTime); + scheduleTraversal.IsColumnTable = isColumnTable; - auto [it, _] = Self->ScanTables.emplace(pathId, scanTable); - Self->ScanTablesByTime.Add(&it->second); - Self->ScanTablesBySchemeShard[schemeShardId].insert(pathId); + auto [it, _] = Self->ScheduleTraversals.emplace(pathId, scheduleTraversal); + Self->ScheduleTraversalsByTime.Add(&it->second); + Self->ScheduleTraversalsBySchemeShard[schemeShardId].insert(pathId); if (!rowset.Next()) { return false; } } - SA_LOG_D("[" << Self->TabletID() << "] Loading scan tables: " - << "table count# " << Self->ScanTables.size()); + SA_LOG_D("[" << Self->TabletID() << "] Loaded ScheduleTraversals: " + << "table count# " << Self->ScheduleTraversals.size()); } - // ScanOperations + // ForceTraversalOperations { - Self->ScanOperations.Clear(); - Self->ScanOperationsByPathId.clear(); + Self->ForceTraversals.clear(); + + auto rowset = db.Table().Range().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + TString operationId = rowset.GetValue(); + TString types = rowset.GetValue(); + ui64 createdAt = rowset.GetValue(); + + TForceTraversalOperation operation { + .OperationId = operationId, + .Tables = {}, + .Types = types, + .ReplyToActorId = {}, + .CreatedAt = TInstant::FromValue(createdAt) + }; + Self->ForceTraversals.emplace_back(operation); + + if (!rowset.Next()) { + return false; + } + } + + Self->TabletCounters->Simple()[COUNTER_FORCE_TRAVERSALS_INFLIGHT_SIZE].Set(Self->ForceTraversals.size()); + + SA_LOG_D("[" << Self->TabletID() << "] Loaded ForceTraversalOperations: " + << "table count# " << Self->ForceTraversals.size()); + } - auto rowset = 
db.Table().Range().Select(); + // ForceTraversalTables + { + auto rowset = db.Table().Range().Select(); if (!rowset.IsReady()) { return false; } + size_t size = 0; while (!rowset.EndOfSet()) { - ui64 operationId = rowset.GetValue(); - ui64 ownerId = rowset.GetValue(); - ui64 localPathId = rowset.GetValue(); + ++size; + + TString operationId = rowset.GetValue(); + ui64 ownerId = rowset.GetValue(); + ui64 localPathId = rowset.GetValue(); + TString columnTags = rowset.GetValue(); + TForceTraversalTable::EStatus status = (TForceTraversalTable::EStatus)rowset.GetValue(); + + if (status == TForceTraversalTable::EStatus::AnalyzeStarted) { + // Resend TEvAnalyzeTable to shards + status = TForceTraversalTable::EStatus::None; + } else if (status == TForceTraversalTable::EStatus::TraversalStarted) { + // Reset traversal + status = TForceTraversalTable::EStatus::AnalyzeFinished; + } auto pathId = TPathId(ownerId, localPathId); - TScanOperation& operation = Self->ScanOperationsByPathId[pathId]; - operation.PathId = pathId; - operation.OperationId = operationId; - Self->ScanOperations.PushBack(&operation); + TForceTraversalTable operationTable { + .PathId = pathId, + .ColumnTags = columnTags, + .Status = status, + }; + auto forceTraversalOperation = Self->ForceTraversalOperation(operationId); + if (forceTraversalOperation) { + forceTraversalOperation->Tables.emplace_back(operationTable); + } else { // orphaned row: log it and fall through, so rowset.Next() below still advances (a bare `continue` here would spin forever) + SA_LOG_E("[" << Self->TabletID() << "] ForceTraversalTables contains unknown operationId: " << operationId); + } if (!rowset.Next()) { return false; } } - SA_LOG_D("[" << Self->TabletID() << "] Loading scan operations: " - << "table count# " << Self->ScanOperationsByPathId.size()); + SA_LOG_D("[" << Self->TabletID() << "] Loaded ForceTraversalTables: " + << "table count# " << size); } return true; @@ -217,11 +277,24 @@ struct TStatisticsAggregator::TTxInit : public TTxBase { Self->SubscribeForConfigChanges(ctx); Self->Schedule(Self->PropagateInterval, new 
TEvPrivate::TEvPropagate()); - Self->Schedule(Self->ScheduleScanIntervalTime, new TEvPrivate::TEvScheduleScan()); - Self->InitializeStatisticsTable(); + if (Self->EnableColumnStatistics) { + Self->Schedule(Self->TraversalPeriod, new TEvPrivate::TEvScheduleTraversal()); + Self->Schedule(Self->SendAnalyzePeriod, new TEvPrivate::TEvSendAnalyze()); + Self->Schedule(Self->AnalyzeDeliveryProblemPeriod, new TEvPrivate::TEvAnalyzeDeliveryProblem()); + Self->Schedule(Self->AnalyzeDeadlinePeriod, new TEvPrivate::TEvAnalyzeDeadline()); + } else { + SA_LOG_W("[" << Self->TabletID() << "] TTxInit::Complete. EnableColumnStatistics=false"); + } + + if (Self->Database) { + Self->InitializeStatisticsTable(); + } - if (Self->ScanTableId.PathId) { + if (Self->TraversalPathId && Self->TraversalStartKey) { + SA_LOG_D("[" << Self->TabletID() << "] TTxInit::Complete. Start navigate. PathId " << Self->TraversalPathId); + Self->NavigateType = ENavigateType::Traversal; + Self->NavigatePathId = Self->TraversalPathId; Self->Navigate(); } diff --git a/ydb/core/statistics/aggregator/tx_init_schema.cpp b/ydb/core/statistics/aggregator/tx_init_schema.cpp index 4c77a9b5e9cb..352786245589 100644 --- a/ydb/core/statistics/aggregator/tx_init_schema.cpp +++ b/ydb/core/statistics/aggregator/tx_init_schema.cpp @@ -15,7 +15,9 @@ struct TStatisticsAggregator::TTxInitSchema : public TTxBase { NIceDb::TNiceDb(txc.DB).Materialize(); static constexpr NIceDb::TTableId bigTableIds[] = { - Schema::BaseStats::TableId, + Schema::BaseStatistics::TableId, + Schema::ColumnStatistics::TableId, + Schema::ScheduleTraversals::TableId }; for (auto id : bigTableIds) { diff --git a/ydb/core/statistics/aggregator/tx_navigate.cpp b/ydb/core/statistics/aggregator/tx_navigate.cpp index 95abd04cecbb..79df2a01f778 100644 --- a/ydb/core/statistics/aggregator/tx_navigate.cpp +++ b/ydb/core/statistics/aggregator/tx_navigate.cpp @@ -29,7 +29,7 @@ struct TStatisticsAggregator::TTxNavigate : public TTxBase { if (entry.Status == 
NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown) { Self->DeleteStatisticsFromTable(); } else { - Self->FinishScan(db); + Self->FinishTraversal(db); } return true; } @@ -52,12 +52,19 @@ struct TStatisticsAggregator::TTxNavigate : public TTxBase { Self->KeyColumnTypes[col.second.KeyOrder] = col.second.PType; } - if (Self->StartKey.GetCells().empty()) { + if (Self->TraversalStartKey.GetCells().empty()) { TVector minusInf(Self->KeyColumnTypes.size()); - Self->StartKey = TSerializedCellVec(minusInf); + Self->TraversalStartKey = TSerializedCellVec(minusInf); Self->PersistStartKey(db); } + if (Self->TraversalIsColumnTable) { + Self->HiveId = entry.DomainInfo->ExtractHive(); + if (Self->HiveId == 0) { + Self->HiveId = AppData()->DomainsInfo->GetHive(); + } + } + return true; } diff --git a/ydb/core/statistics/aggregator/tx_resolve.cpp b/ydb/core/statistics/aggregator/tx_resolve.cpp index 36c9360e6f43..36d9d561144b 100644 --- a/ydb/core/statistics/aggregator/tx_resolve.cpp +++ b/ydb/core/statistics/aggregator/tx_resolve.cpp @@ -1,12 +1,16 @@ #include "aggregator_impl.h" +#include #include +#include + namespace NKikimr::NStat { struct TStatisticsAggregator::TTxResolve : public TTxBase { std::unique_ptr Request; bool Cancelled = false; + bool StartColumnShardEventDistribution = true; TTxResolve(TSelf* self, NSchemeCache::TSchemeCacheRequest* request) : TTxBase(self) @@ -15,48 +19,129 @@ struct TStatisticsAggregator::TTxResolve : public TTxBase { TTxType GetTxType() const override { return TXTYPE_RESOLVE; } - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxResolve::Execute"); + bool ExecuteAnalyze(const NSchemeCache::TSchemeCacheRequest::TEntry& entry, NIceDb::TNiceDb& db) { + Y_ABORT_UNLESS(Self->NavigateAnalyzeOperationId); + Y_ABORT_UNLESS(Self->NavigatePathId); - NIceDb::TNiceDb db(txc.DB); + if (entry.Status == NSchemeCache::TSchemeCacheRequest::EStatus::PathErrorNotExist) { + // 
AnalyzedShards will be empty and Analyze will complete without sending event to shards + return true; + } - Y_ABORT_UNLESS(Request->ResultSet.size() == 1); - const auto& entry = Request->ResultSet.front(); + if (entry.Status != NSchemeCache::TSchemeCacheRequest::EStatus::OkData) { + Cancelled = true; + return true; + } + auto& partitioning = entry.KeyDescription->GetPartitions(); + + auto forceTraversalTable = Self->ForceTraversalTable(Self->NavigateAnalyzeOperationId, Self->NavigatePathId); + Y_ABORT_UNLESS(forceTraversalTable); + + for (auto& part : partitioning) { + if (part.Range) { + forceTraversalTable->AnalyzedShards.push_back({ + .ShardTabletId = part.ShardId, + .Status = TAnalyzedShard::EStatus::None + }); + } + } + + SA_LOG_D("[" << Self->TabletID() << "] TTxResolve::ExecuteAnalyze. Table OperationId " << Self->NavigateAnalyzeOperationId << ", PathId " << Self->NavigatePathId + << ", AnalyzedShards " << forceTraversalTable->AnalyzedShards.size()); + + Self->UpdateForceTraversalTableStatus(TForceTraversalTable::EStatus::AnalyzeStarted, Self->NavigateAnalyzeOperationId, *forceTraversalTable, db); + return true; + } + + bool ExecuteTraversal(const NSchemeCache::TSchemeCacheRequest::TEntry& entry, NIceDb::TNiceDb& db) { if (entry.Status != NSchemeCache::TSchemeCacheRequest::EStatus::OkData) { Cancelled = true; if (entry.Status == NSchemeCache::TSchemeCacheRequest::EStatus::PathErrorNotExist) { Self->DeleteStatisticsFromTable(); } else { - Self->FinishScan(db); + Self->FinishTraversal(db); } return true; } - Self->ShardRanges.clear(); + auto& partitioning = entry.KeyDescription->GetPartitions(); - auto& partitioning = entry.KeyDescription->Partitioning; - for (auto& part : *partitioning) { + if (Self->TraversalIsColumnTable) { + Self->TabletsForReqDistribution.clear(); + Self->CountMinSketches.clear(); + } else { + Self->DatashardRanges.clear(); + } + + for (auto& part : partitioning) { if (!part.Range) { continue; } - TRange range; - range.EndKey = 
part.Range->EndKeyPrefix; - range.DataShardId = part.ShardId; - Self->ShardRanges.push_back(range); + if (Self->TraversalIsColumnTable) { + Self->TabletsForReqDistribution.insert(part.ShardId); + } else { + TRange range; + range.EndKey = part.Range->EndKeyPrefix; + range.DataShardId = part.ShardId; + Self->DatashardRanges.push_back(range); + } + } + + if (Self->TraversalIsColumnTable && Self->TabletsForReqDistribution.empty()) { + Self->FinishTraversal(db); + StartColumnShardEventDistribution = false; } + return true; } - void Complete(const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxResolve::Complete"); + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxResolve::Execute"); + + NIceDb::TNiceDb db(txc.DB); + + Y_ABORT_UNLESS(Request->ResultSet.size() == 1); + const auto& entry = Request->ResultSet.front(); + + switch (Self->NavigateType) { + case ENavigateType::Analyze: + return ExecuteAnalyze(entry, db); + case ENavigateType::Traversal: + return ExecuteTraversal(entry, db); + }; + + } + + void CompleteTraversal(const TActorContext& ctx) { if (Cancelled) { return; } - Self->NextRange(); + if (Self->TraversalIsColumnTable) { + if (StartColumnShardEventDistribution) { + ctx.Send(Self->SelfId(), new TEvPrivate::TEvRequestDistribution); + } + } else { + Self->ScanNextDatashardRange(); + } + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxResolve::Complete"); + + switch (Self->NavigateType) { + case ENavigateType::Analyze: + break; + case ENavigateType::Traversal: + CompleteTraversal(ctx); + break; + }; + + Self->NavigateAnalyzeOperationId.clear(); + Self->NavigatePathId = {}; } }; diff --git a/ydb/core/statistics/aggregator/tx_response_tablet_distribution.cpp b/ydb/core/statistics/aggregator/tx_response_tablet_distribution.cpp new file mode 100644 index 000000000000..323a8dec6afe --- /dev/null +++ 
b/ydb/core/statistics/aggregator/tx_response_tablet_distribution.cpp @@ -0,0 +1,121 @@ +#include "aggregator_impl.h" + +#include +#include + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxResponseTabletDistribution : public TTxBase { + const NKikimrHive::TEvResponseTabletDistribution HiveRecord; + + enum class EAction : ui8 { + None, + SendAggregate, + ScheduleResolve, + ScheduleReqDistribution, + }; + EAction Action = EAction::None; + + std::unique_ptr AggregateStatisticsRequest; + + TTxResponseTabletDistribution(TSelf* self, NKikimrHive::TEvResponseTabletDistribution&& hiveRecord) + : TTxBase(self) + , HiveRecord(std::move(hiveRecord)) + {} + + TTxType GetTxType() const override { return TXTYPE_RESPONSE_TABLET_DISTRIBUTION; } + + bool ExecuteStartForceTraversal(TTransactionContext& txc) { + ++Self->TraversalRound; + ++Self->GlobalTraversalRound; + + NIceDb::TNiceDb db(txc.DB); + Self->PersistGlobalTraversalRound(db); + + AggregateStatisticsRequest = std::make_unique(); + auto& outRecord = AggregateStatisticsRequest->Record; + outRecord.SetRound(Self->GlobalTraversalRound); + PathIdFromPathId(Self->TraversalPathId, outRecord.MutablePathId()); + + const auto forceTraversalTable = Self->CurrentForceTraversalTable(); + if (forceTraversalTable) { + TVector columnTags = Scan(SplitString(forceTraversalTable->ColumnTags, ",")); + outRecord.MutableColumnTags()->Add(columnTags.begin(), columnTags.end()); + } + + for (auto& inNode : HiveRecord.GetNodes()) { + auto& outNode = *outRecord.AddNodes(); + outNode.SetNodeId(inNode.GetNodeId()); + outNode.MutableTabletIds()->CopyFrom(inNode.GetTabletIds()); + } + + return true; + } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxResponseTabletDistribution::Execute. 
Node count = " << HiveRecord.NodesSize()); + + auto distribution = Self->TabletsForReqDistribution; + for (auto& inNode : HiveRecord.GetNodes()) { + if (inNode.GetNodeId() == 0) { + // these tablets are probably in Hive boot queue + if (Self->HiveRequestRound < Self->MaxHiveRequestRoundCount) { + SA_LOG_W("[" << Self->TabletID() << "] TTxResponseTabletDistribution::Execute. Some tablets are probably in Hive boot queue"); + Action = EAction::ScheduleReqDistribution; + } + continue; + } + for (auto tabletId : inNode.GetTabletIds()) { + distribution.erase(tabletId); + } + } + + if (Action == EAction::ScheduleReqDistribution) { + return true; + } + + if (!distribution.empty() && Self->ResolveRound < Self->MaxResolveRoundCount) { + SA_LOG_W("[" << Self->TabletID() << "] TTxResponseTabletDistribution::Execute. Some tablets do not exist in Hive anymore; tablet count = " << distribution.size()); + // these tablets do not exist in Hive anymore + Self->NavigatePathId = Self->TraversalPathId; + Action = EAction::ScheduleResolve; + return true; + } + + Action = EAction::SendAggregate; + return ExecuteStartForceTraversal(txc); + } + + void Complete(const TActorContext& ctx) override { + SA_LOG_D("[" << Self->TabletID() << "] TTxResponseTabletDistribution::Complete"); + + switch (Action) { + case EAction::ScheduleResolve: + ctx.Schedule(ResolveRetryInterval, new TEvPrivate::TEvResolve()); + break; + + case EAction::ScheduleReqDistribution: + ctx.Schedule(HiveRetryInterval, new TEvPrivate::TEvRequestDistribution()); + break; + + case EAction::SendAggregate: + ctx.Send(MakeStatServiceID(Self->SelfId().NodeId()), AggregateStatisticsRequest.release()); + ctx.Schedule(KeepAliveTimeout, new TEvPrivate::TEvAckTimeout(++Self->KeepAliveSeqNo)); + Self->AggregationRequestBeginTime = AppData(ctx)->TimeProvider->Now(); + break; + + default: + break; + } + } +}; + +void TStatisticsAggregator::Handle(TEvHive::TEvResponseTabletDistribution::TPtr& ev) { + auto& record = ev->Get()->Record; + 
Execute(new TTxResponseTabletDistribution(this, std::move(record)), + TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_save_query_response.cpp b/ydb/core/statistics/aggregator/tx_save_query_response.cpp deleted file mode 100644 index 3c8a24357af8..000000000000 --- a/ydb/core/statistics/aggregator/tx_save_query_response.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "aggregator_impl.h" - -#include - -namespace NKikimr::NStat { - -struct TStatisticsAggregator::TTxSaveQueryResponse : public TTxBase { - std::unordered_set ReplyToActorIds; - - TTxSaveQueryResponse(TSelf* self) - : TTxBase(self) - {} - - TTxType GetTxType() const override { return TXTYPE_SAVE_QUERY_RESPONSE; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxSaveQueryResponse::Execute"); - - ReplyToActorIds.swap(Self->ReplyToActorIds); - - NIceDb::TNiceDb db(txc.DB); - Self->FinishScan(db); - - return true; - } - - void Complete(const TActorContext& ctx) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxSaveQueryResponse::Complete"); - - for (auto& id : ReplyToActorIds) { - ctx.Send(id, new TEvStatistics::TEvScanTableResponse); - } - } -}; -void TStatisticsAggregator::Handle(TEvStatistics::TEvSaveStatisticsQueryResponse::TPtr&) { - Execute(new TTxSaveQueryResponse(this), TActivationContext::AsActorContext()); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_scan_table.cpp b/ydb/core/statistics/aggregator/tx_scan_table.cpp deleted file mode 100644 index ae687ccca616..000000000000 --- a/ydb/core/statistics/aggregator/tx_scan_table.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "aggregator_impl.h" - -#include - -namespace NKikimr::NStat { - -struct TStatisticsAggregator::TTxScanTable : public TTxBase { - NKikimrStat::TEvScanTable Record; - TActorId ReplyToActorId; - ui64 OperationId = 0; - - TTxScanTable(TSelf* self, NKikimrStat::TEvScanTable&& 
record, TActorId replyToActorId) - : TTxBase(self) - , Record(std::move(record)) - , ReplyToActorId(replyToActorId) - {} - - TTxType GetTxType() const override { return TXTYPE_SCAN_TABLE; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxScanTable::Execute"); - - if (!Self->EnableColumnStatistics) { - return true; - } - - auto pathId = PathIdFromPathId(Record.GetPathId()); - - auto itOp = Self->ScanOperationsByPathId.find(pathId); - if (itOp != Self->ScanOperationsByPathId.end()) { - itOp->second.ReplyToActorIds.insert(ReplyToActorId); - OperationId = itOp->second.OperationId; - return true; - } - - NIceDb::TNiceDb db(txc.DB); - - TScanOperation& operation = Self->ScanOperationsByPathId[pathId]; - operation.PathId = pathId; - operation.OperationId = ++Self->LastScanOperationId; - operation.ReplyToActorIds.insert(ReplyToActorId); - Self->ScanOperations.PushBack(&operation); - - Self->PersistLastScanOperationId(db); - - db.Table().Key(operation.OperationId).Update( - NIceDb::TUpdate(pathId.OwnerId), - NIceDb::TUpdate(pathId.LocalPathId)); - - OperationId = operation.OperationId; - - return true; - } - - void Complete(const TActorContext& ctx) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxScanTable::Complete"); - - if (!Self->EnableColumnStatistics) { - return; - } - - auto accepted = std::make_unique(); - accepted->Record.SetOperationId(OperationId); - ctx.Send(ReplyToActorId, accepted.release()); - } -}; - -void TStatisticsAggregator::Handle(TEvStatistics::TEvScanTable::TPtr& ev) { - auto& record = ev->Get()->Record; - Execute(new TTxScanTable(this, std::move(record), ev->Sender), - TActivationContext::AsActorContext()); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_schedule_scan.cpp b/ydb/core/statistics/aggregator/tx_schedule_scan.cpp deleted file mode 100644 index a1bbd9438915..000000000000 --- a/ydb/core/statistics/aggregator/tx_schedule_scan.cpp +++ 
/dev/null @@ -1,41 +0,0 @@ -#include "aggregator_impl.h" - -#include - -namespace NKikimr::NStat { - -struct TStatisticsAggregator::TTxScheduleScan : public TTxBase { - TTxScheduleScan(TSelf* self) - : TTxBase(self) - {} - - TTxType GetTxType() const override { return TXTYPE_SCHEDULE_SCAN; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxScheduleScan::Execute"); - - Self->Schedule(Self->ScheduleScanIntervalTime, new TEvPrivate::TEvScheduleScan()); - - if (!Self->EnableColumnStatistics) { - return true; - } - - if (Self->ScanTableId.PathId) { - return true; // scan is in progress - } - - NIceDb::TNiceDb db(txc.DB); - Self->ScheduleNextScan(db); - return true; - } - - void Complete(const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxScheduleScan::Complete"); - } -}; - -void TStatisticsAggregator::Handle(TEvPrivate::TEvScheduleScan::TPtr&) { - Execute(new TTxScheduleScan(this), TActivationContext::AsActorContext()); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_schedule_traversal.cpp b/ydb/core/statistics/aggregator/tx_schedule_traversal.cpp new file mode 100644 index 000000000000..0d07c6339c3c --- /dev/null +++ b/ydb/core/statistics/aggregator/tx_schedule_traversal.cpp @@ -0,0 +1,65 @@ +#include "aggregator_impl.h" + +#include + +namespace NKikimr::NStat { + +struct TStatisticsAggregator::TTxScheduleTrasersal : public TTxBase { + TTxScheduleTrasersal(TSelf* self) + : TTxBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_SCHEDULE_TRAVERSAL; } + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { + + if (!Self->EnableColumnStatistics) { + return true; + } + + TDuration time = TDuration ::Zero(); + if (!Self->ForceTraversals.empty()) { + time = ctx.Now() - Self->ForceTraversals.front().CreatedAt; + } + Self->TabletCounters->Simple()[COUNTER_FORCE_TRAVERSAL_INFLIGHT_MAX_TIME].Set(time.MicroSeconds()); + 
+ if (Self->TraversalPathId) { + SA_LOG_T("[" << Self->TabletID() << "] TTxScheduleTrasersal::Execute. Traverse is in progress. PathId " << Self->TraversalPathId); + return true; + } + + if (Self->ScheduleTraversals.empty()) { + SA_LOG_T("[" << Self->TabletID() << "] TTxScheduleTrasersal. No info from schemeshard"); + return true; + } + + SA_LOG_T("[" << Self->TabletID() << "] TTxScheduleTrasersal::Execute"); + + NIceDb::TNiceDb db(txc.DB); + + switch (Self->NavigateType) { + case ENavigateType::Analyze: + Self->NavigateType = ENavigateType::Traversal; + Self->ScheduleNextTraversal(db); + break; + case ENavigateType::Traversal: + Self->NavigateType = ENavigateType::Analyze; + Self->ScheduleNextAnalyze(db); + break; + } + + return true; + } + + void Complete(const TActorContext&) override { + SA_LOG_T("[" << Self->TabletID() << "] TTxScheduleTrasersal::Complete"); + + Self->Schedule(Self->TraversalPeriod, new TEvPrivate::TEvScheduleTraversal()); + } +}; + +void TStatisticsAggregator::Handle(TEvPrivate::TEvScheduleTraversal::TPtr&) { + Execute(new TTxScheduleTrasersal(this), TActivationContext::AsActorContext()); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/tx_schemeshard_stats.cpp b/ydb/core/statistics/aggregator/tx_schemeshard_stats.cpp index ead6e8642fc0..61efd9a2c743 100644 --- a/ydb/core/statistics/aggregator/tx_schemeshard_stats.cpp +++ b/ydb/core/statistics/aggregator/tx_schemeshard_stats.cpp @@ -21,10 +21,10 @@ struct TStatisticsAggregator::TTxSchemeShardStats : public TTxBase { << ", stats size# " << stats.size()); NIceDb::TNiceDb db(txc.DB); - db.Table().Key(schemeShardId).Update( - NIceDb::TUpdate(stats)); + db.Table().Key(schemeShardId).Update( + NIceDb::TUpdate(stats)); - Self->BaseStats[schemeShardId] = stats; + Self->BaseStatistics[schemeShardId] = stats; if (!Self->EnableColumnStatistics) { return true; @@ -33,36 +33,39 @@ struct TStatisticsAggregator::TTxSchemeShardStats : public TTxBase { NKikimrStat::TSchemeShardStats 
statRecord; Y_PROTOBUF_SUPPRESS_NODISCARD statRecord.ParseFromString(stats); - auto& oldPathIds = Self->ScanTablesBySchemeShard[schemeShardId]; + auto& oldPathIds = Self->ScheduleTraversalsBySchemeShard[schemeShardId]; std::unordered_set newPathIds; for (auto& entry : statRecord.GetEntries()) { auto pathId = PathIdFromPathId(entry.GetPathId()); newPathIds.insert(pathId); if (oldPathIds.find(pathId) == oldPathIds.end()) { - TStatisticsAggregator::TScanTable scanTable; - scanTable.PathId = pathId; - scanTable.SchemeShardId = schemeShardId; - scanTable.LastUpdateTime = TInstant::MicroSeconds(0); - auto [it, _] = Self->ScanTables.emplace(pathId, scanTable); - Self->ScanTablesByTime.Add(&it->second); - - db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Update( - NIceDb::TUpdate(schemeShardId), - NIceDb::TUpdate(0)); + TStatisticsAggregator::TScheduleTraversal traversalTable; + traversalTable.PathId = pathId; + traversalTable.SchemeShardId = schemeShardId; + traversalTable.LastUpdateTime = TInstant::MicroSeconds(0); + traversalTable.IsColumnTable = entry.GetIsColumnTable(); + auto [it, _] = Self->ScheduleTraversals.emplace(pathId, traversalTable); + if (!Self->ScheduleTraversalsByTime.Has(&it->second)) { + Self->ScheduleTraversalsByTime.Add(&it->second); + } + db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Update( + NIceDb::TUpdate(schemeShardId), + NIceDb::TUpdate(0), + NIceDb::TUpdate(entry.GetIsColumnTable())); } } for (auto& pathId : oldPathIds) { if (newPathIds.find(pathId) == newPathIds.end()) { - auto it = Self->ScanTables.find(pathId); - if (it != Self->ScanTables.end()) { - if (Self->ScanTablesByTime.Has(&it->second)) { - Self->ScanTablesByTime.Remove(&it->second); + auto it = Self->ScheduleTraversals.find(pathId); + if (it != Self->ScheduleTraversals.end()) { + if (Self->ScheduleTraversalsByTime.Has(&it->second)) { + Self->ScheduleTraversalsByTime.Remove(&it->second); } - Self->ScanTables.erase(it); + Self->ScheduleTraversals.erase(it); } - 
db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Delete(); + db.Table().Key(pathId.OwnerId, pathId.LocalPathId).Delete(); } } diff --git a/ydb/core/statistics/aggregator/tx_statistics_scan_response.cpp b/ydb/core/statistics/aggregator/tx_statistics_scan_response.cpp deleted file mode 100644 index f32a78450ef3..000000000000 --- a/ydb/core/statistics/aggregator/tx_statistics_scan_response.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "aggregator_impl.h" - -#include - -namespace NKikimr::NStat { - -struct TStatisticsAggregator::TTxStatisticsScanResponse : public TTxBase { - NKikimrStat::TEvStatisticsResponse Record; - bool IsCorrectShardId = false; - - TTxStatisticsScanResponse(TSelf* self, NKikimrStat::TEvStatisticsResponse&& record) - : TTxBase(self) - , Record(std::move(record)) - {} - - TTxType GetTxType() const override { return TXTYPE_SCAN_RESPONSE; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxStatisticsScanResponse::Execute"); - - NIceDb::TNiceDb db(txc.DB); - - // TODO: handle scan errors - - if (Self->ShardRanges.empty()) { - return true; - } - - auto& range = Self->ShardRanges.front(); - auto replyShardId = Record.GetShardTabletId(); - - if (replyShardId != range.DataShardId) { - return true; - } - - IsCorrectShardId = true; - - for (auto& column : Record.GetColumns()) { - auto tag = column.GetTag(); - for (auto& statistic : column.GetStatistics()) { - if (statistic.GetType() == NKikimr::NStat::COUNT_MIN_SKETCH) { - auto* data = statistic.GetData().Data(); - auto* sketch = reinterpret_cast(data); - - if (Self->ColumnNames.find(tag) == Self->ColumnNames.end()) { - continue; - } - if (Self->CountMinSketches.find(tag) == Self->CountMinSketches.end()) { - Self->CountMinSketches[tag].reset(TCountMinSketch::Create()); - } - - auto& current = Self->CountMinSketches[tag]; - *current += *sketch; - - auto currentStr = TString(current->AsStringBuf()); - db.Table().Key(tag).Update( - 
NIceDb::TUpdate(currentStr)); - } - } - } - - Self->StartKey = range.EndKey; - Self->PersistStartKey(db); - - return true; - } - - void Complete(const TActorContext&) override { - SA_LOG_D("[" << Self->TabletID() << "] TTxStatisticsScanResponse::Complete"); - - if (IsCorrectShardId && !Self->ShardRanges.empty()) { - Self->ShardRanges.pop_front(); - Self->NextRange(); - } - } -}; - -void TStatisticsAggregator::Handle(NStat::TEvStatistics::TEvStatisticsResponse::TPtr& ev) { - auto& record = ev->Get()->Record; - Execute(new TTxStatisticsScanResponse(this, std::move(record)), - TActivationContext::AsActorContext()); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/aggregator/ut/ut_analyze_columnshard.cpp b/ydb/core/statistics/aggregator/ut/ut_analyze_columnshard.cpp new file mode 100644 index 000000000000..56d302a0effd --- /dev/null +++ b/ydb/core/statistics/aggregator/ut/ut_analyze_columnshard.cpp @@ -0,0 +1,360 @@ +#include + +#include + +#include + +#include + +namespace NKikimr { +namespace NStat { + +Y_UNIT_TEST_SUITE(AnalyzeColumnshard) { + Y_UNIT_TEST(AnalyzeTable) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + + AnalyzeTable(runtime, tableInfo.ShardIds[0], tableInfo.PathId); + } + + Y_UNIT_TEST(Analyze) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + + Analyze(runtime, tableInfo.SaTabletId, {tableInfo.PathId}); + } + + Y_UNIT_TEST(AnalyzeServerless) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + auto databaseInfo = CreateServerlessDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + + Analyze(runtime, tableInfo.SaTabletId, {tableInfo.PathId}); + } + + Y_UNIT_TEST(AnalyzeAnalyzeOneColumnTableSpecificColumns) { 
+ TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + + Analyze(runtime, tableInfo.SaTabletId, {{tableInfo.PathId, {1, 2}}}); + } + + Y_UNIT_TEST(AnalyzeTwoColumnTables) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + auto databaseInfo = CreateDatabaseColumnTables(env, 2, 1); + const auto& tableInfos = databaseInfo.Tables; + + Analyze(runtime, tableInfos[0].SaTabletId, {tableInfos[0].PathId, tableInfos[1].PathId}); + } + + Y_UNIT_TEST(AnalyzeStatus) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + TBlockEvents block(runtime); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + + const TString operationId = "operationId"; + AnalyzeStatus(runtime, sender, tableInfo.SaTabletId, operationId, NKikimrStat::TEvAnalyzeStatusResponse::STATUS_NO_OPERATION); + + auto analyzeRequest = MakeAnalyzeRequest({{tableInfo.PathId, {1, 2}}}, operationId); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest.release()); + + AnalyzeStatus(runtime, sender, tableInfo.SaTabletId, operationId, NKikimrStat::TEvAnalyzeStatusResponse::STATUS_ENQUEUED); + + // Check EvRemoteHttpInfo + { + auto httpRequest = std::make_unique("/app?"); + runtime.SendToPipe(tableInfo.SaTabletId, sender, httpRequest.release(), 0, {}); + auto httpResponse = runtime.GrabEdgeEventRethrow(sender); + TString body = httpResponse->Get()->Html; + Cerr << body << Endl; + UNIT_ASSERT(body.Size() > 500); + UNIT_ASSERT(body.Contains("ForceTraversals: 1")); + } + + block.Unblock(); + block.Stop(); + + auto analyzeResonse = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT_VALUES_EQUAL(analyzeResonse->Get()->Record.GetOperationId(), operationId); + + AnalyzeStatus(runtime, sender, tableInfo.SaTabletId, operationId, 
NKikimrStat::TEvAnalyzeStatusResponse::STATUS_NO_OPERATION); + } + + Y_UNIT_TEST(AnalyzeSameOperationId) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + const TString operationId = "operationId"; + + TBlockEvents block(runtime); + + auto tabletPipe = runtime.ConnectToPipe(tableInfo.SaTabletId, sender, 0, {}); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}, operationId); + runtime.SendToPipe(tabletPipe, sender, analyzeRequest1.release()); + + runtime.WaitFor("TEvAnalyzeTableResponse", [&]{ return block.size(); }); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}, operationId); + runtime.SendToPipe(tabletPipe, sender, analyzeRequest2.release()); + + block.Unblock(); + block.Stop(); + + auto response1 = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(response1); + UNIT_ASSERT_VALUES_EQUAL(response1->Get()->Record.GetOperationId(), operationId); + + auto response2 = runtime.GrabEdgeEventRethrow(sender, TDuration::Seconds(5)); + UNIT_ASSERT(!response2); + } + + Y_UNIT_TEST(AnalyzeMultiOperationId) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + auto GetOperationId = [] (size_t i) { return TStringBuilder() << "operationId" << i; }; + + TBlockEvents block(runtime); + + const size_t numEvents = 10; + + auto tabletPipe = runtime.ConnectToPipe(tableInfo.SaTabletId, sender, 0, {}); + + for (size_t i = 0; i < numEvents; ++i) { + auto analyzeRequest = MakeAnalyzeRequest({tableInfo.PathId}, GetOperationId(i)); + runtime.SendToPipe(tabletPipe, sender, analyzeRequest.release()); + } + + runtime.WaitFor("TEvAnalyzeTableResponse", [&]{ return block.size() == numEvents; }); 
+ + block.Unblock(); + block.Stop(); + + for (size_t i = 0; i < numEvents; ++i) { + auto response = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(response); + UNIT_ASSERT_VALUES_EQUAL(response->Get()->Record.GetOperationId(), GetOperationId(i)); + } + } + + Y_UNIT_TEST(AnalyzeRebootSaBeforeAnalyzeTableResponse) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev) { + eventSeen = true; + ev.Reset(); + }); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("TEvAnalyzeTableResponse", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeRebootSaBeforeResolve) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + TBlockEvents block(runtime); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("1st TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + block.Unblock(1); + runtime.WaitFor("2nd TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + block.Unblock(1); + runtime.WaitFor("3rd TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + block.Unblock(); + 
block.Stop(); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeRebootSaBeforeReqDistribution) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev) { + eventSeen = true; + ev.Reset(); + }); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("TEvRequestTabletDistribution", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeRebootSaBeforeAggregate) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev){ + eventSeen = true; + ev.Reset(); + }); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("TEvAggregateStatistics", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + 
} + + Y_UNIT_TEST(AnalyzeRebootSaBeforeSave) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev){ + eventSeen = true; + ev.Reset(); + }); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("TEvAggregateStatisticsResponse", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeRebootSaInAggregate) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + int observerCount = 0; + auto observer = runtime.AddObserver([&](auto& ev) { + if (++observerCount >= 5) { + ev.Reset(); + } + }); + + auto analyzeRequest1 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest1.release()); + + runtime.WaitFor("5th TEvStatisticsRequest", [&]{ return observerCount >= 5; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto analyzeRequest2 = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest2.release()); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeRebootColumnShard) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const 
auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + TBlockEvents block(runtime); + + auto analyzeRequest = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest.release()); + + runtime.WaitFor("TEvAnalyzeTableResponse", [&]{ return block.size(); }); + block.Stop(); + RebootTablet(runtime, tableInfo.ShardIds[0], sender); + + runtime.GrabEdgeEventRethrow(sender); + } + + Y_UNIT_TEST(AnalyzeDeadline) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 1); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + TBlockEvents block(runtime); + + auto analyzeRequest = MakeAnalyzeRequest({tableInfo.PathId}); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest.release()); + + runtime.WaitFor("TEvAnalyzeTableResponse", [&]{ return block.size(); }); + runtime.AdvanceCurrentTime(TDuration::Days(2)); + + auto analyzeResponse = runtime.GrabEdgeEventRethrow(sender); + const auto& record = analyzeResponse->Get()->Record; + UNIT_ASSERT_VALUES_EQUAL(record.GetOperationId(), "operationId"); + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrStat::TEvAnalyzeResponse::STATUS_ERROR); + } +} + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/aggregator/ut/ut_analyze_datashard.cpp b/ydb/core/statistics/aggregator/ut/ut_analyze_datashard.cpp new file mode 100644 index 000000000000..3d4c8e9fcde0 --- /dev/null +++ b/ydb/core/statistics/aggregator/ut/ut_analyze_datashard.cpp @@ -0,0 +1,70 @@ +#include + +#include + +#include +#include +#include +#include + +namespace NKikimr { +namespace NStat { + +Y_UNIT_TEST_SUITE(AnalyzeDatashard) { + + Y_UNIT_TEST(AnalyzeOneTable) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateUniformTable(env, "Database", "Table"); + + ui64 saTabletId; + auto 
pathId = ResolvePathId(runtime, "/Root/Database/Table", nullptr, &saTabletId); + + Analyze(runtime, saTabletId, {{pathId}}); + + ValidateCountMinDatashardAbsense(runtime, pathId); + } + + Y_UNIT_TEST(AnalyzeTwoTables) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateUniformTable(env, "Database", "Table1"); + CreateUniformTable(env, "Database", "Table2"); + + ui64 saTabletId1; + auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table1", nullptr, &saTabletId1); + auto pathId2 = ResolvePathId(runtime, "/Root/Database/Table2"); + + Analyze(runtime, saTabletId1, {pathId1, pathId2}); + + ValidateCountMinDatashardAbsense(runtime, pathId1); + ValidateCountMinDatashardAbsense(runtime, pathId2); + } + + Y_UNIT_TEST(DropTableNavigateError) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateUniformTable(env, "Database", "Table"); + + ui64 saTabletId = 0; + auto pathId = ResolvePathId(runtime, "/Root/Database/Table", nullptr, &saTabletId); + + DropTable(env, "Database", "Table"); + + Analyze(runtime, saTabletId, {pathId}); + + ValidateCountMinDatashardAbsense(runtime, pathId); + } +} + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/aggregator/ut/ut_traverse_columnshard.cpp b/ydb/core/statistics/aggregator/ut/ut_traverse_columnshard.cpp new file mode 100644 index 000000000000..9d21363191c4 --- /dev/null +++ b/ydb/core/statistics/aggregator/ut/ut_traverse_columnshard.cpp @@ -0,0 +1,350 @@ +#include + +#include + +#include +#include +#include +#include +#include + +namespace NKikimr { +namespace NStat { + +// TODO: check for arbitrary set of values of type T (including frequent duplicates) +// numbers (1..N) were count as a sketch. Check sketch properties +bool CheckCountMinSketch(const std::shared_ptr& sketch, const ui32 N) { + UNIT_ASSERT(sketch->GetElementCount() == N); + const double eps = 1. 
/ sketch->GetWidth(); + const double delta = 1. / (1 << sketch->GetDepth()); + size_t failedEstimatesCount = 0; + for (ui32 i = 0; i < N; ++i) { + const ui32 trueCount = 1; // true count of value i + auto probe = sketch->Probe((const char *)&i, sizeof(i)); + if (probe > trueCount + eps * N) { + failedEstimatesCount++; + } + } + Cerr << ">>> failedEstimatesCount = " << failedEstimatesCount << Endl; + return failedEstimatesCount < delta * N; +} + +Y_UNIT_TEST_SUITE(TraverseColumnShard) { + + Y_UNIT_TEST(TraverseColumnTable) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseServerlessColumnTable) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + auto databaseInfo = CreateServerlessDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootColumnshard) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + RebootTablet(runtime, tableInfo.ShardIds[0], sender); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootSaTabletBeforeResolve) { + TTestEnv env(1, 1); + auto& runtime = 
*env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + TBlockEvents block(runtime); + + runtime.WaitFor("1st TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + block.Unblock(1); + runtime.WaitFor("2nd TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + block.Unblock(1); + runtime.WaitFor("3rd TEvResolveKeySetResult", [&]{ return block.size() >= 1; }); + + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + block.Unblock(); + block.Stop(); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootSaTabletBeforeReqDistribution) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev){ + eventSeen = true; + ev.Reset(); + }); + + runtime.WaitFor("TEvRequestTabletDistribution", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootSaTabletBeforeAggregate) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev){ + eventSeen = true; + ev.Reset(); + }); + + runtime.WaitFor("TEvAggregateStatistics", [&]{ 
return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootSaTabletBeforeSave) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + bool eventSeen = false; + auto observer = runtime.AddObserver([&](auto& ev){ + eventSeen = true; + ev.Reset(); + }); + + runtime.WaitFor("TEvAggregateStatisticsResponse", [&]{ return eventSeen; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableRebootSaTabletInAggregate) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + auto sender = runtime.AllocateEdgeActor(); + + int observerCount = 0; + auto observer = runtime.AddObserver([&](auto& ev){ + if (++observerCount >= 5) { + ev.Reset(); + } + }); + + runtime.WaitFor("5th TEvStatisticsRequest", [&]{ return observerCount >= 5; }); + observer.Remove(); + RebootTablet(runtime, tableInfo.SaTabletId, sender); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableHiveDistributionZeroNodes) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + bool observerFirstExec = true; + auto observer = 
runtime.AddObserver( + [&](TEvHive::TEvResponseTabletDistribution::TPtr& ev) + { + if (observerFirstExec) { + observerFirstExec = false; + auto& record = ev->Get()->Record; + + NKikimrHive::TEvResponseTabletDistribution newRecord; + std::vector unknownTablets; + + for (auto& node : record.GetNodes()) { + auto* newNode = newRecord.AddNodes(); + newNode->SetNodeId(node.GetNodeId()); + int index = 0; + for (auto tabletId : node.GetTabletIds()) { + if (index < 7) { + newNode->AddTabletIds(tabletId); + } else { + unknownTablets.push_back(tabletId); + } + ++index; + } + } + auto* unknownNode = newRecord.AddNodes(); + unknownNode->SetNodeId(0); + for (auto tabletId : unknownTablets) { + unknownNode->AddTabletIds(tabletId); + } + + record.Swap(&newRecord); + } + }); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + Y_UNIT_TEST(TraverseColumnTableHiveDistributionAbsentNodes) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + bool observerFirstExec = true; + auto observer = runtime.AddObserver( + [&](TEvHive::TEvResponseTabletDistribution::TPtr& ev) + { + if (observerFirstExec) { + observerFirstExec = false; + auto& record = ev->Get()->Record; + + NKikimrHive::TEvResponseTabletDistribution newRecord; + + for (auto& node : record.GetNodes()) { + auto* newNode = newRecord.AddNodes(); + newNode->SetNodeId(node.GetNodeId()); + int index = 0; + for (auto tabletId : node.GetTabletIds()) { + if (index < 7) { + newNode->AddTabletIds(tabletId); + } + ++index; + } + } + + record.Swap(&newRecord); + } + }); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + UNIT_ASSERT(CheckCountMinSketch(countMin, ColumnTableRowsNumber)); + } + + 
Y_UNIT_TEST(TraverseColumnTableAggrStatUnavailableNode) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + bool observerFirstExec = true; + auto observer = runtime.AddObserver( + [&](TEvStatistics::TEvAggregateStatisticsResponse::TPtr& ev) + { + if (observerFirstExec) { + observerFirstExec = false; + auto& record = ev->Get()->Record; + + NKikimrStat::TEvAggregateStatisticsResponse newRecord; + newRecord.SetRound(record.GetRound()); + newRecord.MutableColumns()->Swap(record.MutableColumns()); + + auto* failedTablet = newRecord.AddFailedTablets(); + failedTablet->SetError(NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNAVAILABLE_NODE); + failedTablet->SetTabletId(72075186224037900); + failedTablet->SetNodeId(2); + + record.Swap(&newRecord); + } + }); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + + ui32 value = 1; + auto probe = countMin->Probe((const char *)&value, sizeof(value)); + Cerr << "probe = " << probe << Endl; + const double eps = 1. 
/ countMin->GetWidth(); + UNIT_ASSERT(probe <= 1 + eps * ColumnTableRowsNumber * 1.1); // 10 for first round, 1 for second + } + + Y_UNIT_TEST(TraverseColumnTableAggrStatNonLocalTablet) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + bool observerFirstExec = true; + auto observer = runtime.AddObserver( + [&](TEvStatistics::TEvAggregateStatisticsResponse::TPtr& ev) + { + if (observerFirstExec) { + observerFirstExec = false; + auto& record = ev->Get()->Record; + + NKikimrStat::TEvAggregateStatisticsResponse newRecord; + newRecord.SetRound(record.GetRound()); + newRecord.MutableColumns()->Swap(record.MutableColumns()); + + auto* failedTablet = newRecord.AddFailedTablets(); + failedTablet->SetError(NKikimrStat::TEvAggregateStatisticsResponse::TYPE_NON_LOCAL_TABLET); + failedTablet->SetTabletId(72075186224037900); + failedTablet->SetNodeId(3); + + record.Swap(&newRecord); + } + }); + + WaitForSavedStatistics(runtime, tableInfo.PathId); + + auto countMin = ExtractCountMin(runtime, tableInfo.PathId); + + ui32 value = 1; + auto probe = countMin->Probe((const char *)&value, sizeof(value)); + Cerr << "probe = " << probe << Endl; + const double eps = 1. 
/ countMin->GetWidth(); + UNIT_ASSERT(probe <= 1 + eps * ColumnTableRowsNumber * 1.1); // 10 for first round, 1 for second + } + +} + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/aggregator/ut/ut_traverse_datashard.cpp b/ydb/core/statistics/aggregator/ut/ut_traverse_datashard.cpp new file mode 100644 index 000000000000..039334cf15e9 --- /dev/null +++ b/ydb/core/statistics/aggregator/ut/ut_traverse_datashard.cpp @@ -0,0 +1,86 @@ +#include + +#include + +#include +#include +#include +#include + +namespace NKikimr { +namespace NStat { + +Y_UNIT_TEST_SUITE(TraverseDatashard) { + + Y_UNIT_TEST(TraverseOneTable) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateUniformTable(env, "Database", "Table"); + + auto pathId = ResolvePathId(runtime, "/Root/Database/Table"); + ValidateCountMinDatashardAbsense(runtime, pathId); + } + + Y_UNIT_TEST(TraverseTwoTables) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateUniformTable(env, "Database", "Table1"); + CreateUniformTable(env, "Database", "Table2"); + + auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Database/Table2"); + ValidateCountMinDatashardAbsense(runtime, pathId1); + ValidateCountMinDatashardAbsense(runtime, pathId2); + } + + Y_UNIT_TEST(TraverseOneTableServerless) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared", 1, true); + CreateServerlessDatabase(env, "Serverless", "/Root/Shared"); + CreateUniformTable(env, "Serverless", "Table"); + + auto pathId = ResolvePathId(runtime, "/Root/Serverless/Table"); + ValidateCountMinDatashardAbsense(runtime, pathId); + } + + Y_UNIT_TEST(TraverseTwoTablesServerless) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared", 1, true); + CreateServerlessDatabase(env, 
"Serverless", "/Root/Shared"); + CreateUniformTable(env, "Serverless", "Table1"); + CreateUniformTable(env, "Serverless", "Table2"); + + auto pathId1 = ResolvePathId(runtime, "/Root/Serverless/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Serverless/Table2"); + ValidateCountMinDatashardAbsense(runtime, pathId1); + ValidateCountMinDatashardAbsense(runtime, pathId2); + } + + Y_UNIT_TEST(TraverseTwoTablesTwoServerlessDbs) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared", 1, true); + CreateServerlessDatabase(env, "Serverless1", "/Root/Shared"); + CreateServerlessDatabase(env, "Serverless2", "/Root/Shared"); + CreateUniformTable(env, "Serverless1", "Table1"); + CreateUniformTable(env, "Serverless2", "Table2"); + + auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); + ValidateCountMinDatashardAbsense(runtime, pathId1); + ValidateCountMinDatashardAbsense(runtime, pathId2); + } + +} + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/aggregator/ut/ya.make b/ydb/core/statistics/aggregator/ut/ya.make new file mode 100644 index 000000000000..830728bf21fd --- /dev/null +++ b/ydb/core/statistics/aggregator/ut/ya.make @@ -0,0 +1,30 @@ +UNITTEST_FOR(ydb/core/statistics/aggregator) + +FORK_SUBTESTS() + +IF (WITH_VALGRIND) + TIMEOUT(3600) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +YQL_LAST_ABI_VERSION() + +PEERDIR( + library/cpp/testing/unittest + ydb/core/protos + ydb/core/testlib/default + ydb/core/statistics/ut_common +) + +SRCS( + ut_analyze_datashard.cpp + ut_analyze_columnshard.cpp + ut_traverse_datashard.cpp + ut_traverse_columnshard.cpp +) + +END() diff --git a/ydb/core/statistics/aggregator/ya.make b/ydb/core/statistics/aggregator/ya.make index 3223cd9e4d9c..bab460ef0055 100644 --- a/ydb/core/statistics/aggregator/ya.make +++ b/ydb/core/statistics/aggregator/ya.make @@ -7,17 +7,23 
@@ SRCS( aggregator_impl.cpp schema.h schema.cpp + tx_ack_timeout.cpp + tx_aggr_stat_response.cpp + tx_analyze.cpp + tx_analyze_deadline.cpp + tx_analyze_table_delivery_problem.cpp + tx_analyze_table_request.cpp + tx_analyze_table_response.cpp tx_configure.cpp - tx_delete_query_response.cpp + tx_datashard_scan_response.cpp + tx_finish_trasersal.cpp tx_init.cpp tx_init_schema.cpp tx_navigate.cpp tx_resolve.cpp - tx_save_query_response.cpp - tx_scan_table.cpp - tx_schedule_scan.cpp + tx_response_tablet_distribution.cpp + tx_schedule_traversal.cpp tx_schemeshard_stats.cpp - tx_statistics_scan_response.cpp ) PEERDIR( @@ -26,9 +32,14 @@ PEERDIR( ydb/core/protos ydb/core/tablet ydb/core/tablet_flat + ydb/core/statistics/database ydb/library/minsketch ) YQL_LAST_ABI_VERSION() END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/core/statistics/database/database.cpp b/ydb/core/statistics/database/database.cpp new file mode 100644 index 000000000000..41b67c5b0369 --- /dev/null +++ b/ydb/core/statistics/database/database.cpp @@ -0,0 +1,445 @@ +#include "database.h" + +#include + +#include +#include +#include + +namespace NKikimr::NStat { + +class TStatisticsTableCreator : public TActorBootstrapped { +public: + explicit TStatisticsTableCreator(std::unique_ptr resultEvent, const TString& database) + : ResultEvent(std::move(resultEvent)) + , Database(database) + {} + + void Registered(NActors::TActorSystem* sys, const NActors::TActorId& owner) override { + NActors::TActorBootstrapped::Registered(sys, owner); + Owner = owner; + } + + void Bootstrap() { + Become(&TStatisticsTableCreator::StateFunc); + + NKikimrSchemeOp::TPartitioningPolicy partitioningPolicy; + partitioningPolicy.SetSizeToSplit(2 << 30); + + Register( + CreateTableCreator( + { ".metadata", "_statistics" }, + { + Col("owner_id", NScheme::NTypeIds::Uint64), + Col("local_path_id", NScheme::NTypeIds::Uint64), + Col("stat_type", NScheme::NTypeIds::Uint32), + Col("column_tag", NScheme::NTypeIds::Uint32), + Col("data", 
NScheme::NTypeIds::String), + }, + { "owner_id", "local_path_id", "stat_type", "column_tag"}, + NKikimrServices::STATISTICS, + Nothing(), + Database, + true, + std::move(partitioningPolicy) + ) + ); + } + +private: + static NKikimrSchemeOp::TColumnDescription Col(const TString& columnName, const char* columnType) { + NKikimrSchemeOp::TColumnDescription desc; + desc.SetName(columnName); + desc.SetType(columnType); + return desc; + } + + static NKikimrSchemeOp::TColumnDescription Col(const TString& columnName, NScheme::TTypeId columnType) { + return Col(columnName, NScheme::TypeName(columnType)); + } + + void Handle(TEvTableCreator::TEvCreateTableResponse::TPtr&) { + Send(Owner, std::move(ResultEvent)); + PassAway(); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvTableCreator::TEvCreateTableResponse, Handle); + ) + +private: + std::unique_ptr ResultEvent; + const TString Database; + NActors::TActorId Owner; +}; + +NActors::IActor* CreateStatisticsTableCreator(std::unique_ptr event, const TString& database) { + return new TStatisticsTableCreator(std::move(event), database); +} + + +class TSaveStatisticsQuery : public NKikimr::TQueryBase { +private: + const TPathId PathId; + const ui64 StatType; + const std::vector ColumnTags; + const std::vector Data; + +public: + TSaveStatisticsQuery(const TString& database, const TPathId& pathId, ui64 statType, + const std::vector& columnTags, const std::vector& data) + : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}, database, true) + , PathId(pathId) + , StatType(statType) + , ColumnTags(columnTags) + , Data(data) + { + Y_ABORT_UNLESS(ColumnTags.size() == Data.size()); + } + + void OnRunQuery() override { + TStringBuilder sql; + sql << R"( + DECLARE $owner_id AS Uint64; + DECLARE $local_path_id AS Uint64; + DECLARE $stat_type AS Uint32; + DECLARE $column_tags AS List; + DECLARE $data AS List; + + UPSERT INTO `.metadata/_statistics` + (owner_id, local_path_id, stat_type, column_tag, data) + VALUES + )"; + + for (size_t i = 0; i 
< Data.size(); ++i) { + sql << " ($owner_id, $local_path_id, $stat_type, $column_tags[" << i << "], $data[" << i << "])"; + sql << (i == Data.size() - 1 ? ";" : ","); + } + + NYdb::TParamsBuilder params; + params + .AddParam("$owner_id") + .Uint64(PathId.OwnerId) + .Build() + .AddParam("$local_path_id") + .Uint64(PathId.LocalPathId) + .Build() + .AddParam("$stat_type") + .Uint32(StatType) + .Build(); + auto& columnTags = params.AddParam("$column_tags").BeginList(); + for (size_t i = 0; i < ColumnTags.size(); ++i) { + columnTags + .AddListItem() + .Uint32(ColumnTags[i]); + } + columnTags.EndList().Build(); + auto& data = params.AddParam("$data").BeginList(); + for (size_t i = 0; i < Data.size(); ++i) { + data + .AddListItem() + .String(Data[i]); + } + data.EndList().Build(); + + RunDataQuery(sql, ¶ms); + } + + void OnQueryResult() override { + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + Y_UNUSED(issues); + auto response = std::make_unique(); + response->Status = status; + response->Issues = std::move(issues); + response->Success = (status == Ydb::StatusIds::SUCCESS); + response->PathId = PathId; + Send(Owner, response.release()); + } +}; + +class TSaveStatisticsRetryingQuery : public TActorBootstrapped { +private: + const NActors::TActorId ReplyActorId; + const TString Database; + const TPathId PathId; + const ui64 StatType; + const std::vector ColumnTags; + const std::vector Data; + +public: + using TSaveRetryingQuery = TQueryRetryActor< + TSaveStatisticsQuery, TEvStatistics::TEvSaveStatisticsQueryResponse, + const TString&, const TPathId&, ui64, const std::vector&, const std::vector&>; + + TSaveStatisticsRetryingQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId, ui64 statType, std::vector&& columnTags, std::vector&& data) + : ReplyActorId(replyActorId) + , Database(database) + , PathId(pathId) + , StatType(statType) + , ColumnTags(std::move(columnTags)) + , 
Data(std::move(data)) + {} + + void Bootstrap() { + Register(new TSaveRetryingQuery( + SelfId(), + TSaveRetryingQuery::IRetryPolicy::GetExponentialBackoffPolicy( + TSaveRetryingQuery::Retryable, TDuration::MilliSeconds(10), + TDuration::MilliSeconds(200), TDuration::Seconds(1), + std::numeric_limits::max(), TDuration::Seconds(1)), + Database, PathId, StatType, ColumnTags, Data + )); + Become(&TSaveStatisticsRetryingQuery::StateFunc); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvStatistics::TEvSaveStatisticsQueryResponse, Handle); + ) + + void Handle(TEvStatistics::TEvSaveStatisticsQueryResponse::TPtr& ev) { + Send(ReplyActorId, ev->Release().Release()); + PassAway(); + } +}; + +NActors::IActor* CreateSaveStatisticsQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId, ui64 statType, std::vector&& columnTags, std::vector&& data) +{ + return new TSaveStatisticsRetryingQuery(replyActorId, database, pathId, statType, std::move(columnTags), std::move(data)); +} + + +class TLoadStatisticsQuery : public NKikimr::TQueryBase { +private: + const TPathId PathId; + const ui64 StatType; + const ui32 ColumnTag; + const ui64 Cookie; + + std::optional Data; + +public: + TLoadStatisticsQuery(const TString& database, const TPathId& pathId, ui64 statType, ui32 columnTag, ui64 cookie) + : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}, database, true) + , PathId(pathId) + , StatType(statType) + , ColumnTag(columnTag) + , Cookie(cookie) + {} + + void OnRunQuery() override { + TString sql = R"( + DECLARE $owner_id AS Uint64; + DECLARE $local_path_id AS Uint64; + DECLARE $stat_type AS Uint32; + DECLARE $column_tag AS Uint32; + + SELECT + data + FROM `.metadata/_statistics` + WHERE + owner_id = $owner_id AND + local_path_id = $local_path_id AND + stat_type = $stat_type AND + column_tag = $column_tag; + )"; + + NYdb::TParamsBuilder params; + params + .AddParam("$owner_id") + .Uint64(PathId.OwnerId) + .Build() + .AddParam("$local_path_id") + 
.Uint64(PathId.LocalPathId) + .Build() + .AddParam("$stat_type") + .Uint32(StatType) + .Build() + .AddParam("$column_tag") + .Uint32(ColumnTag) + .Build(); + + RunDataQuery(sql, ¶ms, TTxControl::BeginTx()); + } + + void OnQueryResult() override { + if (ResultSets.size() != 1) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected read response", false); + return; + } + NYdb::TResultSetParser result(ResultSets[0]); + if (result.RowsCount() == 0) { + Finish(Ydb::StatusIds::BAD_REQUEST, "No data", false); + return; + } + result.TryNextRow(); + Data = *result.ColumnParser("data").GetOptionalString(); + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + Y_UNUSED(issues); + auto response = std::make_unique(); + response->Status = status; + response->Issues = std::move(issues); + response->Success = (status == Ydb::StatusIds::SUCCESS); + response->Cookie = Cookie; + if (response->Success) { + response->Data = Data; + } + Send(Owner, response.release()); + } +}; + +class TLoadStatisticsRetryingQuery : public TActorBootstrapped { +private: + const NActors::TActorId ReplyActorId; + const TString Database; + const TPathId PathId; + const ui64 StatType; + const ui32 ColumnTag; + const ui64 Cookie; + +public: + using TLoadRetryingQuery = TQueryRetryActor< + TLoadStatisticsQuery, TEvStatistics::TEvLoadStatisticsQueryResponse, + const TString&, const TPathId&, ui64, ui32, ui64>; + + TLoadStatisticsRetryingQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId, ui64 statType, ui32 columnTag, ui64 cookie) + : ReplyActorId(replyActorId) + , Database(database) + , PathId(pathId) + , StatType(statType) + , ColumnTag(columnTag) + , Cookie(cookie) + {} + + void Bootstrap() { + Register(new TLoadRetryingQuery( + SelfId(), + TLoadRetryingQuery::IRetryPolicy::GetExponentialBackoffPolicy( + TLoadRetryingQuery::Retryable, TDuration::MilliSeconds(10), + TDuration::MilliSeconds(200), 
TDuration::Seconds(1), + std::numeric_limits::max(), TDuration::Seconds(1)), + Database, PathId, StatType, ColumnTag, Cookie + )); + Become(&TLoadStatisticsRetryingQuery::StateFunc); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvStatistics::TEvLoadStatisticsQueryResponse, Handle); + ) + + void Handle(TEvStatistics::TEvLoadStatisticsQueryResponse::TPtr& ev) { + Send(ReplyActorId, ev->Release().Release()); + PassAway(); + } +}; + +NActors::IActor* CreateLoadStatisticsQuery(const NActors::TActorId& replyActorId, + const TString& database, const TPathId& pathId, ui64 statType, ui32 columnTag, ui64 cookie) +{ + return new TLoadStatisticsRetryingQuery(replyActorId, database, pathId, statType, columnTag, cookie); +} + + +class TDeleteStatisticsQuery : public NKikimr::TQueryBase { +private: + const TPathId PathId; + +public: + TDeleteStatisticsQuery(const TString& database, const TPathId& pathId) + : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}, database, true) + , PathId(pathId) + { + } + + void OnRunQuery() override { + TString sql = R"( + DECLARE $owner_id AS Uint64; + DECLARE $local_path_id AS Uint64; + + DELETE FROM `.metadata/_statistics` + WHERE + owner_id = $owner_id AND + local_path_id = $local_path_id; + )"; + + NYdb::TParamsBuilder params; + params + .AddParam("$owner_id") + .Uint64(PathId.OwnerId) + .Build() + .AddParam("$local_path_id") + .Uint64(PathId.LocalPathId) + .Build(); + + RunDataQuery(sql, ¶ms); + } + + void OnQueryResult() override { + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + Y_UNUSED(issues); + auto response = std::make_unique(); + response->Status = status; + response->Issues = std::move(issues); + response->Success = (status == Ydb::StatusIds::SUCCESS); + Send(Owner, response.release()); + } +}; + +class TDeleteStatisticsRetryingQuery : public TActorBootstrapped { +private: + const NActors::TActorId ReplyActorId; + const TString Database; + const TPathId PathId; + +public: + 
using TDeleteRetryingQuery = TQueryRetryActor< + TDeleteStatisticsQuery, TEvStatistics::TEvDeleteStatisticsQueryResponse, + const TString&, const TPathId&>; + + TDeleteStatisticsRetryingQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId) + : ReplyActorId(replyActorId) + , Database(database) + , PathId(pathId) + {} + + void Bootstrap() { + Register(new TDeleteRetryingQuery( + SelfId(), + TDeleteRetryingQuery::IRetryPolicy::GetExponentialBackoffPolicy( + TDeleteRetryingQuery::Retryable, TDuration::MilliSeconds(10), + TDuration::MilliSeconds(200), TDuration::Seconds(1), + std::numeric_limits::max(), TDuration::Seconds(1)), + Database, PathId + )); + Become(&TDeleteStatisticsRetryingQuery::StateFunc); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvStatistics::TEvDeleteStatisticsQueryResponse, Handle); + ) + + void Handle(TEvStatistics::TEvDeleteStatisticsQueryResponse::TPtr& ev) { + Send(ReplyActorId, ev->Release().Release()); + PassAway(); + } +}; + +NActors::IActor* CreateDeleteStatisticsQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId) +{ + return new TDeleteStatisticsRetryingQuery(replyActorId, database, pathId); +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/database/database.h b/ydb/core/statistics/database/database.h new file mode 100644 index 000000000000..8f61d433dde4 --- /dev/null +++ b/ydb/core/statistics/database/database.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace NKikimr::NStat { + +NActors::IActor* CreateStatisticsTableCreator(std::unique_ptr event, const TString& database); + +NActors::IActor* CreateSaveStatisticsQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId, ui64 statType, std::vector&& columnTags, std::vector&& data); + +NActors::IActor* CreateLoadStatisticsQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId, ui64 statType, ui32 columnTag, ui64 
cookie); + +NActors::IActor* CreateDeleteStatisticsQuery(const NActors::TActorId& replyActorId, const TString& database, + const TPathId& pathId); + +}; diff --git a/ydb/core/statistics/database/ut/ut_database.cpp b/ydb/core/statistics/database/ut/ut_database.cpp new file mode 100644 index 000000000000..188f9e1c1e97 --- /dev/null +++ b/ydb/core/statistics/database/ut/ut_database.cpp @@ -0,0 +1,116 @@ +#include + +#include +#include + +#include + +#include + +namespace NKikimr::NStat { + +Y_UNIT_TEST_SUITE(StatisticsSaveLoad) { + Y_UNIT_TEST(Simple) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + + auto sender = runtime.AllocateEdgeActor(0); + runtime.Register(CreateStatisticsTableCreator( + std::make_unique(), "/Root/Database"), + 0, 0, TMailboxType::Simple, 0, sender); + runtime.GrabEdgeEventRethrow(sender); + + TPathId pathId(1, 1); + ui64 statType = 1; + std::vector columnTags = {1, 2}; + std::vector data = {"dataA", "dataB"}; + + runtime.Register(CreateSaveStatisticsQuery(sender, "/Root/Database", + pathId, statType, std::move(columnTags), std::move(data)), + 0, 0, TMailboxType::Simple, 0, sender); + auto saveResponse = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(saveResponse->Get()->Success); + + runtime.Register(CreateLoadStatisticsQuery(sender, "/Root/Database", pathId, statType, 1, 1), + 0, 0, TMailboxType::Simple, 0, sender); + auto loadResponseA = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(loadResponseA->Get()->Success); + UNIT_ASSERT(loadResponseA->Get()->Data); + UNIT_ASSERT_VALUES_EQUAL(*loadResponseA->Get()->Data, "dataA"); + + runtime.Register(CreateLoadStatisticsQuery(sender, "/Root/Database", pathId, statType, 2, 1), + 0, 0, TMailboxType::Simple, 0, sender); + auto loadResponseB = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(loadResponseB->Get()->Success); + UNIT_ASSERT(loadResponseB->Get()->Data); + UNIT_ASSERT_VALUES_EQUAL(*loadResponseB->Get()->Data, 
"dataB"); + } + + Y_UNIT_TEST(Delete) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + + auto sender = runtime.AllocateEdgeActor(0); + runtime.Register(CreateStatisticsTableCreator( + std::make_unique(), "/Root/Database"), + 0, 0, TMailboxType::Simple, 0, sender); + runtime.GrabEdgeEvent(sender); + + TPathId pathId(1, 1); + ui64 statType = 1; + std::vector columnTags = {1, 2}; + std::vector data = {"dataA", "dataB"}; + + runtime.Register(CreateSaveStatisticsQuery(sender, "/Root/Database", + pathId, statType, std::move(columnTags), std::move(data)), + 0, 0, TMailboxType::Simple, 0, sender); + auto saveResponse = runtime.GrabEdgeEvent(sender); + UNIT_ASSERT(saveResponse->Get()->Success); + + runtime.Register(CreateDeleteStatisticsQuery(sender, "/Root/Database", pathId), + 0, 0, TMailboxType::Simple, 0, sender); + auto deleteResponse = runtime.GrabEdgeEvent(sender); + UNIT_ASSERT(deleteResponse->Get()->Success); + + runtime.Register(CreateLoadStatisticsQuery(sender, "/Root/Database", pathId, statType, 1, 1), + 0, 0, TMailboxType::Simple, 0, sender); + auto loadResponseA = runtime.GrabEdgeEvent(sender); + UNIT_ASSERT(!loadResponseA->Get()->Success); + } + + Y_UNIT_TEST(ForbidAccess) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database", 1, true); + CreateUniformTable(env, "Database", "Table"); + + NYdb::EStatus status; + auto test = [&] () { + auto driverConfig = NYdb::TDriverConfig() + .SetEndpoint(env.GetEndpoint()) + .SetAuthToken("user@builtin"); + auto driver = NYdb::TDriver(driverConfig); + auto db = NYdb::NTable::TTableClient(driver); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteDataQuery(R"( + SELECT * FROM `/Root/Database/.metadata/_statistics`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + status = result.GetStatus(); + }; + std::thread testThread(test); + + 
runtime.SimulateSleep(TDuration::Seconds(1)); + testThread.join(); + + UNIT_ASSERT_VALUES_EQUAL(status, NYdb::EStatus::SCHEME_ERROR); + } + +} + +} // NKikimr::NStat diff --git a/ydb/core/statistics/database/ut/ya.make b/ydb/core/statistics/database/ut/ya.make new file mode 100644 index 000000000000..fb0e5fcb6b9c --- /dev/null +++ b/ydb/core/statistics/database/ut/ya.make @@ -0,0 +1,27 @@ +UNITTEST_FOR(ydb/core/statistics/database) + +FORK_SUBTESTS() + +IF (WITH_VALGRIND) + TIMEOUT(3600) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +YQL_LAST_ABI_VERSION() + +PEERDIR( + library/cpp/testing/unittest + ydb/core/protos + ydb/core/testlib/default + ydb/core/statistics/ut_common +) + +SRCS( + ut_database.cpp +) + +END() diff --git a/ydb/core/statistics/database/ya.make b/ydb/core/statistics/database/ya.make new file mode 100644 index 000000000000..b88855356fab --- /dev/null +++ b/ydb/core/statistics/database/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +SRCS( + database.h + database.cpp +) + +PEERDIR( + ydb/core/base + ydb/core/engine/minikql + ydb/core/protos + ydb/core/tablet + ydb/core/tablet_flat + ydb/library/minsketch +) + +YQL_LAST_ABI_VERSION() + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/core/statistics/events.h b/ydb/core/statistics/events.h index 021977ab19c1..97154c2d67c1 100644 --- a/ydb/core/statistics/events.h +++ b/ydb/core/statistics/events.h @@ -3,8 +3,11 @@ #include #include #include +#include #include #include +#include + namespace NKikimr { namespace NStat { @@ -64,23 +67,54 @@ struct TEvStatistics { EvStatTableCreationResponse, EvSaveStatisticsQueryResponse, + EvDeleteStatisticsQueryResponse, EvLoadStatisticsQueryResponse, - EvScanTable, - EvScanTableResponse, - - EvDeleteStatisticsQueryResponse, + EvAnalyze, + EvAnalyzeResponse, + EvAnalyzeStatus, + EvAnalyzeStatusResponse, - EvScanTableAccepted, - EvGetScanStatus, - EvGetScanStatusResponse, + EvAnalyzeTable, + EvAnalyzeTableResponse, EvStatisticsRequest, 
EvStatisticsResponse, + EvAggregateStatistics, + EvAggregateStatisticsResponse, + EvAggregateKeepAlive, + EvAggregateKeepAliveAck, + + EvFinishTraversal, + EvEnd }; + struct TEvAggregateKeepAlive : public TEventPB< + TEvAggregateKeepAlive, + NKikimrStat::TEvAggregateKeepAlive, + EvAggregateKeepAlive> + {}; + + struct TEvAggregateKeepAliveAck : public TEventPB< + TEvAggregateKeepAliveAck, + NKikimrStat::TEvAggregateKeepAliveAck, + EvAggregateKeepAliveAck> + {}; + + struct TEvAggregateStatistics : public TEventPB< + TEvAggregateStatistics, + NKikimrStat::TEvAggregateStatistics, + EvAggregateStatistics> + {}; + + struct TEvAggregateStatisticsResponse : public TEventPB< + TEvAggregateStatisticsResponse, + NKikimrStat::TEvAggregateStatisticsResponse, + EvAggregateStatisticsResponse> + {}; + struct TEvGetStatistics : public TEventLocal { EStatType StatType; std::vector StatRequests; @@ -156,6 +190,9 @@ struct TEvStatistics { TEvSaveStatisticsQueryResponse, EvSaveStatisticsQueryResponse> { + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; + TPathId PathId; bool Success = true; }; @@ -163,6 +200,8 @@ struct TEvStatistics { TEvLoadStatisticsQueryResponse, EvLoadStatisticsQueryResponse> { + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; bool Success = true; ui64 Cookie = 0; std::optional Data; @@ -172,39 +211,47 @@ struct TEvStatistics { TEvDeleteStatisticsQueryResponse, EvDeleteStatisticsQueryResponse> { + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; bool Success = true; }; - struct TEvScanTable : public TEventPB< - TEvScanTable, - NKikimrStat::TEvScanTable, - EvScanTable> + struct TEvAnalyze : public TEventPB< + TEvAnalyze, + NKikimrStat::TEvAnalyze, + EvAnalyze> {}; - struct TEvScanTableAccepted : public TEventPB< - TEvScanTableAccepted, - NKikimrStat::TEvScanTableAccepted, - EvScanTableAccepted> + struct TEvAnalyzeResponse : public TEventPB< + TEvAnalyzeResponse, + NKikimrStat::TEvAnalyzeResponse, + EvAnalyzeResponse> {}; - struct 
TEvScanTableResponse : public TEventPB< - TEvScanTableResponse, - NKikimrStat::TEvScanTableResponse, - EvScanTableResponse> + struct TEvAnalyzeStatus : public TEventPB< + TEvAnalyzeStatus, + NKikimrStat::TEvAnalyzeStatus, + EvAnalyzeStatus> {}; - struct TEvGetScanStatus : public TEventPB< - TEvGetScanStatus, - NKikimrStat::TEvGetScanStatus, - EvGetScanStatus> + struct TEvAnalyzeStatusResponse : public TEventPB< + TEvAnalyzeStatusResponse, + NKikimrStat::TEvAnalyzeStatusResponse, + EvAnalyzeStatusResponse> {}; - struct TEvGetScanStatusResponse : public TEventPB< - TEvGetScanStatusResponse, - NKikimrStat::TEvGetScanStatusResponse, - EvGetScanStatusResponse> + struct TEvAnalyzeTable : public TEventPB< + TEvAnalyzeTable, + NKikimrStat::TEvAnalyzeTable, + EvAnalyzeTable> {}; + struct TEvAnalyzeTableResponse : public TEventPB< + TEvAnalyzeTableResponse, + NKikimrStat::TEvAnalyzeTableResponse, + EvAnalyzeTableResponse> + {}; + struct TEvStatisticsRequest : public TEventPB< TEvStatisticsRequest, NKikimrStat::TEvStatisticsRequest, diff --git a/ydb/core/statistics/save_load_stats.cpp b/ydb/core/statistics/save_load_stats.cpp deleted file mode 100644 index 082b9f58b5bb..000000000000 --- a/ydb/core/statistics/save_load_stats.cpp +++ /dev/null @@ -1,295 +0,0 @@ -#include "save_load_stats.h" - -#include "events.h" - -#include -#include -#include - -namespace NKikimr::NStat { - -class TStatisticsTableCreator : public TActorBootstrapped { -public: - explicit TStatisticsTableCreator(std::unique_ptr resultEvent) - : ResultEvent(std::move(resultEvent)) - {} - - void Registered(NActors::TActorSystem* sys, const NActors::TActorId& owner) override { - NActors::TActorBootstrapped::Registered(sys, owner); - Owner = owner; - } - - void Bootstrap() { - Become(&TStatisticsTableCreator::StateFunc); - Register( - CreateTableCreator( - { ".metadata", "_statistics" }, - { - Col("owner_id", NScheme::NTypeIds::Uint64), - Col("local_path_id", NScheme::NTypeIds::Uint64), - Col("stat_type", 
NScheme::NTypeIds::Uint32), - Col("column_tag", NScheme::NTypeIds::Uint32), - Col("data", NScheme::NTypeIds::String), - }, - { "owner_id", "local_path_id", "stat_type", "column_tag"}, - NKikimrServices::STATISTICS - ) - ); - } - -private: - static NKikimrSchemeOp::TColumnDescription Col(const TString& columnName, const char* columnType) { - NKikimrSchemeOp::TColumnDescription desc; - desc.SetName(columnName); - desc.SetType(columnType); - return desc; - } - - static NKikimrSchemeOp::TColumnDescription Col(const TString& columnName, NScheme::TTypeId columnType) { - return Col(columnName, NScheme::TypeName(columnType)); - } - - void Handle(TEvTableCreator::TEvCreateTableResponse::TPtr&) { - Send(Owner, std::move(ResultEvent)); - PassAway(); - } - - STRICT_STFUNC(StateFunc, - hFunc(TEvTableCreator::TEvCreateTableResponse, Handle); - ) - -private: - std::unique_ptr ResultEvent; - NActors::TActorId Owner; -}; - -NActors::IActor* CreateStatisticsTableCreator(std::unique_ptr event) { - return new TStatisticsTableCreator(std::move(event)); -} - - -class TSaveStatisticsQuery : public NKikimr::TQueryBase { -private: - const TPathId PathId; - const ui64 StatType; - const std::vector ColumnTags; - const std::vector Data; - -public: - TSaveStatisticsQuery(const TPathId& pathId, ui64 statType, - std::vector&& columnTags, std::vector&& data) - : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}) - , PathId(pathId) - , StatType(statType) - , ColumnTags(std::move(columnTags)) - , Data(std::move(data)) - { - Y_ABORT_UNLESS(ColumnTags.size() == Data.size()); - } - - void OnRunQuery() override { - TStringBuilder sql; - sql << R"( - DECLARE $owner_id AS Uint64; - DECLARE $local_path_id AS Uint64; - DECLARE $stat_type AS Uint32; - DECLARE $column_tags AS List; - DECLARE $data AS List; - - UPSERT INTO `.metadata/_statistics` - (owner_id, local_path_id, stat_type, column_tag, data) - VALUES - )"; - - for (size_t i = 0; i < Data.size(); ++i) { - sql << " ($owner_id, $local_path_id, 
$stat_type, $column_tags[" << i << "], $data[" << i << "])"; - sql << (i == Data.size() - 1 ? ";" : ","); - } - - NYdb::TParamsBuilder params; - params - .AddParam("$owner_id") - .Uint64(PathId.OwnerId) - .Build() - .AddParam("$local_path_id") - .Uint64(PathId.LocalPathId) - .Build() - .AddParam("$stat_type") - .Uint32(StatType) - .Build(); - auto& columnTags = params.AddParam("$column_tags").BeginList(); - for (size_t i = 0; i < ColumnTags.size(); ++i) { - columnTags - .AddListItem() - .Uint32(ColumnTags[i]); - } - columnTags.EndList().Build(); - auto& data = params.AddParam("$data").BeginList(); - for (size_t i = 0; i < Data.size(); ++i) { - data - .AddListItem() - .String(Data[i]); - } - data.EndList().Build(); - - RunDataQuery(sql, ¶ms); - } - - void OnQueryResult() override { - Finish(); - } - - void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { - Y_UNUSED(issues); - auto response = std::make_unique(); - response->Success = (status == Ydb::StatusIds::SUCCESS); - Send(Owner, response.release()); - } -}; - -NActors::IActor* CreateSaveStatisticsQuery(const TPathId& pathId, ui64 statType, - std::vector&& columnTags, std::vector&& data) -{ - return new TSaveStatisticsQuery(pathId, statType, std::move(columnTags), std::move(data)); -} - - -class TLoadStatisticsQuery : public NKikimr::TQueryBase { -private: - const TPathId PathId; - const ui64 StatType; - const ui32 ColumnTag; - const ui64 Cookie; - - std::optional Data; - -public: - TLoadStatisticsQuery(const TPathId& pathId, ui64 statType, ui32 columnTag, ui64 cookie) - : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}) - , PathId(pathId) - , StatType(statType) - , ColumnTag(columnTag) - , Cookie(cookie) - {} - - void OnRunQuery() override { - TString sql = R"( - DECLARE $owner_id AS Uint64; - DECLARE $local_path_id AS Uint64; - DECLARE $stat_type AS Uint32; - DECLARE $column_tag AS Uint32; - - SELECT - data - FROM `.metadata/_statistics` - WHERE - owner_id = $owner_id AND - 
local_path_id = $local_path_id AND - stat_type = $stat_type AND - column_tag = $column_tag; - )"; - - NYdb::TParamsBuilder params; - params - .AddParam("$owner_id") - .Uint64(PathId.OwnerId) - .Build() - .AddParam("$local_path_id") - .Uint64(PathId.LocalPathId) - .Build() - .AddParam("$stat_type") - .Uint32(StatType) - .Build() - .AddParam("$column_tag") - .Uint32(ColumnTag) - .Build(); - - RunDataQuery(sql, ¶ms, TTxControl::BeginTx()); - } - - void OnQueryResult() override { - if (ResultSets.size() != 1) { - Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected read response", false); - return; - } - NYdb::TResultSetParser result(ResultSets[0]); - if (result.RowsCount() == 0) { - Finish(Ydb::StatusIds::BAD_REQUEST, "No data", false); - return; - } - result.TryNextRow(); - Data = *result.ColumnParser("data").GetOptionalString(); - Finish(); - } - - void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { - Y_UNUSED(issues); - auto response = std::make_unique(); - response->Success = (status == Ydb::StatusIds::SUCCESS); - response->Cookie = Cookie; - if (response->Success) { - response->Data = Data; - } - Send(Owner, response.release()); - } -}; - -NActors::IActor* CreateLoadStatisticsQuery(const TPathId& pathId, ui64 statType, - ui32 columnTag, ui64 cookie) -{ - return new TLoadStatisticsQuery(pathId, statType, columnTag, cookie); -} - - -class TDeleteStatisticsQuery : public NKikimr::TQueryBase { -private: - const TPathId PathId; - -public: - TDeleteStatisticsQuery(const TPathId& pathId) - : NKikimr::TQueryBase(NKikimrServices::STATISTICS, {}) - , PathId(pathId) - { - } - - void OnRunQuery() override { - TString sql = R"( - DECLARE $owner_id AS Uint64; - DECLARE $local_path_id AS Uint64; - - DELETE FROM `.metadata/_statistics` - WHERE - owner_id = $owner_id AND - local_path_id = $local_path_id; - )"; - - NYdb::TParamsBuilder params; - params - .AddParam("$owner_id") - .Uint64(PathId.OwnerId) - .Build() - .AddParam("$local_path_id") - 
.Uint64(PathId.LocalPathId) - .Build(); - - RunDataQuery(sql, ¶ms); - } - - void OnQueryResult() override { - Finish(); - } - - void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { - Y_UNUSED(issues); - auto response = std::make_unique(); - response->Success = (status == Ydb::StatusIds::SUCCESS); - Send(Owner, response.release()); - } -}; - -NActors::IActor* CreateDeleteStatisticsQuery(const TPathId& pathId) -{ - return new TDeleteStatisticsQuery(pathId); -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/save_load_stats.h b/ydb/core/statistics/save_load_stats.h deleted file mode 100644 index cb81909c3d54..000000000000 --- a/ydb/core/statistics/save_load_stats.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include - -namespace NKikimr::NStat { - -NActors::IActor* CreateStatisticsTableCreator(std::unique_ptr event); - -NActors::IActor* CreateSaveStatisticsQuery(const TPathId& pathId, ui64 statType, - std::vector&& columnTags, std::vector&& data); - -NActors::IActor* CreateLoadStatisticsQuery(const TPathId& pathId, ui64 statType, - ui32 columnTag, ui64 cookie); - -NActors::IActor* CreateDeleteStatisticsQuery(const TPathId& pathId); - -}; diff --git a/ydb/core/statistics/service/http_request.cpp b/ydb/core/statistics/service/http_request.cpp new file mode 100644 index 000000000000..298bcd991162 --- /dev/null +++ b/ydb/core/statistics/service/http_request.cpp @@ -0,0 +1,243 @@ +#include "http_request.h" + +#include +#include +#include +#include +#include +#include + + +namespace NKikimr { +namespace NStat { + +static constexpr ui64 FirstRoundCookie = 1; +static constexpr ui64 SecondRoundCookie = 2; + +THttpRequest::THttpRequest(ERequestType requestType, const std::unordered_map& params, const TActorId& replyToActorId) + : RequestType(requestType) + , Params(params) + , ReplyToActorId(replyToActorId) +{} + +void THttpRequest::Bootstrap() { + auto navigate = std::make_unique(); + auto& entry = 
navigate->ResultSet.emplace_back(); + entry.Path = SplitPath(Params[EParamType::PATH]); + entry.Operation = TNavigate::EOp::OpTable; + entry.RequestType = TNavigate::TEntry::ERequestType::ByPath; + entry.ShowPrivatePath = true; + navigate->Cookie = FirstRoundCookie; + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + + Become(&THttpRequest::StateWork); +} + +void THttpRequest::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + std::unique_ptr navigate(ev->Get()->Request.Release()); + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + + const auto& entry = navigate->ResultSet.front(); + + if (navigate->Cookie == SecondRoundCookie) { + if (entry.Status != TNavigate::EStatus::Ok) { + HttpReply("Internal error"); + return; + } + + DoRequest(entry); + return; + } + + if (entry.Status != TNavigate::EStatus::Ok) { + switch (entry.Status) { + case TNavigate::EStatus::PathErrorUnknown: + HttpReply("Path does not exist"); + return; + case TNavigate::EStatus::PathNotPath: + HttpReply("Invalid path"); + return; + case TNavigate::EStatus::PathNotTable: + HttpReply("Path is not a table"); + return; + default: + HttpReply("Internal error"); + return; + } + } + + if (RequestType == ERequestType::COUNT_MIN_SKETCH_PROBE) { + DoRequest(entry); + return; + } + + auto navigateDomainKey = [this] (TPathId domainKey) { + auto navigate = std::make_unique(); + auto& entry = navigate->ResultSet.emplace_back(); + entry.TableId = TTableId(domainKey.OwnerId, domainKey.LocalPathId); + entry.Operation = TNavigate::EOp::OpPath; + entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + navigate->Cookie = SecondRoundCookie; + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + }; + + const auto& domainInfo = entry.DomainInfo; + + if (domainInfo->IsServerless()) { + navigateDomainKey(domainInfo->ResourcesDomainKey); + return; + } + + if 
(!domainInfo->Params.HasStatisticsAggregator()) { + navigateDomainKey(domainInfo->DomainKey); + return; + } + + DoRequest(entry); +} + +void THttpRequest::Handle(TEvStatistics::TEvAnalyzeStatusResponse::TPtr& ev) { + const auto& record = ev->Get()->Record; + + switch (record.GetStatus()) { + case NKikimrStat::TEvAnalyzeStatusResponse::STATUS_UNSPECIFIED: + HttpReply("Status is unspecified"); + break; + case NKikimrStat::TEvAnalyzeStatusResponse::STATUS_NO_OPERATION: + HttpReply("No analyze operation"); + break; + case NKikimrStat::TEvAnalyzeStatusResponse::STATUS_ENQUEUED: + HttpReply("Analyze is enqueued"); + break; + case NKikimrStat::TEvAnalyzeStatusResponse::STATUS_IN_PROGRESS: + HttpReply("Analyze is in progress"); + break; + } +} + +void THttpRequest::Handle(TEvStatistics::TEvLoadStatisticsQueryResponse::TPtr& ev) { + const auto msg = ev->Get(); + if (!msg->Success || !msg->Data) { + const auto status = std::to_string(static_cast(msg->Status)); + HttpReply("Error occurred while loading statistics. 
Status: " + status); + return; + } + + const auto typeId = static_cast(msg->Cookie); + const NScheme::TTypeInfo typeInfo(typeId); + const TStringBuf value(Params[EParamType::CELL_VALUE]); + TMemoryPool pool(64); + + TCell cell; + TString error; + if (!NFormats::MakeCell(cell, value, typeInfo, pool, error)) { + HttpReply("Cell value parsing error: " + error); + return; + } + + auto countMinSketch = std::unique_ptr(TCountMinSketch::FromString(msg->Data->Data(), msg->Data->Size())); + const auto probe = countMinSketch->Probe(cell.Data(), cell.Size()); + HttpReply(Params[EParamType::PATH] + "[" + Params[EParamType::COLUMN_NAME] + "]=" + std::to_string(probe)); +} + +void THttpRequest::Handle(TEvPipeCache::TEvDeliveryProblem::TPtr&) { + HttpReply("Delivery problem"); +} + +void THttpRequest::DoRequest(const TNavigate::TEntry& entry) { + switch (RequestType) { + case ERequestType::ANALYZE: + DoAnalyze(entry); + return; + case ERequestType::STATUS: + DoStatus(entry); + return; + case ERequestType::COUNT_MIN_SKETCH_PROBE: + DoCountMinSketchProbe(entry); + return; + } +} + +void THttpRequest::DoAnalyze(const TNavigate::TEntry& entry) { + if (!entry.DomainInfo->Params.HasStatisticsAggregator()) { + HttpReply("No statistics aggregator"); + return; + } + + const auto statisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); + const auto operationId = TULIDGenerator().Next(TActivationContext::Now()); + + auto analyze = std::make_unique(); + auto& record = analyze->Record; + record.SetOperationId(operationId.ToBinary()); + + const auto& pathId = entry.TableId.PathId; + PathIdFromPathId(pathId, record.AddTables()->MutablePathId()); + + Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(analyze.release(), statisticsAggregatorId, true)); + HttpReply("Analyze sent. 
OperationId: " + operationId.ToString()); +} + +void THttpRequest::DoStatus(const TNavigate::TEntry& entry) { + if (!entry.DomainInfo->Params.HasStatisticsAggregator()) { + HttpReply("No statistics aggregator"); + return; + } + + const auto statisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); + + const auto& operationIdParam = Params[EParamType::OPERATION_ID]; + TULID operationId; + + if (operationIdParam.empty() || !operationId.ParseString(operationIdParam)) { + HttpReply(TString("Wrong OperationId: ") + (operationIdParam.empty() ? "Empty" : operationIdParam)); + } + + auto status = std::make_unique(); + auto& record = status->Record; + record.SetOperationId(operationId.ToBinary()); + + Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(status.release(), statisticsAggregatorId, true)); +} + +void THttpRequest::DoCountMinSketchProbe(const TNavigate::TEntry& entry) { + const auto& columnName = Params[EParamType::COLUMN_NAME]; + if (columnName.empty()) { + HttpReply("Column is not set"); + return; + } + + if (Params[EParamType::CELL_VALUE].empty()) { + HttpReply("Value is not set"); + return; + } + + for (const auto& [_, tableInfo]: entry.Columns) { + if (tableInfo.Name == columnName) { + const auto columnTag = tableInfo.Id; + const auto typeId = tableInfo.PType.GetTypeId(); + const auto& pathId = entry.TableId.PathId; + Register(CreateLoadStatisticsQuery(SelfId(), Params[EParamType::DATABASE], pathId, EStatType::COUNT_MIN_SKETCH, columnTag, typeId)); + return; + } + } + + HttpReply("Column not found"); +} + +void THttpRequest::HttpReply(const TString& msg) { + Send(ReplyToActorId, new NMon::TEvHttpInfoRes(msg)); + PassAway(); +} + +void THttpRequest::PassAway() { + Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvUnlink(0)); + TBase::PassAway(); +} + + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/service/http_request.h b/ydb/core/statistics/service/http_request.h new file mode 100644 index 
000000000000..e3e514281225 --- /dev/null +++ b/ydb/core/statistics/service/http_request.h @@ -0,0 +1,78 @@ +#include + +#include +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NStat { + +class THttpRequest : public NActors::TActorBootstrapped { +public: + using TBase = TActorBootstrapped; + + static constexpr auto ActorActivityType() { + return NKikimrServices::TActivity::STAT_SERVICE_HTTP_REQUEST; + } + + void Bootstrap(); + + enum class ERequestType { + ANALYZE, + STATUS, + COUNT_MIN_SKETCH_PROBE + }; + + enum class EParamType { + DATABASE, + PATH, + OPERATION_ID, + COLUMN_NAME, + CELL_VALUE + }; + + THttpRequest(ERequestType requestType, const std::unordered_map& params, const TActorId& replyToActorId); + +private: + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + + STFUNC(StateWork) { + switch(ev->GetTypeRewrite()) { + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + hFunc(TEvStatistics::TEvAnalyzeStatusResponse, Handle); + hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); + hFunc(TEvStatistics::TEvLoadStatisticsQueryResponse, Handle); + IgnoreFunc(TEvStatistics::TEvAnalyzeResponse); + default: + LOG_CRIT_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "NStat::THttpRequest: unexpected event# " << ev->GetTypeRewrite()); + } + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + void Handle(TEvStatistics::TEvAnalyzeStatusResponse::TPtr& ev); + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev); + void Handle(TEvStatistics::TEvLoadStatisticsQueryResponse::TPtr& ev); + + void DoRequest(const TNavigate::TEntry& entry); + void DoAnalyze(const TNavigate::TEntry& entry); + void DoStatus(const TNavigate::TEntry& entry); + void DoCountMinSketchProbe(const TNavigate::TEntry& entry); + + void HttpReply(const TString& msg); + + void PassAway(); + +private: + const ERequestType RequestType; + std::unordered_map Params; + const TActorId ReplyToActorId; +}; + 
+} // NStat +} // NKikimr \ No newline at end of file diff --git a/ydb/core/statistics/service/service.cpp b/ydb/core/statistics/service/service.cpp new file mode 100644 index 000000000000..2b59af317b23 --- /dev/null +++ b/ydb/core/statistics/service/service.cpp @@ -0,0 +1,52 @@ +#include "service.h" + + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NStat { + +static constexpr TDuration DefaultAggregateKeepAlivePeriod = TDuration::MilliSeconds(500); +static constexpr TDuration DefaultAggregateKeepAliveTimeout = TDuration::Seconds(3); +static constexpr TDuration DefaultAggregateKeepAliveAckTimeout = TDuration::Seconds(3); +static constexpr TDuration DefaultStatisticsRequestTimeout = TDuration::Seconds(5); +static constexpr size_t DefaultMaxInFlightTabletRequests = 5; +static constexpr size_t DefaultFanOutFactor = 5; + + + +TStatServiceSettings::TStatServiceSettings() + : AggregateKeepAlivePeriod(DefaultAggregateKeepAlivePeriod) + , AggregateKeepAliveTimeout(DefaultAggregateKeepAliveTimeout) + , AggregateKeepAliveAckTimeout(DefaultAggregateKeepAliveAckTimeout) + , StatisticsRequestTimeout(DefaultStatisticsRequestTimeout) + , MaxInFlightTabletRequests(DefaultMaxInFlightTabletRequests) + , FanOutFactor(DefaultFanOutFactor) +{} + +NActors::TActorId MakeStatServiceID(ui32 node) { + const char x[12] = "StatService"; + return NActors::TActorId(node, TStringBuf(x, 12)); +} + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/service/service.h b/ydb/core/statistics/service/service.h new file mode 100644 index 000000000000..4b29bc8bcbef --- /dev/null +++ b/ydb/core/statistics/service/service.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +namespace NKikimr::NStat { + +struct TStatServiceSettings { + TDuration AggregateKeepAlivePeriod; + TDuration AggregateKeepAliveTimeout; + TDuration 
AggregateKeepAliveAckTimeout; + TDuration StatisticsRequestTimeout; + size_t MaxInFlightTabletRequests; + size_t FanOutFactor; + + TStatServiceSettings(); + + TStatServiceSettings& SetAggregateKeepAlivePeriod(const TDuration& val) { + AggregateKeepAlivePeriod = val; + return *this; + } + + TStatServiceSettings& SetAggregateKeepAliveTimeout(const TDuration& val) { + AggregateKeepAliveTimeout = val; + return *this; + } + + TStatServiceSettings& SetAggregateKeepAliveAckTimeout(const TDuration& val) { + AggregateKeepAliveAckTimeout = val; + return *this; + } + + TStatServiceSettings& SetStatisticsRequestTimeout(const TDuration& val) { + StatisticsRequestTimeout = val; + return *this; + } + + TStatServiceSettings& SetMaxInFlightTabletRequests(size_t val) { + MaxInFlightTabletRequests = val; + return *this; + } + + TStatServiceSettings& SetFanOutFactor(size_t val) { + FanOutFactor = val; + return *this; + } +}; + +NActors::TActorId MakeStatServiceID(ui32 node); + +THolder CreateStatService(const TStatServiceSettings& settings = TStatServiceSettings()); + +} // NKikimr::NStat diff --git a/ydb/core/statistics/service/service_impl.cpp b/ydb/core/statistics/service/service_impl.cpp new file mode 100644 index 000000000000..7743b2d51f1b --- /dev/null +++ b/ydb/core/statistics/service/service_impl.cpp @@ -0,0 +1,1568 @@ +#include "service.h" +#include "http_request.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +namespace NKikimr { +namespace NStat { + +struct TAggregationStatistics { + using TColumnsStatistics = ::google::protobuf::RepeatedPtrField<::NKikimrStat::TColumnStatistics>; + + TAggregationStatistics(size_t nodesCount) + : Nodes(nodesCount) + {} + + struct TFailedTablet { + using EErrorType = NKikimrStat::TEvAggregateStatisticsResponse::EErrorType; + + ui64 TabletId; + ui32 NodeId; + EErrorType Error; + + 
TFailedTablet(ui64 tabletId, ui32 nodeId, EErrorType error) + : TabletId(tabletId) + , NodeId(nodeId) + , Error(error) {} + }; + + struct TTablets { + ui32 NodeId; + std::vector Ids; + }; + + struct TNode { + enum class EStatus: ui8 { + None, + Processing, + Processed, + Unavailable + }; + + ui64 LastHeartbeat{ 0 }; + std::vector Tablets; + TActorId Actor; + EStatus Status{ EStatus::None }; + }; + + struct TLocalTablets { + size_t NextTablet{ 0 }; + ui32 InFlight{ 0 }; + std::vector Ids; + std::unordered_map TabletsPipes; + }; + + struct ColumnStatistics { + std::unique_ptr Statistics; + ui32 ContainedInResponse{ 0 }; + }; + + ui64 Round{ 0 }; + ui64 Cookie{ 0 }; + TPathId PathId; + ui64 LastAckHeartbeat{ 0 }; + TActorId ParentNode; + std::vector Nodes; + size_t PprocessedNodes{ 0 }; + + std::unordered_map CountMinSketches; + ui32 TotalStatisticsResponse{ 0 }; + + std::vector ColumnTags; + TLocalTablets LocalTablets; + std::vector FailedTablets; + + bool IsCompleted() const noexcept { + return PprocessedNodes == Nodes.size() && LocalTablets.InFlight == 0; + } + + TNode* GetProcessingChildNode(ui32 nodeId) { + for (size_t i = 0; i < Nodes.size(); ++i) { + if (Nodes[i].Actor.NodeId() == nodeId) { + return Nodes[i].Status == TAggregationStatistics::TNode::EStatus::Processing + ? 
&Nodes[i] : nullptr; + } + } + SA_LOG_E("Child node with the specified id was not found"); + return nullptr; + } +}; + +class TStatService : public TActorBootstrapped { +public: + using TBase = TActorBootstrapped; + + TStatService(const TStatServiceSettings& settings) + : Settings(settings) + , AggregationStatistics(settings.FanOutFactor) + {} + + static constexpr auto ActorActivityType() { + return NKikimrServices::TActivity::STAT_SERVICE; + } + + struct TEvPrivate { + enum EEv { + EvRequestTimeout = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), + EvDispatchKeepAlive, + EvKeepAliveTimeout, + EvKeepAliveAckTimeout, + EvStatisticsRequestTimeout, + + EvEnd + }; + + struct TEvRequestTimeout : public NActors::TEventLocal { + std::unordered_set NeedSchemeShards; + NActors::TActorId PipeClientId; + }; + + struct TEvDispatchKeepAlive: public NActors::TEventLocal { + TEvDispatchKeepAlive(ui64 round): Round(round) {} + + ui64 Round; + }; + + struct TEvKeepAliveAckTimeout: public NActors::TEventLocal { + TEvKeepAliveAckTimeout(ui64 round): Round(round) {} + + ui64 Round; + }; + + struct TEvKeepAliveTimeout: public NActors::TEventLocal { + TEvKeepAliveTimeout(ui64 round, ui32 nodeId): Round(round), NodeId(nodeId) {} + + ui64 Round; + ui32 NodeId; + }; + + struct TEvStatisticsRequestTimeout: public NActors::TEventLocal { + TEvStatisticsRequestTimeout(ui64 round, ui64 tabletId): Round(round), TabletId(tabletId) {} + + ui64 Round; + ui64 TabletId; + }; + }; + + void Bootstrap() { + EnableStatistics = AppData()->FeatureFlags.GetEnableStatistics(); + EnableColumnStatistics = AppData()->FeatureFlags.GetEnableColumnStatistics(); + + ui32 configKind = (ui32) NKikimrConsole::TConfigItem::FeatureFlagsItem; + Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), + new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({configKind})); + + NActors::TMon* mon = AppData()->Mon; + if (mon) { + NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", 
"Actors"); + mon->RegisterActorPage(actorsMonPage, "statservice", "Statistics service", + false, TlsActivationContext->ExecutorThread.ActorSystem, SelfId()); + } + + Become(&TStatService::StateWork); + } + + STFUNC(StateWork) { + switch(ev->GetTypeRewrite()) { + hFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, HandleConfig) + hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, HandleConfig) + hFunc(TEvStatistics::TEvGetStatistics, Handle); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + hFunc(TEvStatistics::TEvPropagateStatistics, Handle); + hFunc(TEvStatistics::TEvAggregateStatistics, Handle); + IgnoreFunc(TEvStatistics::TEvPropagateStatisticsResponse); + hFunc(TEvTabletPipe::TEvClientConnected, Handle); + hFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + hFunc(TEvStatistics::TEvStatisticsIsDisabled, Handle); + hFunc(TEvStatistics::TEvLoadStatisticsQueryResponse, Handle); + hFunc(TEvPrivate::TEvRequestTimeout, Handle); + + hFunc(TEvStatistics::TEvAggregateKeepAliveAck, Handle); + hFunc(TEvPrivate::TEvKeepAliveAckTimeout, Handle); + hFunc(TEvStatistics::TEvAggregateKeepAlive, Handle); + hFunc(TEvPrivate::TEvDispatchKeepAlive, Handle); + hFunc(TEvPrivate::TEvKeepAliveTimeout, Handle); + hFunc(TEvPrivate::TEvStatisticsRequestTimeout, Handle); + hFunc(TEvStatistics::TEvStatisticsResponse, Handle); + hFunc(TEvStatistics::TEvAggregateStatisticsResponse, Handle); + + hFunc(NMon::TEvHttpInfo, Handle); + hFunc(NMon::TEvHttpInfoRes, Handle); + cFunc(TEvents::TEvPoison::EventType, PassAway); + default: + SA_LOG_CRIT("NStat::TStatService: unexpected event# " << ev->GetTypeRewrite() << " " << ev->ToString()); + } + } + +private: + void HandleConfig(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse::TPtr&) { + SA_LOG_I("Subscribed for config changes on node " << SelfId().NodeId()); + } + + void HandleConfig(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { + const auto& record = ev->Get()->Record; + 
const auto& config = record.GetConfig(); + if (config.HasFeatureFlags()) { + const auto& featureFlags = config.GetFeatureFlags(); + EnableStatistics = featureFlags.GetEnableStatistics(); + EnableColumnStatistics = featureFlags.GetEnableColumnStatistics(); + if (!EnableStatistics) { + ReplyAllFailed(); + } + } + auto response = std::make_unique(record); + Send(ev->Sender, response.release(), 0, ev->Cookie); + } + + bool IsNotCurrentRound(ui64 round) { + if (round != AggregationStatistics.Round) { + SA_LOG_D("Event round " << round << " is different from the current " << AggregationStatistics.Round); + return true; + } + return false; + } + + void OnAggregateStatisticsFinished() { + SendAggregateStatisticsResponse(); + ResetAggregationStatistics(); + } + + void SendRequestToNextTablet() { + auto& localTablets = AggregationStatistics.LocalTablets; + if (localTablets.NextTablet >= localTablets.Ids.size()) { + return; + } + + const auto tabletId = localTablets.Ids[localTablets.NextTablet]; + ++localTablets.NextTablet; + ++localTablets.InFlight; + + auto policy = NTabletPipe::TClientRetryPolicy::WithRetries(); + policy.RetryLimitCount = 2; + NTabletPipe::TClientConfig pipeConfig{policy}; + pipeConfig.ForceLocal = true; + localTablets.TabletsPipes[tabletId] = Register(NTabletPipe::CreateClient(SelfId(), tabletId, pipeConfig)); + } + + void ResetAggregationStatistics() { + const auto& tabletsPipes = AggregationStatistics.LocalTablets.TabletsPipes; + for (auto it = tabletsPipes.begin(); it != tabletsPipes.end(); ++it) { + NTabletPipe::CloseClient(SelfId(), it->second); + } + + TAggregationStatistics aggregationStatistics(Settings.FanOutFactor); + std::swap(AggregationStatistics, aggregationStatistics); + } + + void AggregateStatistics(const TAggregationStatistics::TColumnsStatistics& columnsStatistics) { + ++AggregationStatistics.TotalStatisticsResponse; + + for (const auto& column : columnsStatistics) { + const auto tag = column.GetTag(); + + for (auto& statistic : 
column.GetStatistics()) { + if (statistic.GetType() == NKikimr::NStat::COUNT_MIN_SKETCH) { + auto data = statistic.GetData().Data(); + auto sketch = reinterpret_cast(data); + auto& current = AggregationStatistics.CountMinSketches[tag]; + + if (current.Statistics == nullptr) { + current.Statistics.reset(TCountMinSketch::Create()); + } + + ++current.ContainedInResponse; + *current.Statistics += *sketch; + } + } + } + } + + void Handle(TEvStatistics::TEvStatisticsResponse::TPtr& ev) { + const auto& record = ev->Get()->Record; + const auto tabletId = record.GetShardTabletId(); + + SA_LOG_D("Received TEvStatisticsResponse TabletId: " << tabletId); + + const auto round = ev->Cookie; + if (IsNotCurrentRound(round)) { + return; + } + + auto tabletPipe = AggregationStatistics.LocalTablets.TabletsPipes.find(tabletId); + if (tabletPipe != AggregationStatistics.LocalTablets.TabletsPipes.end()) { + NTabletPipe::CloseClient(SelfId(), tabletPipe->second); + AggregationStatistics.LocalTablets.TabletsPipes.erase(tabletPipe); + } + + AggregateStatistics(record.GetColumns()); + --AggregationStatistics.LocalTablets.InFlight; + + SendRequestToNextTablet(); + + if (AggregationStatistics.IsCompleted()) { + OnAggregateStatisticsFinished(); + } + } + + void Handle(TEvStatistics::TEvAggregateKeepAliveAck::TPtr& ev) { + const auto& record = ev->Get()->Record; + const auto round = record.GetRound(); + + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvAggregateKeepAliveAck"); + return; + } + + AggregationStatistics.LastAckHeartbeat = GetCycleCountFast(); + } + + void Handle(TEvPrivate::TEvKeepAliveAckTimeout::TPtr& ev) { + const auto round = ev->Get()->Round; + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvKeepAliveAckTimeout"); + return; + } + + const auto maxDuration = DurationToCycles(Settings.AggregateKeepAliveAckTimeout); + const auto deadline = AggregationStatistics.LastAckHeartbeat + maxDuration; + const auto now = GetCycleCountFast(); + + if (deadline >= now) { + 
Schedule(Settings.AggregateKeepAliveAckTimeout, new TEvPrivate::TEvKeepAliveAckTimeout(round)); + return; + } + + // the parent node is unavailable + // invalidate the subtree with the root in the current node + SA_LOG_I("Parent node " << AggregationStatistics.ParentNode.NodeId() << " is unavailable"); + + + ResetAggregationStatistics(); + } + + void Handle(TEvPrivate::TEvDispatchKeepAlive::TPtr& ev) { + const auto round = ev->Get()->Round; + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvDispatchKeepAlive"); + return; + } + + auto keepAlive = std::make_unique(); + keepAlive->Record.SetRound(round); + Send(AggregationStatistics.ParentNode, keepAlive.release()); + Schedule(Settings.AggregateKeepAlivePeriod, new TEvPrivate::TEvDispatchKeepAlive(round)); + } + + void Handle(TEvPrivate::TEvKeepAliveTimeout::TPtr& ev) { + const auto round = ev->Get()->Round; + + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvKeepAliveTimeout"); + return; + } + + const auto nodeId = ev->Get()->NodeId; + auto node = AggregationStatistics.GetProcessingChildNode(nodeId); + + if (node == nullptr) { + SA_LOG_D("Skip TEvKeepAliveTimeout"); + return; + } + + const auto maxDuration = DurationToCycles(Settings.AggregateKeepAliveTimeout); + const auto deadline = node->LastHeartbeat + maxDuration; + const auto now = GetCycleCountFast(); + + if (deadline >= now) { + Schedule(Settings.AggregateKeepAliveTimeout, new TEvPrivate::TEvKeepAliveTimeout(round, nodeId)); + return; + } + + node->Status = TAggregationStatistics::TNode::EStatus::Unavailable; + ++AggregationStatistics.PprocessedNodes; + SA_LOG_I("Node " << nodeId << " is unavailable"); + + if (AggregationStatistics.IsCompleted()) { + OnAggregateStatisticsFinished(); + } + } + + void Handle(TEvStatistics::TEvAggregateKeepAlive::TPtr& ev) { + const auto& record = ev->Get()->Record; + const auto round = record.GetRound(); + + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvAggregateKeepAlive"); + return; + } + + const auto nodeId = 
ev->Sender.NodeId(); + auto node = AggregationStatistics.GetProcessingChildNode(nodeId); + + if (node == nullptr) { + SA_LOG_D( "Skip TEvAggregateKeepAlive"); + return; + } + + auto response = std::make_unique(); + response->Record.SetRound(round); + Send(ev->Sender, response.release()); + + node->LastHeartbeat = GetCycleCountFast(); + } + + void Handle(TEvStatistics::TEvAggregateStatisticsResponse::TPtr& ev) { + SA_LOG_D("Received TEvAggregateStatisticsResponse SenderNodeId: " << ev->Sender.NodeId()); + + const auto& record = ev->Get()->Record; + const auto round = record.GetRound(); + + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvAggregateStatisticsResponse"); + return; + } + + const auto nodeId = ev->Sender.NodeId(); + auto node = AggregationStatistics.GetProcessingChildNode(nodeId); + + if (node == nullptr) { + SA_LOG_D("Skip TEvAggregateStatisticsResponse"); + return; + } + + node->Status = TAggregationStatistics::TNode::EStatus::Processed; + ++AggregationStatistics.PprocessedNodes; + + AggregateStatistics(record.GetColumns()); + + const auto size = AggregationStatistics.FailedTablets.size(); + AggregationStatistics.FailedTablets.reserve(size + record.GetFailedTablets().size()); + + for (const auto& fail : record.GetFailedTablets()) { + AggregationStatistics.FailedTablets.emplace_back( + fail.GetTabletId(), fail.GetNodeId(), fail.GetError() + ); + } + + if (AggregationStatistics.IsCompleted()) { + OnAggregateStatisticsFinished(); + } + } + + void AddUnavailableTablets(const TAggregationStatistics::TNode& node, + NKikimrStat::TEvAggregateStatisticsResponse& response) { + if (node.Status != TAggregationStatistics::TNode::EStatus::Unavailable) { + return; + } + + for (const auto& range : node.Tablets) { + for (const auto& tabletId : range.Ids) { + auto failedTablet = response.AddFailedTablets(); + failedTablet->SetNodeId(range.NodeId); + failedTablet->SetTabletId(tabletId); + 
failedTablet->SetError(NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNAVAILABLE_NODE); + } + } + } + + void SendAggregateStatisticsResponse() { + SA_LOG_D("Send aggregate statistics response to node: " << AggregationStatistics.ParentNode.NodeId()); + + auto response = std::make_unique(); + auto& record = response->Record; + record.SetRound(AggregationStatistics.Round); + + const auto& countMinSketches = AggregationStatistics.CountMinSketches; + + for (auto it = countMinSketches.begin(); it != countMinSketches.end(); ++it) { + if (it->second.ContainedInResponse != AggregationStatistics.TotalStatisticsResponse) { + continue; + } + + auto column = record.AddColumns(); + column->SetTag(it->first); + + auto data = it->second.Statistics->AsStringBuf(); + auto statistics = column->AddStatistics(); + statistics->SetType(NKikimr::NStat::COUNT_MIN_SKETCH); + statistics->SetData(data.Data(), data.Size()); + } + + auto failedTablets = record.MutableFailedTablets(); + failedTablets->Reserve(AggregationStatistics.FailedTablets.size()); + + for (const auto& fail : AggregationStatistics.FailedTablets) { + auto failedTablet = failedTablets->Add(); + failedTablet->SetNodeId(fail.NodeId); + failedTablet->SetTabletId(fail.TabletId); + failedTablet->SetError(fail.Error); + } + + for (auto& node : AggregationStatistics.Nodes) { + AddUnavailableTablets(node, record); + } + + Send(AggregationStatistics.ParentNode, response.release(), 0, AggregationStatistics.Cookie); + } + + void SendRequestToNode(TAggregationStatistics::TNode& node, const NKikimrStat::TEvAggregateStatistics& record) { + if (node.Tablets.empty()) { + node.Status = TAggregationStatistics::TNode::EStatus::Processed; + ++AggregationStatistics.PprocessedNodes; + return; + } + + auto request = std::make_unique(); + request->Record.SetRound(AggregationStatistics.Round); + request->Record.MutableNodes()->Reserve(node.Tablets.size()); + + const auto& columnTags = record.GetColumnTags(); + if (!columnTags.empty()) { + 
request->Record.MutableColumnTags()->Assign(columnTags.begin(), columnTags.end()); + } + + auto pathId = request->Record.MutablePathId(); + pathId->SetOwnerId(AggregationStatistics.PathId.OwnerId); + pathId->SetLocalId(AggregationStatistics.PathId.LocalPathId); + + for (const auto& range : node.Tablets) { + auto recordNode = request->Record.AddNodes(); + recordNode->SetNodeId(range.NodeId); + + auto tabletIds = recordNode->MutableTabletIds(); + tabletIds->Reserve(range.Ids.size()); + + for (const auto& tabletId : range.Ids) { + tabletIds->Add(tabletId); + } + } + + // sending the request to the first node of the range + const auto nodeId = node.Tablets[0].NodeId; + node.Actor = MakeStatServiceID(nodeId); + node.Status = TAggregationStatistics::TNode::EStatus::Processing; + + Send(node.Actor, request.release()); + Schedule(Settings.AggregateKeepAliveTimeout, + new TEvPrivate::TEvKeepAliveTimeout(AggregationStatistics.Round, nodeId)); + } + + void Handle(TEvStatistics::TEvAggregateStatistics::TPtr& ev) { + const auto& record = ev->Get()->Record; + const auto round = record.GetRound(); + + SA_LOG_D("Received TEvAggregateStatistics from node: " << ev->Sender.NodeId() + << ", Round: " << round << ", current Round: " << AggregationStatistics.Round); + + // reset previous state + if (AggregationStatistics.Round != 0) { + ResetAggregationStatistics(); + } + + AggregationStatistics.Round = round; + AggregationStatistics.Cookie = ev->Cookie; + AggregationStatistics.ParentNode = ev->Sender; + + // schedule keep alive with the parent node + Schedule(Settings.AggregateKeepAlivePeriod, new TEvPrivate::TEvDispatchKeepAlive(round)); + + const auto& pathId = record.GetPathId(); + AggregationStatistics.PathId.OwnerId = pathId.GetOwnerId(); + AggregationStatistics.PathId.LocalPathId = pathId.GetLocalId(); + + for (const auto tag : record.GetColumnTags()) { + AggregationStatistics.ColumnTags.emplace_back(tag); + } + + const auto currentNodeId = ev->Recipient.NodeId(); + const auto& 
nodes = record.GetNodes(); + + // divide the entire range of nodes into two parts, + // forming the right and left child nodes + size_t k = 0; + for (const auto& node : nodes) { + if (node.GetNodeId() == currentNodeId) { + AggregationStatistics.LocalTablets.Ids.reserve(node.GetTabletIds().size()); + + for (const auto& tabletId : node.GetTabletIds()) { + AggregationStatistics.LocalTablets.Ids.push_back(tabletId); + } + continue; + } + + TAggregationStatistics::TTablets nodeTablets; + nodeTablets.NodeId = node.GetNodeId(); + nodeTablets.Ids.reserve(node.GetTabletIds().size()); + for (const auto& tabletId : node.GetTabletIds()) { + nodeTablets.Ids.push_back(tabletId); + } + + AggregationStatistics.Nodes[k % Settings.FanOutFactor].Tablets.push_back(std::move(nodeTablets)); + ++k; + } + + for (auto& node : AggregationStatistics.Nodes) { + SendRequestToNode(node, record); + } + + // to check the locality of the tablets, + // send requests to receive the IDs of the nodes + // where the tablets are located + auto& localTablets = AggregationStatistics.LocalTablets; + const auto count = std::min(Settings.MaxInFlightTabletRequests, + localTablets.Ids.size()); + for (size_t i = 0; i < count; ++i) { + SendRequestToNextTablet(); + } + } + + void Handle(TEvStatistics::TEvGetStatistics::TPtr& ev) { + ui64 requestId = NextRequestId++; + + auto& request = InFlight[requestId]; + request.ReplyToActorId = ev->Sender; + request.EvCookie = ev->Cookie; + request.StatType = ev->Get()->StatType; + request.StatRequests.swap(ev->Get()->StatRequests); + + if (!EnableStatistics || IsStatisticsDisabledInSA) { + ReplyFailed(requestId, true); + return; + } + + SA_LOG_D("Handle TEvStatistics::TEvGetStatistics, request id = " << requestId + << ", ReplyToActorId = " << request.ReplyToActorId + << ", StatRequests.size() = " << request.StatRequests.size()); + + if (request.StatType == EStatType::COUNT_MIN_SKETCH) { + request.StatResponses.reserve(request.StatRequests.size()); + ui32 reqIndex = 0; + for 
(const auto& req : request.StatRequests) { + auto& response = request.StatResponses.emplace_back(); + response.Req = req; + if (!req.ColumnTag) { + response.Success = false; + ++reqIndex; + continue; + } + ui64 loadCookie = NextLoadQueryCookie++; + LoadQueriesInFlight[loadCookie] = std::make_pair(requestId, reqIndex); + Register(CreateLoadStatisticsQuery(SelfId(), "", + req.PathId, request.StatType, *req.ColumnTag, loadCookie)); + ++request.ReplyCounter; + ++reqIndex; + } + return; + } + + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + auto navigate = std::make_unique(); + for (const auto& req : request.StatRequests) { + auto& entry = navigate->ResultSet.emplace_back(); + entry.TableId = TTableId(req.PathId.OwnerId, req.PathId.LocalPathId); + entry.Operation = TNavigate::EOp::OpPath; + entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; + } + navigate->Cookie = requestId; + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + std::unique_ptr navigate(ev->Get()->Request.Release()); + + auto cookie = navigate->Cookie; + + SA_LOG_D("Handle TEvTxProxySchemeCache::TEvNavigateKeySetResult, request id = " << cookie); + + if (cookie == ResolveSACookie) { + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + auto& entry = navigate->ResultSet.back(); + if (entry.Status != TNavigate::EStatus::Ok) { + StatisticsAggregatorId = 0; + } else if (entry.DomainInfo->Params.HasStatisticsAggregator()) { + StatisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); + } + ResolveSAStage = StatisticsAggregatorId ? 
RSA_FINISHED : RSA_INITIAL; + + if (StatisticsAggregatorId) { + ConnectToSA(); + SyncNode(); + } else { + for (auto it = InFlight.begin(); it != InFlight.end();) { + if (EStatType::COUNT_MIN_SKETCH == it->second.StatType) { + ++it; + continue; + } + ReplyFailed(it->first, false); + it = InFlight.erase(it); + } + } + return; + } + + ui64 requestId = cookie; + auto itRequest = InFlight.find(requestId); + if (itRequest == InFlight.end()) { + return; + } + auto& request = itRequest->second; + + if (!EnableStatistics) { + ReplyFailed(requestId, true); + return; + } + + std::unordered_set ssIds; + bool isServerless = false; + ui64 aggregatorId = 0; + TPathId domainKey, resourcesDomainKey; + for (const auto& entry : navigate->ResultSet) { + if (entry.Status != TNavigate::EStatus::Ok) { + continue; + } + auto& domainInfo = entry.DomainInfo; + ssIds.insert(domainInfo->ExtractSchemeShard()); + aggregatorId = domainInfo->Params.GetStatisticsAggregator(); + isServerless = domainInfo->IsServerless(); + domainKey = domainInfo->DomainKey; + resourcesDomainKey = domainInfo->ResourcesDomainKey; + } + if (ssIds.size() != 1) { + ReplyFailed(requestId, true); + return; + } + request.SchemeShardId = *ssIds.begin(); + + if (Statistics.find(request.SchemeShardId) != Statistics.end()) { + ReplySuccess(requestId, true); + return; + } + + bool isNewSS = (NeedSchemeShards.find(request.SchemeShardId) == NeedSchemeShards.end()); + if (isNewSS) { + NeedSchemeShards.insert(request.SchemeShardId); + } + + auto navigateDomainKey = [this] (TPathId domainKey) { + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + auto navigate = std::make_unique(); + auto& entry = navigate->ResultSet.emplace_back(); + entry.TableId = TTableId(domainKey.OwnerId, domainKey.LocalPathId); + entry.Operation = TNavigate::EOp::OpPath; + entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + navigate->Cookie = ResolveSACookie; + Send(MakeSchemeCacheID(), new 
TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); + ResolveSAStage = RSA_IN_FLIGHT; + }; + + switch (ResolveSAStage) { + case RSA_INITIAL: + if (!isServerless) { + if (aggregatorId) { + StatisticsAggregatorId = aggregatorId; + ResolveSAStage = RSA_FINISHED; + } else { + navigateDomainKey(domainKey); + return; + } + } else { + navigateDomainKey(resourcesDomainKey); + return; + } + break; + case RSA_IN_FLIGHT: + return; + default: + break; + } + + if (!StatisticsAggregatorId) { + ReplyFailed(requestId, true); + return; + } + + if (!SAPipeClientId) { + ConnectToSA(); + SyncNode(); + + } else if (isNewSS) { + auto requestStats = std::make_unique(); + requestStats->Record.SetNodeId(SelfId().NodeId()); + requestStats->Record.SetUrgent(false); + requestStats->Record.AddNeedSchemeShards(request.SchemeShardId); + NTabletPipe::SendData(SelfId(), SAPipeClientId, requestStats.release()); + + auto timeout = std::make_unique(); + timeout->NeedSchemeShards.insert(request.SchemeShardId); + timeout->PipeClientId = SAPipeClientId; + Schedule(RequestTimeout, timeout.release()); + } + } + + void Handle(TEvStatistics::TEvPropagateStatistics::TPtr& ev) { + SA_LOG_D("EvPropagateStatistics, node id = " << SelfId().NodeId()); + + Send(ev->Sender, new TEvStatistics::TEvPropagateStatisticsResponse); + + IsStatisticsDisabledInSA = false; + + auto* record = ev->Get()->MutableRecord(); + for (const auto& entry : record->GetEntries()) { + ui64 schemeShardId = entry.GetSchemeShardId(); + NeedSchemeShards.erase(schemeShardId); + auto& statisticsState = Statistics[schemeShardId]; + + if (entry.GetStats().empty()) { + continue; // stats are not ready in SA, wait for next cycle + } + + statisticsState.Map.clear(); + + NKikimrStat::TSchemeShardStats statRecord; + Y_PROTOBUF_SUPPRESS_NODISCARD statRecord.ParseFromString(entry.GetStats()); + + for (const auto& pathEntry : statRecord.GetEntries()) { + TPathId pathId(pathEntry.GetPathId().GetOwnerId(), pathEntry.GetPathId().GetLocalId()); + 
auto& mapEntry = statisticsState.Map[pathId]; + mapEntry.RowCount = pathEntry.GetRowCount(); + mapEntry.BytesSize = pathEntry.GetBytesSize(); + } + } + + for (auto itReq = InFlight.begin(); itReq != InFlight.end(); ) { + auto requestId = itReq->first; + auto requestState = itReq->second; + if (requestState.SchemeShardId == 0) { + ++itReq; + continue; + } + if (Statistics.find(requestState.SchemeShardId) != Statistics.end()) { + ReplySuccess(requestId, false); + itReq = InFlight.erase(itReq); + } else { + ++itReq; + } + } + + if (record->NodeIdsSize() == 0) { + return; + } + + std::vector nodeIds; + nodeIds.reserve(record->NodeIdsSize()); + for (const auto nodeId : record->GetNodeIds()) { + nodeIds.push_back(nodeId); + } + + size_t step = 0; + if (nodeIds.size() <= StatFanOut + 1) { + step = 0; + } else if (nodeIds.size() <= StatFanOut * (StatFanOut + 1)) { + step = StatFanOut; + } else { + step = nodeIds.size() / StatFanOut; + } + + auto serialized = std::make_unique(); + serialized->MutableRecord()->MutableEntries()->Swap(record->MutableEntries()); + TString preSerializedStats; + Y_PROTOBUF_SUPPRESS_NODISCARD serialized->GetRecord().SerializeToString(&preSerializedStats); + + for (size_t i = 0; i < nodeIds.size(); ) { + ui32 leadingNodeId = nodeIds[i++]; + + auto propagate = std::make_unique(); + propagate->MutableRecord()->MutableNodeIds()->Reserve(step); + for (size_t j = 0; i < nodeIds.size() && j < step; ++i, ++j) { + propagate->MutableRecord()->AddNodeIds(nodeIds[i]); + } + propagate->PreSerializedData = preSerializedStats; + Send(MakeStatServiceID(leadingNodeId), propagate.release()); + } + } + + void Handle(TEvPrivate::TEvStatisticsRequestTimeout::TPtr& ev) { + const auto round = ev->Get()->Round; + if (IsNotCurrentRound(round)) { + SA_LOG_D("Skip TEvStatisticsRequestTimeout"); + return; + } + + const auto tabletId = ev->Get()->TabletId; + auto tabletPipe = AggregationStatistics.LocalTablets.TabletsPipes.find(tabletId); + if (tabletPipe == 
AggregationStatistics.LocalTablets.TabletsPipes.end()) { + SA_LOG_D("Tablet " << tabletId << " has already been processed"); + return; + } + + SA_LOG_E("No result was received from the tablet " << tabletId); + + auto clientId = tabletPipe->second; + OnTabletError(tabletId); + NTabletPipe::CloseClient(SelfId(), clientId); + } + + void SendStatisticsRequest(const TActorId& clientId, ui64 tabletId) { + auto request = std::make_unique(); + auto& record = request->Record; + record.MutableTypes()->Add(NKikimrStat::TYPE_COUNT_MIN_SKETCH); + + auto* path = record.MutableTable()->MutablePathId(); + path->SetOwnerId(AggregationStatistics.PathId.OwnerId); + path->SetLocalId(AggregationStatistics.PathId.LocalPathId); + + auto* columnTags = record.MutableTable()->MutableColumnTags(); + for (const auto& tag : AggregationStatistics.ColumnTags) { + columnTags->Add(tag); + } + + const auto round = AggregationStatistics.Round; + NTabletPipe::SendData(SelfId(), clientId, request.release(), round); + Schedule(Settings.StatisticsRequestTimeout, new TEvPrivate::TEvStatisticsRequestTimeout(round, tabletId)); + + SA_LOG_D("TEvStatisticsRequest send" + << ", client id = " << clientId + << ", path = " << *path); + } + + void OnTabletError(ui64 tabletId) { + SA_LOG_D("Tablet " << tabletId << " is not local."); + + const auto error = NKikimrStat::TEvAggregateStatisticsResponse::TYPE_NON_LOCAL_TABLET; + AggregationStatistics.FailedTablets.emplace_back(tabletId, 0, error); + + AggregationStatistics.LocalTablets.TabletsPipes.erase(tabletId); + --AggregationStatistics.LocalTablets.InFlight; + SendRequestToNextTablet(); + + if (AggregationStatistics.IsCompleted()) { + OnAggregateStatisticsFinished(); + } + } + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + const auto& clientId = ev->Get()->ClientId; + const auto& tabletId = ev->Get()->TabletId; + + SA_LOG_D("EvClientConnected" + << ", node id = " << ev->Get()->ClientId.NodeId() + << ", client id = " << clientId + << ", server id = 
" << ev->Get()->ServerId + << ", tablet id = " << tabletId + << ", status = " << ev->Get()->Status); + + if (clientId == SAPipeClientId) { + IsStatisticsDisabledInSA = false; + if (ev->Get()->Status != NKikimrProto::OK) { + SAPipeClientId = TActorId(); + ConnectToSA(); + SyncNode(); + } + return; + } + + const auto& tabletsPipes = AggregationStatistics.LocalTablets.TabletsPipes; + auto tabletPipe = tabletsPipes.find(tabletId); + + if (tabletPipe != tabletsPipes.end() && clientId == tabletPipe->second) { + if (ev->Get()->Status == NKikimrProto::OK) { + SendStatisticsRequest(clientId, tabletId); + } else { + OnTabletError(tabletId); + } + return; + } + + SA_LOG_D("Skip EvClientConnected"); + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev) { + const auto& clientId = ev->Get()->ClientId; + const auto& tabletId = ev->Get()->TabletId; + + SA_LOG_D("EvClientDestroyed" + << ", node id = " << ev->Get()->ClientId.NodeId() + << ", client id = " << clientId + << ", server id = " << ev->Get()->ServerId + << ", tablet id = " << tabletId); + + if (clientId == SAPipeClientId) { + IsStatisticsDisabledInSA = false; + SAPipeClientId = TActorId(); + ConnectToSA(); + SyncNode(); + return; + } + + const auto& tabletsPipes = AggregationStatistics.LocalTablets.TabletsPipes; + auto tabletPipe = tabletsPipes.find(tabletId); + + if (tabletPipe != tabletsPipes.end() && clientId == tabletPipe->second) { + OnTabletError(tabletId); + return; + } + + SA_LOG_D("Skip EvClientDestroyed"); + } + + void Handle(TEvStatistics::TEvStatisticsIsDisabled::TPtr&) { + IsStatisticsDisabledInSA = true; + ReplyAllFailed(); + } + + void Handle(TEvStatistics::TEvLoadStatisticsQueryResponse::TPtr& ev) { + ui64 cookie = ev->Get()->Cookie; + auto itLoadQuery = LoadQueriesInFlight.find(cookie); + Y_ABORT_UNLESS(itLoadQuery != LoadQueriesInFlight.end()); + auto [requestId, requestIndex] = itLoadQuery->second; + + SA_LOG_D("TEvLoadStatisticsQueryResponse, request id = " << requestId); + + auto itRequest = 
InFlight.find(requestId); + if (InFlight.end() == itRequest) { + SA_LOG_E("TEvLoadStatisticsQueryResponse, request id = " << requestId + << ". Request not found in InFlight"); + return; + } + + auto& request = itRequest->second; + + auto& response = request.StatResponses[requestIndex]; + Y_ABORT_UNLESS(request.StatType == EStatType::COUNT_MIN_SKETCH); + + if (ev->Get()->Success) { + response.Success = true; + auto& data = ev->Get()->Data; + Y_ABORT_UNLESS(data); + response.CountMinSketch.CountMin.reset(TCountMinSketch::FromString(data->Data(), data->Size())); + } else { + response.Success = false; + } + + if (--request.ReplyCounter == 0) { + auto result = std::make_unique(); + result->Success = true; + result->StatResponses.swap(request.StatResponses); + + Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); + + InFlight.erase(requestId); + } + } + + void Handle(TEvPrivate::TEvRequestTimeout::TPtr& ev) { + SA_LOG_D("EvRequestTimeout" + << ", pipe client id = " << ev->Get()->PipeClientId + << ", schemeshard count = " << ev->Get()->NeedSchemeShards.size()); + + if (SAPipeClientId != ev->Get()->PipeClientId) { + return; + } + auto requestStats = std::make_unique(); + bool hasNeedSchemeShards = false; + for (auto& ssId : ev->Get()->NeedSchemeShards) { + if (NeedSchemeShards.find(ssId) != NeedSchemeShards.end()) { + requestStats->Record.AddNeedSchemeShards(ssId); + hasNeedSchemeShards = true; + } + } + if (!hasNeedSchemeShards) { + return; + } + requestStats->Record.SetNodeId(SelfId().NodeId()); + requestStats->Record.SetUrgent(true); + + NTabletPipe::SendData(SelfId(), SAPipeClientId, requestStats.release()); + } + + void ConnectToSA() { + if (SAPipeClientId || !StatisticsAggregatorId) { + return; + } + auto policy = NTabletPipe::TClientRetryPolicy::WithRetries(); + NTabletPipe::TClientConfig pipeConfig{policy}; + SAPipeClientId = Register(NTabletPipe::CreateClient(SelfId(), StatisticsAggregatorId, pipeConfig)); + + SA_LOG_D("ConnectToSA(), pipe client 
id = " << SAPipeClientId); + } + + void SyncNode() { + if (!SAPipeClientId || !StatisticsAggregatorId) { + return; + } + auto connect = std::make_unique(); + auto& record = connect->Record; + + auto timeout = std::make_unique(); + timeout->PipeClientId = SAPipeClientId; + + record.SetNodeId(SelfId().NodeId()); + for (const auto& [ssId, ssState] : Statistics) { + auto* entry = record.AddHaveSchemeShards(); + entry->SetSchemeShardId(ssId); + entry->SetTimestamp(ssState.Timestamp); + } + for (const auto& ssId : NeedSchemeShards) { + record.AddNeedSchemeShards(ssId); + timeout->NeedSchemeShards.insert(ssId); + } + NTabletPipe::SendData(SelfId(), SAPipeClientId, connect.release()); + + if (!NeedSchemeShards.empty()) { + Schedule(RequestTimeout, timeout.release()); + } + + SA_LOG_D("SyncNode(), pipe client id = " << SAPipeClientId); + } + + void ReplySuccess(ui64 requestId, bool eraseRequest) { + auto itRequest = InFlight.find(requestId); + if (itRequest == InFlight.end()) { + return; + } + auto& request = itRequest->second; + + SA_LOG_D("ReplySuccess(), request id = " << requestId + << ", ReplyToActorId = " << request.ReplyToActorId + << ", StatRequests.size() = " << request.StatRequests.size()); + + auto itStatistics = Statistics.find(request.SchemeShardId); + if (itStatistics == Statistics.end()) { + return; + } + auto& statisticsMap = itStatistics->second.Map; + + auto result = std::make_unique(); + result->Success = true; + + for (auto& req : request.StatRequests) { + TResponse rsp; + rsp.Success = true; + rsp.Req = req; + + TStatSimple stat; + auto itStat = statisticsMap.find(req.PathId); + if (itStat != statisticsMap.end()) { + stat.RowCount = itStat->second.RowCount; + stat.BytesSize = itStat->second.BytesSize; + } else { + stat.RowCount = 0; + stat.BytesSize = 0; + } + rsp.Simple = stat; + + result->StatResponses.push_back(rsp); + } + + Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); + + if (eraseRequest) { + InFlight.erase(requestId); + } + 
} + + void ReplyFailed(ui64 requestId, bool eraseRequest) { + auto itRequest = InFlight.find(requestId); + if (itRequest == InFlight.end()) { + return; + } + auto& request = itRequest->second; + + SA_LOG_D("ReplyFailed(), request id = " << requestId); + + auto result = std::make_unique(); + result->Success = false; + + for (auto& req : request.StatRequests) { + TResponse rsp; + rsp.Success = false; + rsp.Req = req; + + TStatSimple stat; + stat.RowCount = 0; + stat.BytesSize = 0; + rsp.Simple = stat; + + result->StatResponses.push_back(rsp); + } + + Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); + + if (eraseRequest) { + InFlight.erase(requestId); + } + } + + void ReplyAllFailed() { + for (const auto& [requestId, _] : InFlight) { + ReplyFailed(requestId, false); + } + InFlight.clear(); + } + + void PassAway() { + if (SAPipeClientId) { + NTabletPipe::CloseClient(SelfId(), SAPipeClientId); + } + TBase::PassAway(); + } + + void PrintStatServiceState(TStringStream& str) { + HTML(str) { + PRE() { + str << "---- StatisticsService ----" << Endl << Endl; + str << "StatisticsAggregatorId: " << StatisticsAggregatorId << Endl; + str << "SAPipeClientId: " << SAPipeClientId << Endl; + + str << "InFlight: " << InFlight.size(); + { + ui32 simple{ 0 }; + ui32 countMin{ 0 }; + for (auto it = InFlight.begin(); it != InFlight.end(); ++it) { + if (it->second.StatType == EStatType::SIMPLE) { + ++simple; + } else if (it->second.StatType == EStatType::COUNT_MIN_SKETCH) { + ++countMin; + } + } + str << "[SIMPLE: " << simple << ", COUNT_MIN_SKETCH: " << countMin << "]" << Endl; + } + str << "NextRequestId: " << NextRequestId << Endl; + + str << "LoadQueriesInFlight: " << LoadQueriesInFlight.size() << Endl; + str << "NextLoadQueryCookie: " << NextLoadQueryCookie << Endl; + + str << "NeedSchemeShards: " << NeedSchemeShards.size() << Endl; + str << "Statistics: " << Statistics.size() << Endl; + + str << "ResolveSAStage: "; + if (ResolveSAStage == RSA_INITIAL) { + str << 
"RSA_INITIAL"; + } else if (ResolveSAStage == RSA_IN_FLIGHT) { + str << "RSA_IN_FLIGHT"; + } + else { + str << "RSA_FINISHED"; + } + str << Endl; + + str << "AggregateKeepAlivePeriod: " << Settings.AggregateKeepAlivePeriod << Endl; + str << "AggregateKeepAliveTimeout: " << Settings.AggregateKeepAliveTimeout << Endl; + str << "AggregateKeepAliveAckTimeout: " << Settings.AggregateKeepAliveAckTimeout << Endl; + str << "StatisticsRequestTimeout: " << Settings.StatisticsRequestTimeout << Endl; + str << "MaxInFlightTabletRequests: " << Settings.MaxInFlightTabletRequests << Endl; + str << "FanOutFactor: " << Settings.FanOutFactor << Endl; + + str << "---- AggregationStatistics ----" << Endl; + str << "Round: " << AggregationStatistics.Round << Endl; + str << "Cookie: " << AggregationStatistics.Cookie << Endl; + str << "PathId: " << AggregationStatistics.PathId.ToString() << Endl; + str << "LastAckHeartbeat: " << AggregationStatistics.LastAckHeartbeat << Endl; + str << "ParentNode: " << AggregationStatistics.ParentNode << Endl; + str << "PprocessedNodes: " << AggregationStatistics.PprocessedNodes << Endl; + str << "TotalStatisticsResponse: " << AggregationStatistics.TotalStatisticsResponse << Endl; + str << "Nodes: " << AggregationStatistics.Nodes.size() << Endl; + str << "CountMinSketches: " << AggregationStatistics.CountMinSketches.size() << Endl; + } + } + } + + void AddPanel(IOutputStream& str, const TString& title, const std::function& bodyRender) { + HTML(str) { + DIV_CLASS("panel panel-default") { + DIV_CLASS("panel-heading") { + H4_CLASS("panel-title") { + str << title; + } + } + DIV_CLASS("panel-body") { + bodyRender(str); + } + } + } + } + + void PrintForm(TStringStream& str) { + HTML(str) { + AddPanel(str, "Analyze table", [](IOutputStream& str) { + HTML(str) { + FORM_CLASS("form-horizontal") { + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "path") { + str << "Path"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + str << ""; + 
DIV_CLASS("col-sm-2") { + str << ""; + } + } + } + } + }); + AddPanel(str, "Get operation status", [](IOutputStream& str) { + HTML(str) { + FORM_CLASS("form-horizontal") { + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "path") { + str << "Path"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + } + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "operation") { + str << "OperationId"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + str << ""; + DIV_CLASS("col-sm-2") { + str << ""; + } + } + } + } + }); + AddPanel(str, "Probe count-min sketch", [](IOutputStream& str) { + HTML(str) { + FORM_CLASS("form-horizontal") { + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "database") { + str << "Database"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + } + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "path") { + str << "Path"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + } + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "column") { + str << "ColumnName"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + } + DIV_CLASS("form-group") { + LABEL_CLASS_FOR("col-sm-2 control-label", "cell") { + str << "Value"; + } + DIV_CLASS("col-sm-8") { + str << ""; + } + + str << ""; + DIV_CLASS("col-sm-2") { + str << ""; + } + } + } + } + }); + + PrintStatServiceState(str); + } + } + + void Handle(NMon::TEvHttpInfoRes::TPtr& ev) { + if (HttpRequestActorId != ev->Sender) { + return; + } + + HttpRequestActorId = TActorId(); + + const auto* msg = ev->CastAsLocal(); + if (msg != nullptr) { + ReplyToMonitoring(msg->Answer); + } + } + + void ReplyToMonitoring(const TString& description) { + TStringStream str; + + if (!description.empty()) { + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12 alert alert-info") { + str << description; + } + } + } + } + + PrintForm(str); + Send(MonitoringActorId, new NMon::TEvHttpInfoRes(str.Str())); + } + + void 
Handle(NMon::TEvHttpInfo::TPtr& ev) { + if (!EnableColumnStatistics) { + Send(ev->Sender, new NMon::TEvHttpInfoRes("Column statistics is disabled")); + return; + } + + HttpRequestActorId = TActorId(); + MonitoringActorId = ev->Sender; + + const auto& request = ev->Get()->Request; + const auto& params = request.GetParams(); + + auto getRequestParam = [¶ms](const TString& name){ + auto it = params.find(name); + return it != params.end() ? it->second : TString(); + }; + + const auto action = getRequestParam("action"); + if (action.empty()) { + ReplyToMonitoring(""); + return; + } + + const auto path = getRequestParam("path"); + if (path.empty()) { + ReplyToMonitoring("'Path' parameter is required"); + return; + } + + if (action == "analyze") { + HttpRequestActorId = Register(new THttpRequest(THttpRequest::ERequestType::ANALYZE, { + { THttpRequest::EParamType::PATH, path } + }, SelfId())); + } else if (action == "status") { + const auto operationId = getRequestParam("operation"); + if (operationId.empty()) { + ReplyToMonitoring("'OperationId' parameter is required"); + return; + } + + HttpRequestActorId = Register(new THttpRequest(THttpRequest::ERequestType::STATUS, { + { THttpRequest::EParamType::PATH, path }, + { THttpRequest::EParamType::OPERATION_ID, operationId } + }, SelfId())); + } else if (action == "probe") { + const auto column = getRequestParam("column"); + if (column.empty()) { + ReplyToMonitoring("'ColumnName' parameter is required"); + return; + } + + const auto cell = getRequestParam("cell"); + if (cell.empty()) { + ReplyToMonitoring("'Value' parameter is required"); + return; + } + + const auto database = getRequestParam("database"); + if (database.empty()) { + ReplyToMonitoring("'Database' parameter is required"); + return; + } + + HttpRequestActorId = Register(new THttpRequest(THttpRequest::ERequestType::COUNT_MIN_SKETCH_PROBE, { + { THttpRequest::EParamType::DATABASE, database }, + { THttpRequest::EParamType::PATH, path }, + { 
THttpRequest::EParamType::COLUMN_NAME, column }, + { THttpRequest::EParamType::CELL_VALUE, cell } + }, SelfId())); + } else { + ReplyToMonitoring("Wrong 'action' parameter value"); + } + } + +private: + TStatServiceSettings Settings; + TAggregationStatistics AggregationStatistics; + + bool EnableStatistics = false; + bool EnableColumnStatistics = false; + bool IsStatisticsDisabledInSA = false; + + static constexpr size_t StatFanOut = 10; + + struct TRequestState { + NActors::TActorId ReplyToActorId; + ui64 EvCookie = 0; + ui64 SchemeShardId = 0; + EStatType StatType = EStatType::SIMPLE; + std::vector StatRequests; + std::vector StatResponses; + size_t ReplyCounter = 0; + }; + std::unordered_map InFlight; // request id -> state + ui64 NextRequestId = 1; + + std::unordered_map> LoadQueriesInFlight; // load cookie -> req id, req index + ui64 NextLoadQueryCookie = 1; + + std::unordered_set NeedSchemeShards; + + struct TStatEntry { + ui64 RowCount = 0; + ui64 BytesSize = 0; + }; + typedef std::unordered_map TStatisticsMap; + struct TStatisticsState { + TStatisticsMap Map; + ui64 Timestamp = 0; + }; + std::unordered_map Statistics; // ss id -> stats + + ui64 StatisticsAggregatorId = 0; + TActorId SAPipeClientId; + + static const ui64 ResolveSACookie = std::numeric_limits::max(); + enum EResolveSAStage { + RSA_INITIAL, + RSA_IN_FLIGHT, + RSA_FINISHED + }; + EResolveSAStage ResolveSAStage = RSA_INITIAL; + + static constexpr TDuration RequestTimeout = TDuration::MilliSeconds(100); + + TActorId HttpRequestActorId; + TActorId MonitoringActorId; +}; + +THolder CreateStatService(const TStatServiceSettings& settings) { + return MakeHolder(settings); +} + + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/service/ut/ut_basic_statistics.cpp b/ydb/core/statistics/service/ut/ut_basic_statistics.cpp new file mode 100644 index 000000000000..310623fcaf7a --- /dev/null +++ b/ydb/core/statistics/service/ut/ut_basic_statistics.cpp @@ -0,0 +1,189 @@ +#include + +#include + 
+#include +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NStat { + +using namespace NYdb; +using namespace NYdb::NTable; +using namespace NYdb::NScheme; + +namespace { + +void CreateTable(TTestEnv& env, const TString& databaseName, const TString& tableName, size_t rowCount) { + ExecuteYqlScript(env, Sprintf(R"( + CREATE TABLE `Root/%s/%s` ( + Key Uint64, + Value Uint64, + PRIMARY KEY (Key) + ); + )", databaseName.c_str(), tableName.c_str())); + + TStringBuilder replace; + replace << Sprintf("REPLACE INTO `Root/%s/%s` (Key, Value) VALUES ", + databaseName.c_str(), tableName.c_str()); + for (ui32 i = 0; i < rowCount; ++i) { + if (i > 0) { + replace << ", "; + } + replace << Sprintf("(%uu, %uu)", i, i); + } + replace << ";"; + ExecuteYqlScript(env, replace); +} + +void ValidateRowCount(TTestActorRuntime& runtime, ui32 nodeIndex, TPathId pathId, size_t expectedRowCount) { + auto statServiceId = NStat::MakeStatServiceID(runtime.GetNodeId(nodeIndex)); + ui64 rowCount = 0; + while (rowCount == 0) { + NStat::TRequest req; + req.PathId = pathId; + + auto evGet = std::make_unique(); + evGet->StatType = NStat::EStatType::SIMPLE; + evGet->StatRequests.push_back(req); + + auto sender = runtime.AllocateEdgeActor(nodeIndex); + runtime.Send(statServiceId, sender, evGet.release(), nodeIndex, true); + auto evResult = runtime.GrabEdgeEventRethrow(sender); + + UNIT_ASSERT(evResult); + UNIT_ASSERT(evResult->Get()); + UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); + + auto rsp = evResult->Get()->StatResponses[0]; + auto stat = rsp.Simple; + + rowCount = stat.RowCount; + + if (rowCount != 0) { + UNIT_ASSERT(stat.RowCount == expectedRowCount); + break; + } + + runtime.SimulateSleep(TDuration::Seconds(1)); + } +} + +} // namespace + +Y_UNIT_TEST_SUITE(BasicStatistics) { + + Y_UNIT_TEST(Simple) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateTable(env, "Database", 
"Table", 5); + + auto pathId = ResolvePathId(runtime, "/Root/Database/Table"); + ValidateRowCount(runtime, 1, pathId, 5); + } + + Y_UNIT_TEST(TwoNodes) { + TTestEnv env(1, 2); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database", 2); + CreateTable(env, "Database", "Table", 5); + + auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table"); + ValidateRowCount(runtime, 1, pathId1, 5); + ValidateRowCount(runtime, 2, pathId1, 5); + } + + Y_UNIT_TEST(TwoTables) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database"); + CreateTable(env, "Database", "Table1", 5); + CreateTable(env, "Database", "Table2", 6); + + auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Database/Table2"); + ValidateRowCount(runtime, 1, pathId1, 5); + ValidateRowCount(runtime, 1, pathId2, 6); + } + + Y_UNIT_TEST(TwoDatabases) { + TTestEnv env(1, 2); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Database1", 1, false, "hdd1"); + CreateDatabase(env, "Database2", 1, false, "hdd2"); + CreateTable(env, "Database1", "Table1", 5); + CreateTable(env, "Database2", "Table2", 6); + + auto pathId1 = ResolvePathId(runtime, "/Root/Database1/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Database2/Table2"); + ValidateRowCount(runtime, 2, pathId1, 5); + ValidateRowCount(runtime, 1, pathId2, 6); + } + + Y_UNIT_TEST(Serverless) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared", 1, true); + CreateServerlessDatabase(env, "Serverless", "/Root/Shared"); + CreateTable(env, "Serverless", "Table", 5); + + auto pathId = ResolvePathId(runtime, "/Root/Serverless/Table"); + ValidateRowCount(runtime, 1, pathId, 5); + } + + Y_UNIT_TEST(TwoServerlessDbs) { + TTestEnv env(1, 1); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared", 1, true); + 
CreateServerlessDatabase(env, "Serverless1", "/Root/Shared"); + CreateServerlessDatabase(env, "Serverless2", "/Root/Shared"); + CreateTable(env, "Serverless1", "Table1", 5); + CreateTable(env, "Serverless2", "Table2", 6); + + auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); + ValidateRowCount(runtime, 1, pathId1, 5); + ValidateRowCount(runtime, 1, pathId2, 6); + } + + Y_UNIT_TEST(TwoServerlessTwoSharedDbs) { + TTestEnv env(1, 2); + + auto& runtime = *env.GetServer().GetRuntime(); + + CreateDatabase(env, "Shared1", 1, true, "hdd1"); + CreateDatabase(env, "Shared2", 1, true, "hdd2"); + CreateServerlessDatabase(env, "Serverless1", "/Root/Shared1"); + CreateServerlessDatabase(env, "Serverless2", "/Root/Shared2"); + CreateTable(env, "Serverless1", "Table1", 5); + CreateTable(env, "Serverless2", "Table2", 6); + + auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); + auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); + ValidateRowCount(runtime, 2, pathId1, 5); + ValidateRowCount(runtime, 1, pathId2, 6); + } + +} + +} // NSysView +} // NKikimr diff --git a/ydb/core/statistics/service/ut/ut_http/ut_http_request.cpp b/ydb/core/statistics/service/ut/ut_http/ut_http_request.cpp new file mode 100644 index 000000000000..bb6924dd32f0 --- /dev/null +++ b/ydb/core/statistics/service/ut/ut_http/ut_http_request.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include + +namespace NKikimr { +namespace NStat { + +void AnalyzeTest(bool isServerless) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = isServerless + ? 
CreateServerlessDatabaseColumnTables(env, 1, 10) + : CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + const auto sender = runtime.AllocateEdgeActor(); + + runtime.Register(new THttpRequest(THttpRequest::ERequestType::ANALYZE, { + { THttpRequest::EParamType::PATH, tableInfo.Path } + }, sender)); + + auto res = runtime.GrabEdgeEvent(sender); + auto msg = static_cast(res->Get()); + + Cerr << "Answer: '" << msg->Answer << "'" << Endl; + const TString expected = "Analyze sent. OperationId:"; + UNIT_ASSERT_STRING_CONTAINS(msg->Answer, expected); +} + +void ProbeTest(bool isServerless) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = isServerless + ? CreateServerlessDatabaseColumnTables(env, 1, 10) + : CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + TString columnName = "Value"; + const auto sender = runtime.AllocateEdgeActor(); + + const auto operationId = TULIDGenerator().Next(TInstant::Now()).ToBinary(); + auto analyzeRequest = MakeAnalyzeRequest({{tableInfo.PathId, {1, 2}}}, operationId); + runtime.SendToPipe(tableInfo.SaTabletId, sender, analyzeRequest.release()); + runtime.GrabEdgeEventRethrow(sender); + + runtime.Register(new THttpRequest(THttpRequest::ERequestType::COUNT_MIN_SKETCH_PROBE, { + { THttpRequest::EParamType::DATABASE, databaseInfo.FullDatabaseName}, + { THttpRequest::EParamType::PATH, tableInfo.Path }, + { THttpRequest::EParamType::COLUMN_NAME, columnName }, + { THttpRequest::EParamType::CELL_VALUE, "1" } + }, sender)); + auto res = runtime.GrabEdgeEvent(sender); + auto msg = static_cast(res->Get()); + + Cerr << "Answer: '" << msg->Answer << "'" << Endl; + const TString expected = tableInfo.Path + "[" + columnName + "]="; + UNIT_ASSERT_STRING_CONTAINS(msg->Answer, expected); +} + +Y_UNIT_TEST_SUITE(HttpRequest) { + Y_UNIT_TEST(Analyze) { + AnalyzeTest(false); + } + + Y_UNIT_TEST(AnalyzeServerless) { + 
AnalyzeTest(true); + } + + Y_UNIT_TEST(Status) { + TTestEnv env(1, 1); + auto& runtime = *env.GetServer().GetRuntime(); + const auto databaseInfo = CreateDatabaseColumnTables(env, 1, 10); + const auto& tableInfo = databaseInfo.Tables[0]; + + const auto sender = runtime.AllocateEdgeActor(); + const auto operationId = TULIDGenerator().Next(TInstant::Now()).ToString(); + runtime.Register(new THttpRequest(THttpRequest::ERequestType::STATUS, { + { THttpRequest::EParamType::PATH, tableInfo.Path }, + { THttpRequest::EParamType::OPERATION_ID, operationId } + }, sender)); + + auto res = runtime.GrabEdgeEvent(sender); + auto msg = static_cast(res->Get()); + + Cerr << "Answer: '" << msg->Answer << "'" << Endl; + UNIT_ASSERT_EQUAL(msg->Answer, "No analyze operation"); + } + + Y_UNIT_TEST(Probe) { + ProbeTest(false); + } + + Y_UNIT_TEST(ProbeServerless) { + ProbeTest(true); + } +} + +} // NStat +} // NKikimr \ No newline at end of file diff --git a/ydb/core/statistics/service/ut/ut_http/ya.make b/ydb/core/statistics/service/ut/ut_http/ya.make new file mode 100644 index 000000000000..114a7168cbf6 --- /dev/null +++ b/ydb/core/statistics/service/ut/ut_http/ya.make @@ -0,0 +1,28 @@ +UNITTEST_FOR(ydb/core/statistics/service) + +FORK_SUBTESTS() + +IF (WITH_VALGRIND) + TIMEOUT(3600) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +YQL_LAST_ABI_VERSION() + +PEERDIR( + library/cpp/testing/unittest + ydb/core/protos + ydb/core/testlib/default + ydb/core/statistics/ut_common +) + +SRCS( + ut_http_request.cpp +) + +END() + diff --git a/ydb/core/statistics/service/ut/ut_service.cpp b/ydb/core/statistics/service/ut/ut_service.cpp new file mode 100644 index 000000000000..8425d12a4a9e --- /dev/null +++ b/ydb/core/statistics/service/ut/ut_service.cpp @@ -0,0 +1,599 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace NKikimr { +namespace NStat { + +using EResponseStatus = NKikimrStat::TEvStatisticsResponse::EStatus; 
+using EErrorType = NKikimrStat::TEvAggregateStatisticsResponse::EErrorType; + +struct TAggregateStatisticsRequest { + struct TTablets { + ui32 NodeId; + std::vector Ids; + }; + ui64 Round; + TPathId PathId; + std::vector Nodes; + std::vector ColumnTags; +}; + +struct TColumnItem { + ui32 Tag; + std::vector Cells; +}; + +struct TStatisticsResponse { + ui64 TabletId; + std::vector Columns; + EResponseStatus Status; +}; + +struct TAggregateStatisticsResponse { + struct TFailedTablet { + ui64 TabletId; + ui32 NodeId; + EErrorType Error; + }; + + ui64 Round; + std::vector Columns; + std::vector FailedTablets; +}; + +std::unique_ptr CreateStatisticsRequest(const TAggregateStatisticsRequest& data) { + auto ev = std::make_unique(); + auto& record = ev->Record; + record.SetRound(data.Round); + + PathIdFromPathId(data.PathId, record.MutablePathId()); + + auto columnTags = record.MutableColumnTags(); + for (auto tag : data.ColumnTags) { + columnTags->Add(tag); + } + + for (const auto& tablets : data.Nodes) { + auto node = record.AddNodes(); + node->SetNodeId(tablets.NodeId); + + auto tabletIds = node->MutableTabletIds(); + for (auto tabletId : tablets.Ids) { + tabletIds->Add(tabletId); + } + } + + return std::move(ev); +} + +std::unique_ptr CreateAggregateStatisticsResponse(const TAggregateStatisticsResponse& data) { + auto ev = std::make_unique(); + auto& record = ev->Record; + record.SetRound(data.Round); + + for (const auto& fail : data.FailedTablets) { + auto failedTablets = record.AddFailedTablets(); + failedTablets->SetTabletId(fail.TabletId); + failedTablets->SetNodeId(fail.NodeId); + failedTablets->SetError(fail.Error); + } + + for (const auto& col : data.Columns) { + auto column = record.AddColumns(); + column->SetTag(col.Tag); + + auto statistics = column->AddStatistics(); + statistics->SetType(NKikimr::NStat::COUNT_MIN_SKETCH); + auto sketch = std::unique_ptr(TCountMinSketch::Create()); + + for (const auto& cell : col.Cells) { + sketch->Count(cell.data(), 
cell.size()); + } + + auto buf = sketch->AsStringBuf(); + statistics->SetData(buf.Data(), buf.Size()); + } + + return std::move(ev); +} + +std::unique_ptr CreateStatisticsResponse(const TStatisticsResponse& data) { + auto ev = std::make_unique(); + auto& record = ev->Record; + record.SetShardTabletId(data.TabletId); + record.SetStatus(data.Status); + + for (const auto& col : data.Columns) { + auto column = record.AddColumns(); + column->SetTag(col.Tag); + + auto statistics = column->AddStatistics(); + statistics->SetType(NKikimr::NStat::COUNT_MIN_SKETCH); + auto sketch = std::unique_ptr(TCountMinSketch::Create()); + + for (const auto& cell : col.Cells) { + sketch->Count(cell.data(), cell.size()); + } + + auto buf = sketch->AsStringBuf(); + statistics->SetData(buf.Data(), buf.Size()); + } + + return std::move(ev); +} + +TStatServiceSettings GetDefaultSettings() { + auto settings = TStatServiceSettings(); + settings.AggregateKeepAlivePeriod = TDuration::Seconds(15); + settings.AggregateKeepAliveTimeout = TDuration::Seconds(30); + settings.AggregateKeepAliveAckTimeout = TDuration::Seconds(30); + settings.FanOutFactor = 2; + return settings; +} + +std::unordered_map InitializeRuntime(TTestActorRuntime& runtime, ui32 nodesCount, + const TStatServiceSettings& settings = GetDefaultSettings()) { + runtime.SetLogPriority(NKikimrServices::STATISTICS, NLog::EPriority::PRI_DEBUG); + runtime.SetScheduledEventFilter([](TTestActorRuntimeBase&, TAutoPtr&, TDuration, TInstant&){ + return false; + }); + + TIntrusivePtr nameserverTable(new TTableNameserverSetup()); + TPortManager pm; + + for (ui32 i = 1; i <= nodesCount; ++i) { + nameserverTable->StaticNodeTable[i] = std::pair("127.0.0." 
+ std::to_string(i), pm.GetPort(12000 + i)); + } + + auto nameserviceActor = GetNameserviceActorId(); + auto resolverActor = MakeTabletResolverID(); + auto resolverConfig = MakeIntrusive(); + std::unordered_map indexToActor; + + for (ui32 i = 0; i < nodesCount; ++i) { + ui32 nodeId = runtime.GetNodeId(i); + auto actorId = NStat::MakeStatServiceID(nodeId); + indexToActor.emplace(i, actorId); + + runtime.AddLocalService(actorId, TActorSetupCmd(NStat::CreateStatService(settings).Release(), TMailboxType::HTSwap, 0), i); + runtime.AddLocalService(resolverActor, TActorSetupCmd(CreateTabletResolver(resolverConfig), TMailboxType::Simple, 0), i); + runtime.AddLocalService(nameserviceActor, TActorSetupCmd(CreateNameserverTable(nameserverTable), TMailboxType::Simple, 0), i); + } + + TTestActorRuntime::TEgg egg{ new TAppData(0, 0, 0, 0, { }, nullptr, nullptr, nullptr, nullptr), nullptr, nullptr, {} }; + runtime.Initialize(egg); + + return indexToActor; +} + +std::unordered_map GetNodeIdToIndexMap(const std::unordered_map& map) { + std::unordered_map res; + for (auto it = map.begin(); it != map.end(); ++it) { + res.emplace(it->second.NodeId(), it->first); + } + return res; +} + +Y_UNIT_TEST_SUITE(StatisticsService) { + Y_UNIT_TEST(ShouldBeCorrectlyAggregateStatisticsFromAllNodes) { + size_t nodeCount = 4; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector localTabletsIds = {1, 2, 3}; + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{localTabletsIds}}, + {.NodeId = indexToActorMap[1].NodeId(), .Ids{4}}, {.NodeId = indexToActorMap[2].NodeId(), .Ids{5}}, + {.NodeId = indexToActorMap[3].NodeId(), .Ids{6}}}; + std::unordered_map pipeToTablet; + + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + auto tabletId = ev->Get()->TabletID; + auto 
recipient = indexToActorMap[nodeIdToIndexMap[ev->Sender.NodeId()]]; + pipeToTablet[ev->Sender] = tabletId; + + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientConnected(tabletId, NKikimrProto::OK, ev->Sender, ev->Sender, + true, false, 0), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + ev.Reset(); + })); + observers.emplace_back(runtime.AddObserver([&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case TEvTabletPipe::EvSend: + auto msg = ev->Get(); + if (msg != nullptr) { + auto tabletId = pipeToTablet[ev->Recipient]; + auto senderNodeIndex = nodeIdToIndexMap[ev->Sender.NodeId()]; + + if (tabletId <= localTabletsIds.back()) { + runtime.Send(new IEventHandle(ev->Sender, ev->Sender, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = tabletId, + .Columns{ + TColumnItem{.Tag = 1, .Cells{"1", "2"}}, + TColumnItem{.Tag = 2, .Cells{"3"}} + }, + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, ev->Cookie), senderNodeIndex, true); + } else { + runtime.Send(new IEventHandle(ev->Sender, ev->Sender, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = tabletId, + .Columns{ + TColumnItem{.Tag = 2, .Cells{"3"}} + }, + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, ev->Cookie), senderNodeIndex, true); + } + } + break; + } + })); + + auto sender = runtime.AllocateEdgeActor(); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1, 2} + }).release()); + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + + std::unordered_map> expected = { + {1, {{"1", localTabletsIds.size()}, {"2", localTabletsIds.size()}}}, + {2, {{"3", nodesTablets.size() - 1 + localTabletsIds.size()}}}, + }; + + const auto& columns = record.GetColumns(); + for (const auto& column : columns) { + const auto tag = 
column.GetTag(); + + for (auto& statistic : column.GetStatistics()) { + if (statistic.GetType() == NKikimr::NStat::COUNT_MIN_SKETCH) { + auto data = statistic.GetData().Data(); + auto sketch = reinterpret_cast(data); + + const auto& cells = expected[tag]; + for (auto it = cells.begin(); it != cells.end(); ++it) { + UNIT_ASSERT_VALUES_EQUAL(it->second, sketch->Probe(it->first.data(), it->first.size())); + } + } + } + } + } + + Y_UNIT_TEST(ShouldBePings) { + size_t nodeCount = 2; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount, + GetDefaultSettings() + .SetAggregateKeepAlivePeriod(TDuration::MilliSeconds(10)) + .SetAggregateKeepAliveTimeout(TDuration::Seconds(3)) + .SetAggregateKeepAliveAckTimeout(TDuration::Seconds(3))); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{1}}, + {.NodeId = indexToActorMap[1].NodeId(), .Ids{2}}}; + + std::vector ping(3); + std::vector pong(3); + auto sender = runtime.AllocateEdgeActor(); + + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + ev.Reset(); + })); + observers.emplace_back(runtime.AddObserver([&](TEvStatistics::TEvAggregateKeepAlive::TPtr& ev) { + if (ev->Recipient == sender) { + ++ping[0]; + ev.Reset(); + return; + } + + auto it = nodeIdToIndexMap.find(ev->Recipient.NodeId()); + if (it != nodeIdToIndexMap.end()) { + ++ping[it->second + 1]; + } + })); + observers.emplace_back(runtime.AddObserver([&](TEvStatistics::TEvAggregateKeepAliveAck::TPtr& ev) { + auto it = nodeIdToIndexMap.find(ev->Recipient.NodeId()); + if (it != nodeIdToIndexMap.end()) { + ++pong[it->second + 1]; + } + })); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1, 2} + }).release()); + + 
runtime.DispatchEvents(TDispatchOptions{ + .CustomFinalCondition = [&]() { + return ping[0] >= 10 && ping[1] >= 10 && pong[2] >= 10; + } + }); + + for (const auto& node : nodesTablets) { + for (auto tabletId : node.Ids) { + auto actorId = NStat::MakeStatServiceID(node.NodeId); + runtime.Send(new IEventHandle(actorId, actorId, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = tabletId, + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, 1), nodeIdToIndexMap[node.NodeId], true); + } + } + + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + UNIT_ASSERT(record.GetFailedTablets().empty()); + } + + Y_UNIT_TEST(RootNodeShouldBeInvalidateByTimeout) { + size_t nodeCount = 4; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount, + GetDefaultSettings() + .SetAggregateKeepAlivePeriod(TDuration::MilliSeconds(5)) + .SetAggregateKeepAliveTimeout(TDuration::MilliSeconds(10)) + .SetAggregateKeepAliveAckTimeout(TDuration::MilliSeconds(10))); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{1}}, + {.NodeId = indexToActorMap[1].NodeId(), .Ids{2}}, {.NodeId = indexToActorMap[2].NodeId(), .Ids{3}}, + {.NodeId = indexToActorMap[3].NodeId(), .Ids{4}}}; + std::unordered_map pipeToTablet; + + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + auto tabletId = ev->Get()->TabletID; + pipeToTablet[ev->Sender] = tabletId; + + if (tabletId == 2) { + ev.Reset(); + return; + } + + auto recipient = indexToActorMap[nodeIdToIndexMap[ev->Sender.NodeId()]]; + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientConnected(tabletId, NKikimrProto::OK, ev->Sender, ev->Sender, + true, false, 0), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + ev.Reset(); + })); + 
observers.emplace_back(runtime.AddObserver([&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case TEvTabletPipe::EvSend: + auto msg = ev->Get(); + if (msg != nullptr) { + runtime.Send(new IEventHandle(ev->Sender, ev->Sender, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = pipeToTablet[ev->Recipient], + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + } + break; + } + })); + observers.emplace_back(runtime.AddObserver([&](TEvStatistics::TEvAggregateKeepAliveAck::TPtr& ev) { + if (ev->Sender == indexToActorMap[1]) { + ev.Reset(); + return; + } + })); + + auto sender = runtime.AllocateEdgeActor(); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1, 2} + }).release()); + + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + size_t expectedFailedTabletsCount = 2; + UNIT_ASSERT_VALUES_EQUAL(expectedFailedTabletsCount, record.GetFailedTablets().size()); + + ui32 expectedError = NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNAVAILABLE_NODE; + for (const auto& fail : record.GetFailedTablets()) { + ui32 actualError = fail.GetError(); + UNIT_ASSERT_VALUES_EQUAL(expectedError, actualError); + } + } + + Y_UNIT_TEST(ChildNodesShouldBeInvalidateByTimeout) { + size_t nodeCount = 4; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount, + GetDefaultSettings() + .SetAggregateKeepAlivePeriod(TDuration::MilliSeconds(5)) + .SetAggregateKeepAliveTimeout(TDuration::MilliSeconds(10))); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{1}}, + {.NodeId = indexToActorMap[1].NodeId(), .Ids{2}}, {.NodeId = indexToActorMap[2].NodeId(), .Ids{3}}, + {.NodeId = 
indexToActorMap[3].NodeId(), .Ids{4}}}; + std::unordered_map pipeToTablet; + + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + auto tabletId = ev->Get()->TabletID; + pipeToTablet[ev->Sender] = tabletId; + + if (tabletId == 2) { + ev.Reset(); + return; + } + + auto recipient = indexToActorMap[nodeIdToIndexMap[ev->Sender.NodeId()]]; + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientConnected(tabletId, NKikimrProto::OK, ev->Sender, ev->Sender, + true, false, 0), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + ev.Reset(); + })); + observers.emplace_back(runtime.AddObserver([&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case TEvTabletPipe::EvSend: + auto msg = ev->Get(); + if (msg != nullptr) { + runtime.Send(new IEventHandle(ev->Sender, ev->Sender, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = pipeToTablet[ev->Recipient], + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + } + break; + } + })); + observers.emplace_back(runtime.AddObserver([&](TEvStatistics::TEvAggregateKeepAlive::TPtr& ev) { + if (ev->Sender == indexToActorMap[1]) { + ev.Reset(); + return; + } + })); + + auto sender = runtime.AllocateEdgeActor(); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1, 2} + }).release()); + + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + size_t expectedFailedTabletsCount = 2; + UNIT_ASSERT_VALUES_EQUAL(expectedFailedTabletsCount, record.GetFailedTablets().size()); + + ui32 expectedError = NKikimrStat::TEvAggregateStatisticsResponse::TYPE_UNAVAILABLE_NODE; + for (const auto& fail : record.GetFailedTablets()) { + ui32 actualError = fail.GetError(); + 
UNIT_ASSERT_VALUES_EQUAL(expectedError, actualError); + } + } + + Y_UNIT_TEST(ShouldBeCcorrectProcessingOfLocalTablets) { + size_t nodeCount = 1; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto settings = GetDefaultSettings() + .SetMaxInFlightTabletRequests(3); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount, settings); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector localTabletsIds = {1, 2, 3, 4, 5, 6, 7, 8}; + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{localTabletsIds}}}; + + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + auto tabletId = ev->Get()->TabletID; + auto senderNodeIndex = nodeIdToIndexMap[ev->Sender.NodeId()]; + auto recipient = indexToActorMap[senderNodeIndex]; + + if (tabletId % 3 == 1) { + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientConnected(tabletId, NKikimrProto::ERROR, ev->Sender, ev->Sender, + true, false, 0), 0, ev->Cookie), senderNodeIndex, true); + } else if(tabletId % 3 == 2) { + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientDestroyed(tabletId, ev->Sender, ev->Sender), 0, ev->Cookie), senderNodeIndex, true); + } else { + auto actor = indexToActorMap[senderNodeIndex]; + runtime.Send(new IEventHandle(actor, actor, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = tabletId, + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, 1), senderNodeIndex, true); + } + ev.Reset(); + })); + + auto sender = runtime.AllocateEdgeActor(); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1} + }).release()); + + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + size_t expectedFailedTabletsCount = 6; + 
UNIT_ASSERT_VALUES_EQUAL(expectedFailedTabletsCount, record.GetFailedTablets().size()); + + ui32 expectedError = NKikimrStat::TEvAggregateStatisticsResponse::TYPE_NON_LOCAL_TABLET; + for (const auto& fail : record.GetFailedTablets()) { + ui32 actualError = fail.GetError(); + UNIT_ASSERT_VALUES_EQUAL(expectedError, actualError); + } + } + + Y_UNIT_TEST(ShouldBeCcorrectProcessingTabletTimeout) { + size_t nodeCount = 1; + auto runtime = TTestActorRuntime(nodeCount, 1, false); + auto settings = GetDefaultSettings() + .SetStatisticsRequestTimeout(TDuration::MilliSeconds(10)); + auto indexToActorMap = InitializeRuntime(runtime, nodeCount, settings); + auto nodeIdToIndexMap = GetNodeIdToIndexMap(indexToActorMap); + std::vector localTabletsIds = {1, 2, 3, 4, 5, 6, 7}; + std::vector nodesTablets = {{.NodeId = indexToActorMap[0].NodeId(), .Ids{localTabletsIds}}}; + + std::unordered_map pipeToTablet; + std::vector observers; + observers.emplace_back(runtime.AddObserver([&](TEvTabletResolver::TEvForward::TPtr& ev) { + auto tabletId = ev->Get()->TabletID; + auto recipient = indexToActorMap[nodeIdToIndexMap[ev->Sender.NodeId()]]; + pipeToTablet[ev->Sender] = tabletId; + + runtime.Send(new IEventHandle(recipient, ev->Sender, + new TEvTabletPipe::TEvClientConnected(tabletId, NKikimrProto::OK, ev->Sender, ev->Sender, + true, false, 0), 0, ev->Cookie), nodeIdToIndexMap[ev->Sender.NodeId()], true); + ev.Reset(); + })); + observers.emplace_back(runtime.AddObserver([&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case TEvTabletPipe::EvSend: + auto msg = ev->Get(); + if (msg != nullptr) { + auto tabletId = pipeToTablet[ev->Recipient]; + if (tabletId % 2 != 0) { + auto senderNodeIndex = nodeIdToIndexMap[ev->Sender.NodeId()]; + runtime.Send(new IEventHandle(ev->Sender, ev->Sender, + CreateStatisticsResponse(TStatisticsResponse{ + .TabletId = tabletId, + .Status = NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS + }).release(), 0, ev->Cookie), senderNodeIndex, true); + } + } + 
break; + } + })); + + auto sender = runtime.AllocateEdgeActor(); + runtime.Send(indexToActorMap[0], sender, CreateStatisticsRequest(TAggregateStatisticsRequest{ + .Round = 1, + .PathId{3, 3}, + .Nodes{ nodesTablets }, + .ColumnTags{1} + }).release()); + + auto res = runtime.GrabEdgeEvent(sender); + const auto& record = res->Get()->Record; + size_t expectedFailedTabletsCount = 3; + UNIT_ASSERT_VALUES_EQUAL(expectedFailedTabletsCount, record.GetFailedTablets().size()); + + ui32 expectedError = NKikimrStat::TEvAggregateStatisticsResponse::TYPE_NON_LOCAL_TABLET; + for (const auto& fail : record.GetFailedTablets()) { + ui32 actualError = fail.GetError(); + UNIT_ASSERT_VALUES_EQUAL(expectedError, actualError); + } + } +} + +} // NSysView +} // NKikimr diff --git a/ydb/core/statistics/service/ut/ya.make b/ydb/core/statistics/service/ut/ya.make new file mode 100644 index 000000000000..1b32051659da --- /dev/null +++ b/ydb/core/statistics/service/ut/ya.make @@ -0,0 +1,32 @@ +UNITTEST_FOR(ydb/core/statistics/service) + +FORK_SUBTESTS() + +IF (WITH_VALGRIND) + TIMEOUT(3600) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +YQL_LAST_ABI_VERSION() + +PEERDIR( + library/cpp/testing/unittest + ydb/core/protos + ydb/core/testlib/default + ydb/core/statistics/ut_common +) + +SRCS( + ut_basic_statistics.cpp + ut_service.cpp +) + +END() + +RECURSE_FOR_TESTS( + ut_http +) diff --git a/ydb/core/statistics/service/ya.make b/ydb/core/statistics/service/ya.make new file mode 100644 index 000000000000..70d536ba7e6d --- /dev/null +++ b/ydb/core/statistics/service/ya.make @@ -0,0 +1,28 @@ +LIBRARY() + +SRCS( + http_request.h + http_request.cpp + service.h + service.cpp + service_impl.cpp +) + +PEERDIR( + ydb/core/base + ydb/core/engine/minikql + ydb/core/protos + ydb/core/tablet + ydb/core/tablet_flat + ydb/core/statistics/database + ydb/library/minsketch +) + +YQL_LAST_ABI_VERSION() + +END() + +RECURSE_FOR_TESTS( + ut +) + diff --git 
a/ydb/core/statistics/stat_service.cpp b/ydb/core/statistics/stat_service.cpp deleted file mode 100644 index 548a80c89028..000000000000 --- a/ydb/core/statistics/stat_service.cpp +++ /dev/null @@ -1,892 +0,0 @@ -#include "stat_service.h" -#include "events.h" -#include "save_load_stats.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace NKikimr { -namespace NStat { - -class THttpRequest : public TActorBootstrapped { -public: - using TBase = TActorBootstrapped; - - static constexpr auto ActorActivityType() { - return NKikimrServices::TActivity::STAT_SERVICE_HTTP_REQUEST; - } - - void Bootstrap() { - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - auto navigate = std::make_unique(); - auto& entry = navigate->ResultSet.emplace_back(); - entry.Path = SplitPath(Path); - entry.Operation = TNavigate::EOp::OpTable; - entry.RequestType = TNavigate::TEntry::ERequestType::ByPath; - navigate->Cookie = FirstRoundCookie; - - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); - - Become(&THttpRequest::StateWork); - } - - STFUNC(StateWork) { - switch(ev->GetTypeRewrite()) { - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvStatistics::TEvScanTableAccepted, Handle); - hFunc(TEvStatistics::TEvGetScanStatusResponse, Handle); - hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); - IgnoreFunc(TEvStatistics::TEvScanTableResponse); - default: - LOG_CRIT_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "NStat::THttpRequest: unexpected event# " << ev->GetTypeRewrite()); - } - } - - enum EType { - ANALYZE, - STATUS - }; - - THttpRequest(EType type, const TString& path, TActorId replyToActorId) - : Type(type) - , Path(path) - , ReplyToActorId(replyToActorId) - {} - -private: - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - using TNavigate = 
NSchemeCache::TSchemeCacheNavigate; - std::unique_ptr navigate(ev->Get()->Request.Release()); - Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); - auto& entry = navigate->ResultSet.front(); - - if (navigate->Cookie == SecondRoundCookie) { - if (entry.Status != TNavigate::EStatus::Ok) { - HttpReply("Internal error"); - return; - } - if (entry.DomainInfo->Params.HasStatisticsAggregator()) { - StatisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); - } - ResolveSuccess(); - return; - } - - if (entry.Status != TNavigate::EStatus::Ok) { - switch (entry.Status) { - case TNavigate::EStatus::PathErrorUnknown: - HttpReply("Path does not exist"); - return; - case TNavigate::EStatus::PathNotPath: - HttpReply("Invalid path"); - return; - case TNavigate::EStatus::PathNotTable: - HttpReply("Path is not a table"); - return; - default: - HttpReply("Internal error"); - return; - } - } - - PathId = entry.TableId.PathId; - - auto& domainInfo = entry.DomainInfo; - ui64 aggregatorId = 0; - if (domainInfo->Params.HasStatisticsAggregator()) { - aggregatorId = domainInfo->Params.GetStatisticsAggregator(); - } - bool isServerless = domainInfo->IsServerless(); - TPathId domainKey = domainInfo->DomainKey; - TPathId resourcesDomainKey = domainInfo->ResourcesDomainKey; - - auto navigateDomainKey = [this] (TPathId domainKey) { - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - auto navigate = std::make_unique(); - auto& entry = navigate->ResultSet.emplace_back(); - entry.TableId = TTableId(domainKey.OwnerId, domainKey.LocalPathId); - entry.Operation = TNavigate::EOp::OpPath; - entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; - entry.RedirectRequired = false; - navigate->Cookie = SecondRoundCookie; - - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); - }; - - if (!isServerless) { - if (aggregatorId) { - StatisticsAggregatorId = aggregatorId; - ResolveSuccess(); - } else { - navigateDomainKey(domainKey); - } - 
} else { - navigateDomainKey(resourcesDomainKey); - } - } - - void Handle(TEvStatistics::TEvScanTableAccepted::TPtr&) { - HttpReply("Scan accepted"); - } - - void Handle(TEvStatistics::TEvGetScanStatusResponse::TPtr& ev) { - auto& record = ev->Get()->Record; - switch (record.GetStatus()) { - case NKikimrStat::TEvGetScanStatusResponse::NO_OPERATION: - HttpReply("No scan operation"); - break; - case NKikimrStat::TEvGetScanStatusResponse::ENQUEUED: - HttpReply("Scan is enqueued"); - break; - case NKikimrStat::TEvGetScanStatusResponse::IN_PROGRESS: - HttpReply("Scan is in progress"); - break; - } - } - - void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr&) { - HttpReply("Delivery problem"); - } - - void ResolveSuccess() { - if (StatisticsAggregatorId == 0) { - HttpReply("No statistics aggregator"); - return; - } - - if (Type == ANALYZE) { - auto scanTable = std::make_unique(); - auto& record = scanTable->Record; - PathIdFromPathId(PathId, record.MutablePathId()); - - Send(MakePipePerNodeCacheID(false), - new TEvPipeCache::TEvForward(scanTable.release(), StatisticsAggregatorId, true)); - } else { - auto getStatus = std::make_unique(); - auto& record = getStatus->Record; - PathIdFromPathId(PathId, record.MutablePathId()); - - Send(MakePipePerNodeCacheID(false), - new TEvPipeCache::TEvForward(getStatus.release(), StatisticsAggregatorId, true)); - } - } - - void HttpReply(const TString& msg) { - Send(ReplyToActorId, new NMon::TEvHttpInfoRes(msg)); - PassAway(); - } - - void PassAway() { - Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvUnlink(0)); - TBase::PassAway(); - } - -private: - const EType Type; - const TString Path; - const TActorId ReplyToActorId; - - TPathId PathId; - ui64 StatisticsAggregatorId = 0; - - static const ui64 FirstRoundCookie = 1; - static const ui64 SecondRoundCookie = 2; -}; - - -class TStatService : public TActorBootstrapped { -public: - using TBase = TActorBootstrapped; - - static constexpr auto ActorActivityType() { - return 
NKikimrServices::TActivity::STAT_SERVICE; - } - - struct TEvPrivate { - enum EEv { - EvRequestTimeout = EventSpaceBegin(TEvents::ES_PRIVATE), - - EvEnd - }; - - struct TEvRequestTimeout : public TEventLocal { - std::unordered_set NeedSchemeShards; - TActorId PipeClientId; - }; - }; - - void Bootstrap() { - EnableStatistics = AppData()->FeatureFlags.GetEnableStatistics(); - EnableColumnStatistics = AppData()->FeatureFlags.GetEnableColumnStatistics(); - - ui32 configKind = (ui32) NKikimrConsole::TConfigItem::FeatureFlagsItem; - Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), - new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({configKind})); - - NActors::TMon* mon = AppData()->Mon; - if (mon) { - NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); - mon->RegisterActorPage(actorsMonPage, "statservice", "Statistics service", - false, TlsActivationContext->ExecutorThread.ActorSystem, SelfId()); - } - - Become(&TStatService::StateWork); - } - - STFUNC(StateWork) { - switch(ev->GetTypeRewrite()) { - hFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, HandleConfig) - hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, HandleConfig) - hFunc(TEvStatistics::TEvGetStatistics, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvStatistics::TEvPropagateStatistics, Handle); - IgnoreFunc(TEvStatistics::TEvPropagateStatisticsResponse); - hFunc(TEvTabletPipe::TEvClientConnected, Handle); - hFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - hFunc(TEvStatistics::TEvStatisticsIsDisabled, Handle); - hFunc(TEvStatistics::TEvLoadStatisticsQueryResponse, Handle); - hFunc(TEvPrivate::TEvRequestTimeout, Handle); - hFunc(NMon::TEvHttpInfo, Handle); - cFunc(TEvents::TEvPoison::EventType, PassAway); - default: - LOG_CRIT_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "NStat::TStatService: unexpected event# " << ev->GetTypeRewrite()); - } - } - 
-private: - void HandleConfig(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse::TPtr&) { - LOG_INFO_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "Subscribed for config changes on node " << SelfId().NodeId()); - } - - void HandleConfig(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { - const auto& record = ev->Get()->Record; - const auto& config = record.GetConfig(); - if (config.HasFeatureFlags()) { - const auto& featureFlags = config.GetFeatureFlags(); - EnableStatistics = featureFlags.GetEnableStatistics(); - EnableColumnStatistics = featureFlags.GetEnableColumnStatistics(); - if (!EnableStatistics) { - ReplyAllFailed(); - } - } - auto response = std::make_unique(record); - Send(ev->Sender, response.release(), 0, ev->Cookie); - } - - void Handle(TEvStatistics::TEvGetStatistics::TPtr& ev) { - ui64 requestId = NextRequestId++; - - auto& request = InFlight[requestId]; - request.ReplyToActorId = ev->Sender; - request.EvCookie = ev->Cookie; - request.StatType = ev->Get()->StatType; - request.StatRequests.swap(ev->Get()->StatRequests); - - if (!EnableStatistics) { - ReplyFailed(requestId, true); - return; - } - - if (request.StatType == EStatType::COUNT_MIN_SKETCH) { - request.StatResponses.reserve(request.StatRequests.size()); - ui32 reqIndex = 0; - for (const auto& req : request.StatRequests) { - auto& response = request.StatResponses.emplace_back(); - response.Req = req; - ui64 loadCookie = NextLoadQueryCookie++; - LoadQueriesInFlight[loadCookie] = std::make_pair(requestId, reqIndex); - Register(CreateLoadStatisticsQuery(req.PathId, request.StatType, - *req.ColumnTag, loadCookie)); - ++request.ReplyCounter; - ++reqIndex; - } - return; - } - - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - auto navigate = std::make_unique(); - for (const auto& req : request.StatRequests) { - auto& entry = navigate->ResultSet.emplace_back(); - entry.TableId = TTableId(req.PathId.OwnerId, req.PathId.LocalPathId); - 
entry.Operation = TNavigate::EOp::OpPath; - entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; - } - navigate->Cookie = requestId; - - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - std::unique_ptr navigate(ev->Get()->Request.Release()); - - auto cookie = navigate->Cookie; - - if (cookie == ResolveSACookie) { - Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); - auto& entry = navigate->ResultSet.back(); - if (entry.Status != TNavigate::EStatus::Ok) { - StatisticsAggregatorId = 0; - } else if (entry.DomainInfo->Params.HasStatisticsAggregator()) { - StatisticsAggregatorId = entry.DomainInfo->Params.GetStatisticsAggregator(); - } - ResolveSAStage = StatisticsAggregatorId ? RSA_FINISHED : RSA_INITIAL; - - if (StatisticsAggregatorId) { - ConnectToSA(); - SyncNode(); - } else { - ReplyAllFailed(); - } - return; - } - - ui64 requestId = cookie; - auto itRequest = InFlight.find(requestId); - if (itRequest == InFlight.end()) { - return; - } - auto& request = itRequest->second; - - if (!EnableStatistics) { - ReplyFailed(requestId, true); - return; - } - - std::unordered_set ssIds; - bool isServerless = false; - ui64 aggregatorId = 0; - TPathId domainKey, resourcesDomainKey; - for (const auto& entry : navigate->ResultSet) { - if (entry.Status != TNavigate::EStatus::Ok) { - continue; - } - auto& domainInfo = entry.DomainInfo; - ssIds.insert(domainInfo->ExtractSchemeShard()); - aggregatorId = domainInfo->Params.GetStatisticsAggregator(); - isServerless = domainInfo->IsServerless(); - domainKey = domainInfo->DomainKey; - resourcesDomainKey = domainInfo->ResourcesDomainKey; - } - if (ssIds.size() != 1) { - ReplyFailed(requestId, true); - return; - } - request.SchemeShardId = *ssIds.begin(); - - if (Statistics.find(request.SchemeShardId) != Statistics.end()) { - ReplySuccess(requestId, 
true); - return; - } - - bool isNewSS = (NeedSchemeShards.find(request.SchemeShardId) == NeedSchemeShards.end()); - if (isNewSS) { - NeedSchemeShards.insert(request.SchemeShardId); - } - - auto navigateDomainKey = [this] (TPathId domainKey) { - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - auto navigate = std::make_unique(); - auto& entry = navigate->ResultSet.emplace_back(); - entry.TableId = TTableId(domainKey.OwnerId, domainKey.LocalPathId); - entry.Operation = TNavigate::EOp::OpPath; - entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; - entry.RedirectRequired = false; - navigate->Cookie = ResolveSACookie; - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); - ResolveSAStage = RSA_IN_FLIGHT; - }; - - switch (ResolveSAStage) { - case RSA_INITIAL: - if (!isServerless) { - if (aggregatorId) { - StatisticsAggregatorId = aggregatorId; - ResolveSAStage = RSA_FINISHED; - } else { - navigateDomainKey(domainKey); - return; - } - } else { - navigateDomainKey(resourcesDomainKey); - return; - } - break; - case RSA_IN_FLIGHT: - return; - default: - break; - } - - if (!StatisticsAggregatorId) { - ReplyFailed(requestId, true); - return; - } - - if (!SAPipeClientId) { - ConnectToSA(); - SyncNode(); - - } else if (isNewSS) { - auto requestStats = std::make_unique(); - requestStats->Record.SetNodeId(SelfId().NodeId()); - requestStats->Record.SetUrgent(false); - requestStats->Record.AddNeedSchemeShards(request.SchemeShardId); - NTabletPipe::SendData(SelfId(), SAPipeClientId, requestStats.release()); - - auto timeout = std::make_unique(); - timeout->NeedSchemeShards.insert(request.SchemeShardId); - timeout->PipeClientId = SAPipeClientId; - Schedule(RequestTimeout, timeout.release()); - } - } - - void Handle(TEvStatistics::TEvPropagateStatistics::TPtr& ev) { - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "EvPropagateStatistics, node id = " << SelfId().NodeId()); - - 
Send(ev->Sender, new TEvStatistics::TEvPropagateStatisticsResponse); - - auto* record = ev->Get()->MutableRecord(); - for (const auto& entry : record->GetEntries()) { - ui64 schemeShardId = entry.GetSchemeShardId(); - NeedSchemeShards.erase(schemeShardId); - auto& statisticsState = Statistics[schemeShardId]; - - if (entry.GetStats().empty()) { - continue; // stats are not ready in SA, wait for next cycle - } - - statisticsState.Map.clear(); - - NKikimrStat::TSchemeShardStats statRecord; - Y_PROTOBUF_SUPPRESS_NODISCARD statRecord.ParseFromString(entry.GetStats()); - - for (const auto& pathEntry : statRecord.GetEntries()) { - TPathId pathId(pathEntry.GetPathId().GetOwnerId(), pathEntry.GetPathId().GetLocalId()); - auto& mapEntry = statisticsState.Map[pathId]; - mapEntry.RowCount = pathEntry.GetRowCount(); - mapEntry.BytesSize = pathEntry.GetBytesSize(); - } - } - - for (auto itReq = InFlight.begin(); itReq != InFlight.end(); ) { - auto requestId = itReq->first; - auto requestState = itReq->second; - if (requestState.SchemeShardId == 0) { - ++itReq; - continue; - } - if (Statistics.find(requestState.SchemeShardId) != Statistics.end()) { - ReplySuccess(requestId, false); - itReq = InFlight.erase(itReq); - } else { - ++itReq; - } - } - - if (record->NodeIdsSize() == 0) { - return; - } - - std::vector nodeIds; - nodeIds.reserve(record->NodeIdsSize()); - for (const auto nodeId : record->GetNodeIds()) { - nodeIds.push_back(nodeId); - } - - size_t step = 0; - if (nodeIds.size() <= StatFanOut + 1) { - step = 0; - } else if (nodeIds.size() <= StatFanOut * (StatFanOut + 1)) { - step = StatFanOut; - } else { - step = nodeIds.size() / StatFanOut; - } - - auto serialized = std::make_unique(); - serialized->MutableRecord()->MutableEntries()->Swap(record->MutableEntries()); - TString preSerializedStats; - Y_PROTOBUF_SUPPRESS_NODISCARD serialized->GetRecord().SerializeToString(&preSerializedStats); - - for (size_t i = 0; i < nodeIds.size(); ) { - ui32 leadingNodeId = nodeIds[i++]; - 
- auto propagate = std::make_unique(); - propagate->MutableRecord()->MutableNodeIds()->Reserve(step); - for (size_t j = 0; i < nodeIds.size() && j < step; ++i, ++j) { - propagate->MutableRecord()->AddNodeIds(nodeIds[i]); - } - propagate->PreSerializedData = preSerializedStats; - Send(MakeStatServiceID(leadingNodeId), propagate.release()); - } - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "EvClientConnected" - << ", node id = " << ev->Get()->ClientId.NodeId() - << ", client id = " << ev->Get()->ClientId - << ", server id = " << ev->Get()->ServerId - << ", status = " << ev->Get()->Status); - - if (ev->Get()->Status != NKikimrProto::OK) { - SAPipeClientId = TActorId(); - ConnectToSA(); - SyncNode(); - } - } - - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev) { - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "EvClientDestroyed" - << ", node id = " << ev->Get()->ClientId.NodeId() - << ", client id = " << ev->Get()->ClientId - << ", server id = " << ev->Get()->ServerId); - - SAPipeClientId = TActorId(); - ConnectToSA(); - SyncNode(); - } - - void Handle(TEvStatistics::TEvStatisticsIsDisabled::TPtr&) { - ReplyAllFailed(); - } - - void Handle(TEvStatistics::TEvLoadStatisticsQueryResponse::TPtr& ev) { - ui64 cookie = ev->Get()->Cookie; - - auto itLoadQuery = LoadQueriesInFlight.find(cookie); - Y_ABORT_UNLESS(itLoadQuery != LoadQueriesInFlight.end()); - auto [requestId, requestIndex] = itLoadQuery->second; - - auto itRequest = InFlight.find(requestId); - Y_ABORT_UNLESS(itRequest != InFlight.end()); - auto& request = itRequest->second; - - auto& response = request.StatResponses[requestIndex]; - Y_ABORT_UNLESS(request.StatType == EStatType::COUNT_MIN_SKETCH); - - if (ev->Get()->Success) { - response.Success = true; - auto& data = ev->Get()->Data; - Y_ABORT_UNLESS(data); - 
response.CountMinSketch.CountMin.reset(TCountMinSketch::FromString(data->Data(), data->Size())); - } else { - response.Success = false; - } - - if (--request.ReplyCounter == 0) { - auto result = std::make_unique(); - result->Success = true; - result->StatResponses.swap(request.StatResponses); - - Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); - - InFlight.erase(requestId); - } - } - - void Handle(TEvPrivate::TEvRequestTimeout::TPtr& ev) { - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "EvRequestTimeout" - << ", pipe client id = " << ev->Get()->PipeClientId - << ", schemeshard count = " << ev->Get()->NeedSchemeShards.size()); - - if (SAPipeClientId != ev->Get()->PipeClientId) { - return; - } - auto requestStats = std::make_unique(); - bool hasNeedSchemeShards = false; - for (auto& ssId : ev->Get()->NeedSchemeShards) { - if (NeedSchemeShards.find(ssId) != NeedSchemeShards.end()) { - requestStats->Record.AddNeedSchemeShards(ssId); - hasNeedSchemeShards = true; - } - } - if (!hasNeedSchemeShards) { - return; - } - requestStats->Record.SetNodeId(SelfId().NodeId()); - requestStats->Record.SetUrgent(true); - - NTabletPipe::SendData(SelfId(), SAPipeClientId, requestStats.release()); - } - - void ConnectToSA() { - if (SAPipeClientId || !StatisticsAggregatorId) { - return; - } - auto policy = NTabletPipe::TClientRetryPolicy::WithRetries(); - NTabletPipe::TClientConfig pipeConfig{policy}; - SAPipeClientId = Register(NTabletPipe::CreateClient(SelfId(), StatisticsAggregatorId, pipeConfig)); - - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "ConnectToSA(), pipe client id = " << SAPipeClientId); - } - - void SyncNode() { - if (!SAPipeClientId || !StatisticsAggregatorId) { - return; - } - auto connect = std::make_unique(); - auto& record = connect->Record; - - auto timeout = std::make_unique(); - timeout->PipeClientId = SAPipeClientId; - - record.SetNodeId(SelfId().NodeId()); - for 
(const auto& [ssId, ssState] : Statistics) { - auto* entry = record.AddHaveSchemeShards(); - entry->SetSchemeShardId(ssId); - entry->SetTimestamp(ssState.Timestamp); - } - for (const auto& ssId : NeedSchemeShards) { - record.AddNeedSchemeShards(ssId); - timeout->NeedSchemeShards.insert(ssId); - } - NTabletPipe::SendData(SelfId(), SAPipeClientId, connect.release()); - - if (!NeedSchemeShards.empty()) { - Schedule(RequestTimeout, timeout.release()); - } - - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "SyncNode(), pipe client id = " << SAPipeClientId); - } - - void ReplySuccess(ui64 requestId, bool eraseRequest) { - auto itRequest = InFlight.find(requestId); - if (itRequest == InFlight.end()) { - return; - } - auto& request = itRequest->second; - - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - "ReplySuccess(), request id = " << requestId); - - auto itStatistics = Statistics.find(request.SchemeShardId); - if (itStatistics == Statistics.end()) { - return; - } - auto& statisticsMap = itStatistics->second.Map; - - auto result = std::make_unique(); - result->Success = true; - - for (auto& req : request.StatRequests) { - TResponse rsp; - rsp.Success = true; - rsp.Req = req; - - TStatSimple stat; - auto itStat = statisticsMap.find(req.PathId); - if (itStat != statisticsMap.end()) { - stat.RowCount = itStat->second.RowCount; - stat.BytesSize = itStat->second.BytesSize; - } else { - stat.RowCount = 0; - stat.BytesSize = 0; - } - rsp.Simple = stat; - - result->StatResponses.push_back(rsp); - } - - Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); - - if (eraseRequest) { - InFlight.erase(requestId); - } - } - - void ReplyFailed(ui64 requestId, bool eraseRequest) { - auto itRequest = InFlight.find(requestId); - if (itRequest == InFlight.end()) { - return; - } - auto& request = itRequest->second; - - LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, - 
"ReplyFailed(), request id = " << requestId); - - auto result = std::make_unique(); - result->Success = false; - - for (auto& req : request.StatRequests) { - TResponse rsp; - rsp.Success = false; - rsp.Req = req; - - TStatSimple stat; - stat.RowCount = 0; - stat.BytesSize = 0; - rsp.Simple = stat; - - result->StatResponses.push_back(rsp); - } - - Send(request.ReplyToActorId, result.release(), 0, request.EvCookie); - - if (eraseRequest) { - InFlight.erase(requestId); - } - } - - void ReplyAllFailed() { - for (const auto& [requestId, _] : InFlight) { - ReplyFailed(requestId, false); - } - InFlight.clear(); - } - - void PassAway() { - if (SAPipeClientId) { - NTabletPipe::CloseClient(SelfId(), SAPipeClientId); - } - TBase::PassAway(); - } - - void Handle(NMon::TEvHttpInfo::TPtr& ev) { - auto& request = ev->Get()->Request; - - if (!EnableColumnStatistics) { - Send(ev->Sender, new NMon::TEvHttpInfoRes("Column statistics is disabled")); - return; - } - - auto method = request.GetMethod(); - if (method == HTTP_METHOD_POST) { - auto& params = request.GetPostParams(); - auto itAction = params.find("action"); - if (itAction == params.end()) { - Send(ev->Sender, new NMon::TEvHttpInfoRes("'action' parameter is required")); - return; - } - if (itAction->second != "analyze") { - Send(ev->Sender, new NMon::TEvHttpInfoRes("Unknown 'action' parameter")); - return; - } - auto itPath = params.find("path"); - if (itPath == params.end()) { - Send(ev->Sender, new NMon::TEvHttpInfoRes("'path' parameter is required")); - return; - } - Register(new THttpRequest(THttpRequest::EType::ANALYZE, itPath->second, ev->Sender)); - return; - - } else if (method == HTTP_METHOD_GET) { - auto& params = request.GetParams(); - auto itAction = params.find("action"); - if (itAction == params.end()) { - Send(ev->Sender, new NMon::TEvHttpInfoRes("'action' parameter is required")); - return; - } - if (itAction->second != "status") { - Send(ev->Sender, new NMon::TEvHttpInfoRes("Unknown 'action' parameter")); - 
return; - } - auto itPath = params.find("path"); - if (itPath == params.end()) { - Send(ev->Sender, new NMon::TEvHttpInfoRes("'path' parameter is required")); - return; - } - Register(new THttpRequest(THttpRequest::EType::STATUS, itPath->second, ev->Sender)); - return; - } - - TStringStream str; - HTML(str) { - str << "
" << Endl; - str << ""; - DIV() { - str << ""; - } - DIV() { - str << ""; - } - str << "" << Endl; - str << "
" << Endl; - str << ""; - DIV() { - str << ""; - } - DIV() { - str << ""; - } - str << "" << Endl; - } - - Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); - } - -private: - bool EnableStatistics = false; - bool EnableColumnStatistics = false; - - static constexpr size_t StatFanOut = 10; - - struct TRequestState { - NActors::TActorId ReplyToActorId; - ui64 EvCookie = 0; - ui64 SchemeShardId = 0; - EStatType StatType = EStatType::SIMPLE; - std::vector StatRequests; - std::vector StatResponses; - size_t ReplyCounter = 0; - }; - std::unordered_map InFlight; // request id -> state - ui64 NextRequestId = 1; - - std::unordered_map> LoadQueriesInFlight; // load cookie -> req id, req index - ui64 NextLoadQueryCookie = 1; - - std::unordered_set NeedSchemeShards; - - struct TStatEntry { - ui64 RowCount = 0; - ui64 BytesSize = 0; - }; - typedef std::unordered_map TStatisticsMap; - struct TStatisticsState { - TStatisticsMap Map; - ui64 Timestamp = 0; - }; - std::unordered_map Statistics; // ss id -> stats - - ui64 StatisticsAggregatorId = 0; - TActorId SAPipeClientId; - - static const ui64 ResolveSACookie = std::numeric_limits::max(); - enum EResolveSAStage { - RSA_INITIAL, - RSA_IN_FLIGHT, - RSA_FINISHED - }; - EResolveSAStage ResolveSAStage = RSA_INITIAL; - - static constexpr TDuration RequestTimeout = TDuration::MilliSeconds(100); -}; - -THolder CreateStatService() { - return MakeHolder(); -} - -} // NStat -} // NKikimr diff --git a/ydb/core/statistics/stat_service.h b/ydb/core/statistics/stat_service.h deleted file mode 100644 index 7680afd5633c..000000000000 --- a/ydb/core/statistics/stat_service.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace NKikimr { -namespace NStat { - -inline NActors::TActorId MakeStatServiceID(ui32 node) { - const char x[12] = "StatService"; - return NActors::TActorId(node, TStringBuf(x, 12)); -} - -THolder CreateStatService(); - -} // NStat -} // NKikimr diff --git a/ydb/core/statistics/ut/ut_aggregator.cpp 
b/ydb/core/statistics/ut/ut_aggregator.cpp deleted file mode 100644 index 3afbdc9b4ca2..000000000000 --- a/ydb/core/statistics/ut/ut_aggregator.cpp +++ /dev/null @@ -1,304 +0,0 @@ -#include "ut_common.h" - -#include - -#include -#include -#include - -#include -#include -#include - -#include - -namespace NKikimr { -namespace NStat { - -using namespace NYdb; -using namespace NYdb::NTable; -using namespace NYdb::NScheme; - -namespace { - -void CreateUniformTable(TTestEnv& env, const TString& databaseName, const TString& tableName) { - TTableClient client(env.GetDriver()); - auto session = client.CreateSession().GetValueSync().GetSession(); - - auto result = session.ExecuteSchemeQuery(Sprintf(R"( - CREATE TABLE `Root/%s/%s` ( - Key Uint64, - Value Uint64, - PRIMARY KEY (Key) - ) - WITH ( UNIFORM_PARTITIONS = 4 ); - )", databaseName.c_str(), tableName.c_str())).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - TStringBuilder replace; - replace << Sprintf("REPLACE INTO `Root/%s/%s` (Key, Value) VALUES ", - databaseName.c_str(), tableName.c_str()); - for (ui32 i = 0; i < 4; ++i) { - if (i > 0) { - replace << ", "; - } - ui64 value = 4000000000000000000ull * (i + 1); - replace << Sprintf("(%" PRIu64 "ul, %" PRIu64 "ul)", value, value); - } - replace << ";"; - result = session.ExecuteDataQuery(replace, TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); -} - -void DropTable(TTestEnv& env, const TString& databaseName, const TString& tableName) { - TTableClient client(env.GetDriver()); - auto session = client.CreateSession().GetValueSync().GetSession(); - - auto result = session.ExecuteSchemeQuery(Sprintf(R"( - DROP TABLE `Root/%s/%s`; - )", databaseName.c_str(), tableName.c_str())).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); -} - -void ValidateCountMin(TTestActorRuntime& runtime, TPathId pathId) { - auto statServiceId = 
NStat::MakeStatServiceID(runtime.GetNodeId(1)); - - NStat::TRequest req; - req.PathId = pathId; - req.ColumnTag = 1; - - auto evGet = std::make_unique(); - evGet->StatType = NStat::EStatType::COUNT_MIN_SKETCH; - evGet->StatRequests.push_back(req); - - auto sender = runtime.AllocateEdgeActor(1); - runtime.Send(statServiceId, sender, evGet.release(), 1, true); - auto evResult = runtime.GrabEdgeEventRethrow(sender); - - UNIT_ASSERT(evResult); - UNIT_ASSERT(evResult->Get()); - UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); - - auto rsp = evResult->Get()->StatResponses[0]; - auto stat = rsp.CountMinSketch; - UNIT_ASSERT(rsp.Success); - UNIT_ASSERT(stat.CountMin); - - for (ui32 i = 0; i < 4; ++i) { - ui64 value = 4000000000000000000ull * (i + 1); - auto probe = stat.CountMin->Probe((const char *)&value, sizeof(ui64)); - UNIT_ASSERT_VALUES_EQUAL(probe, 1); - } -} - -void ValidateCountMinAbsense(TTestActorRuntime& runtime, TPathId pathId) { - auto statServiceId = NStat::MakeStatServiceID(runtime.GetNodeId(1)); - - NStat::TRequest req; - req.PathId = pathId; - req.ColumnTag = 1; - - auto evGet = std::make_unique(); - evGet->StatType = NStat::EStatType::COUNT_MIN_SKETCH; - evGet->StatRequests.push_back(req); - - auto sender = runtime.AllocateEdgeActor(1); - runtime.Send(statServiceId, sender, evGet.release(), 1, true); - auto evResult = runtime.GrabEdgeEventRethrow(sender); - - UNIT_ASSERT(evResult); - UNIT_ASSERT(evResult->Get()); - UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); - - auto rsp = evResult->Get()->StatResponses[0]; - UNIT_ASSERT(!rsp.Success); -} - -} // namespace - -Y_UNIT_TEST_SUITE(StatisticsAggregator) { - - Y_UNIT_TEST(ScanOneTable) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - CreateUniformTable(env, "Database", "Table"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - ui64 tabletId = 0; 
- auto pathId = ResolvePathId(runtime, "/Root/Database/Table", nullptr, &tabletId); - - auto ev = std::make_unique(); - auto& record = ev->Record; - PathIdFromPathId(pathId, record.MutablePathId()); - - auto sender = runtime.AllocateEdgeActor(); - runtime.SendToPipe(tabletId, sender, ev.release()); - runtime.GrabEdgeEventRethrow(sender); - - ValidateCountMin(runtime, pathId); - } - - Y_UNIT_TEST(ScanTwoTables) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - CreateUniformTable(env, "Database", "Table1"); - CreateUniformTable(env, "Database", "Table2"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - runtime.SimulateSleep(TDuration::Seconds(60)); - - auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Database/Table2"); - - ValidateCountMin(runtime, pathId1); - ValidateCountMin(runtime, pathId2); - } - - Y_UNIT_TEST(ScanOneTableServerless) { - TTestEnv env(1, 1); - - auto init = [&] () { - CreateDatabase(env, "Shared"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey; - ResolvePathId(runtime, "/Root/Shared", &domainKey); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless", domainKey); - CreateUniformTable(env, "Serverless", "Table"); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - runtime.SimulateSleep(TDuration::Seconds(60)); - - auto pathId = ResolvePathId(runtime, "/Root/Serverless/Table"); - ValidateCountMin(runtime, pathId); - } - - Y_UNIT_TEST(ScanTwoTablesServerless) { - TTestEnv env(1, 1); - - auto init = [&] () { - CreateDatabase(env, "Shared"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - 
runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey; - ResolvePathId(runtime, "/Root/Shared", &domainKey); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless", domainKey); - CreateUniformTable(env, "Serverless", "Table1"); - CreateUniformTable(env, "Serverless", "Table2"); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - runtime.SimulateSleep(TDuration::Seconds(60)); - - auto pathId1 = ResolvePathId(runtime, "/Root/Serverless/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Serverless/Table2"); - ValidateCountMin(runtime, pathId1); - ValidateCountMin(runtime, pathId2); - } - - Y_UNIT_TEST(ScanTwoTablesTwoServerlessDbs) { - TTestEnv env(1, 1); - - auto init = [&] () { - CreateDatabase(env, "Shared"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey; - ResolvePathId(runtime, "/Root/Shared", &domainKey); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless1", domainKey); - CreateServerlessDatabase(env, "Serverless2", domainKey); - CreateUniformTable(env, "Serverless1", "Table1"); - CreateUniformTable(env, "Serverless2", "Table2"); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - runtime.SimulateSleep(TDuration::Seconds(60)); - - auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); - ValidateCountMin(runtime, pathId1); - ValidateCountMin(runtime, pathId2); - } - - Y_UNIT_TEST(DropTableNavigateError) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - CreateUniformTable(env, "Database", "Table"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - 
runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - ui64 tabletId = 0; - auto pathId = ResolvePathId(runtime, "/Root/Database/Table", nullptr, &tabletId); - - auto init2 = [&] () { - DropTable(env, "Database", "Table"); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - auto ev = std::make_unique(); - auto& record = ev->Record; - PathIdFromPathId(pathId, record.MutablePathId()); - - auto sender = runtime.AllocateEdgeActor(); - runtime.SendToPipe(tabletId, sender, ev.release()); - - runtime.SimulateSleep(TDuration::Seconds(60)); - - ValidateCountMinAbsense(runtime, pathId); - } - -} - -} // NStat -} // NKikimr diff --git a/ydb/core/statistics/ut/ut_common.cpp b/ydb/core/statistics/ut/ut_common.cpp deleted file mode 100644 index ef9138246b59..000000000000 --- a/ydb/core/statistics/ut/ut_common.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "ut_common.h" - -#include - -namespace NKikimr { -namespace NStat { - -NKikimrSubDomains::TSubDomainSettings GetSubDomainDeclareSettings(const TString &name, const TStoragePools &pools) { - NKikimrSubDomains::TSubDomainSettings subdomain; - subdomain.SetName(name); - for (auto& pool: pools) { - *subdomain.AddStoragePools() = pool; - } - return subdomain; -} - -NKikimrSubDomains::TSubDomainSettings GetSubDomainDefaultSettings(const TString &name, const TStoragePools &pools) { - NKikimrSubDomains::TSubDomainSettings subdomain; - subdomain.SetName(name); - subdomain.SetCoordinators(1); - subdomain.SetMediators(1); - subdomain.SetPlanResolution(50); - subdomain.SetTimeCastBucketsPerMediator(2); - for (auto& pool: pools) { - *subdomain.AddStoragePools() = pool; - } - return subdomain; -} - -TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, ui32 storagePools) { - auto mbusPort = PortManager.GetPort(); - auto grpcPort = PortManager.GetPort(); - - Settings = new Tests::TServerSettings(mbusPort); - Settings->SetDomainName("Root"); - 
Settings->SetNodeCount(staticNodes); - Settings->SetDynamicNodeCount(dynamicNodes); - Settings->SetUseRealThreads(false); - - NKikimrConfig::TFeatureFlags featureFlags; - featureFlags.SetEnableStatistics(true); - featureFlags.SetEnableColumnStatistics(true); - Settings->SetFeatureFlags(featureFlags); - - for (ui32 i : xrange(storagePools)) { - TString poolName = Sprintf("test%d", i); - Settings->AddStoragePool(poolName, TString("/Root:") + poolName, 2); - } - - Server = new Tests::TServer(*Settings); - Server->EnableGRpc(grpcPort); - - auto sender = Server->GetRuntime()->AllocateEdgeActor(); - Server->SetupRootStoragePools(sender); - - Client = MakeHolder(*Settings); - - Tenants = MakeHolder(Server); - - Endpoint = "localhost:" + ToString(grpcPort); - DriverConfig = NYdb::TDriverConfig().SetEndpoint(Endpoint); - Driver = MakeHolder(DriverConfig); - - Server->GetRuntime()->SetLogPriority(NKikimrServices::STATISTICS, NActors::NLog::PRI_DEBUG); -} - -TTestEnv::~TTestEnv() { - Driver->Stop(true); -} - -TStoragePools TTestEnv::GetPools() const { - TStoragePools pools; - for (const auto& [kind, pool] : Settings->StoragePoolTypes) { - pools.emplace_back(pool.GetName(), kind); - } - return pools; -} - -void CreateDatabase(TTestEnv& env, const TString& databaseName, size_t nodeCount) { - auto subdomain = GetSubDomainDeclareSettings(databaseName); - UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, - env.GetClient().CreateExtSubdomain("/Root", subdomain)); - - env.GetTenants().Run("/Root/" + databaseName, nodeCount); - - auto subdomainSettings = GetSubDomainDefaultSettings(databaseName, env.GetPools()); - subdomainSettings.SetExternalSchemeShard(true); - subdomainSettings.SetExternalStatisticsAggregator(true); - UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, - env.GetClient().AlterExtSubdomain("/Root", subdomainSettings)); -} - -void CreateServerlessDatabase(TTestEnv& env, const TString& databaseName, TPathId resourcesDomainKey) { - auto subdomain = 
GetSubDomainDeclareSettings(databaseName); - subdomain.MutableResourcesDomainKey()->SetSchemeShard(resourcesDomainKey.OwnerId); - subdomain.MutableResourcesDomainKey()->SetPathId(resourcesDomainKey.LocalPathId); - UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, - env.GetClient().CreateExtSubdomain("/Root", subdomain)); - - env.GetTenants().Run("/Root/" + databaseName, 0); - - auto subdomainSettings = GetSubDomainDefaultSettings(databaseName, env.GetPools()); - subdomainSettings.SetExternalSchemeShard(true); - UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, - env.GetClient().AlterExtSubdomain("/Root", subdomainSettings)); -} - -TPathId ResolvePathId(TTestActorRuntime& runtime, const TString& path, - TPathId* domainKey, ui64* tabletId) -{ - auto sender = runtime.AllocateEdgeActor(); - - using TNavigate = NSchemeCache::TSchemeCacheNavigate; - using TEvRequest = TEvTxProxySchemeCache::TEvNavigateKeySet; - using TEvResponse = TEvTxProxySchemeCache::TEvNavigateKeySetResult; - - auto request = std::make_unique(); - auto& entry = request->ResultSet.emplace_back(); - entry.Path = SplitPath(path); - entry.RequestType = TNavigate::TEntry::ERequestType::ByPath; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - entry.ShowPrivatePath = true; - runtime.Send(MakeSchemeCacheID(), sender, new TEvRequest(request.release())); - - auto ev = runtime.GrabEdgeEventRethrow(sender); - UNIT_ASSERT(ev); - UNIT_ASSERT(ev->Get()); - std::unique_ptr response(ev->Get()->Request.Release()); - UNIT_ASSERT(response->ResultSet.size() == 1); - auto& resultEntry = response->ResultSet[0]; - - if (domainKey) { - *domainKey = resultEntry.DomainInfo->DomainKey; - } - - if (tabletId && resultEntry.DomainInfo->Params.HasStatisticsAggregator()) { - *tabletId = resultEntry.DomainInfo->Params.GetStatisticsAggregator(); - } - - return resultEntry.TableId.PathId; -} - -} // NStat -} // NKikimr diff --git a/ydb/core/statistics/ut/ut_common.h b/ydb/core/statistics/ut/ut_common.h deleted 
file mode 100644 index e73c2d69e9e5..000000000000 --- a/ydb/core/statistics/ut/ut_common.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include -#include - -namespace NKikimr { -namespace NStat { - -NKikimrSubDomains::TSubDomainSettings GetSubDomainDeclareSettings( - const TString &name, const TStoragePools &pools = {}); - -NKikimrSubDomains::TSubDomainSettings GetSubDomainDefaultSettings( - const TString &name, const TStoragePools &pools = {}); - -class TTestEnv { -public: - TTestEnv(ui32 staticNodes = 1, ui32 dynamicNodes = 1, ui32 storagePools = 1); - ~TTestEnv(); - - Tests::TServer& GetServer() const { - return *Server; - } - - Tests::TClient& GetClient() const { - return *Client; - } - - Tests::TTenants& GetTenants() const { - return *Tenants; - } - - NYdb::TDriver& GetDriver() const { - return *Driver; - } - - const TString& GetEndpoint() const { - return Endpoint; - } - - const Tests::TServerSettings::TPtr GetSettings() const { - return Settings; - } - - TStoragePools GetPools() const; - -private: - TPortManager PortManager; - - Tests::TServerSettings::TPtr Settings; - Tests::TServer::TPtr Server; - THolder Client; - THolder Tenants; - - TString Endpoint; - NYdb::TDriverConfig DriverConfig; - THolder Driver; -}; - -void CreateDatabase(TTestEnv& env, const TString& databaseName, size_t nodeCount = 1); - -void CreateServerlessDatabase(TTestEnv& env, const TString& databaseName, TPathId resourcesDomainKey); - -TPathId ResolvePathId(TTestActorRuntime& runtime, const TString& path, - TPathId* domainKey = nullptr, ui64* tabletId = nullptr); - -} // namespace NStat -} // namespace NKikimr diff --git a/ydb/core/statistics/ut/ut_save_load_stats.cpp b/ydb/core/statistics/ut/ut_save_load_stats.cpp deleted file mode 100644 index 50510b3d9a99..000000000000 --- a/ydb/core/statistics/ut/ut_save_load_stats.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "ut_common.h" - -#include -#include - -#include - -namespace NKikimr::NStat { - -Y_UNIT_TEST_SUITE(StatisticsSaveLoad) { 
- Y_UNIT_TEST(Simple) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto sender = runtime.AllocateEdgeActor(0); - runtime.Register(CreateStatisticsTableCreator(std::make_unique()), - 0, 0, TMailboxType::Simple, 0, sender); - runtime.GrabEdgeEvent(sender); - - TPathId pathId(1, 1); - ui64 statType = 1; - std::vector columnTags = {1, 2}; - std::vector data = {"dataA", "dataB"}; - - runtime.Register(CreateSaveStatisticsQuery( - pathId, statType, std::move(columnTags), std::move(data)), - 0, 0, TMailboxType::Simple, 0, sender); - auto saveResponse = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(saveResponse->Get()->Success); - - runtime.Register(CreateLoadStatisticsQuery(pathId, statType, 1, 1), - 0, 0, TMailboxType::Simple, 0, sender); - auto loadResponseA = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(loadResponseA->Get()->Success); - UNIT_ASSERT(loadResponseA->Get()->Data); - UNIT_ASSERT_VALUES_EQUAL(*loadResponseA->Get()->Data, "dataA"); - - runtime.Register(CreateLoadStatisticsQuery(pathId, statType, 2, 1), - 0, 0, TMailboxType::Simple, 0, sender); - auto loadResponseB = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(loadResponseB->Get()->Success); - UNIT_ASSERT(loadResponseB->Get()->Data); - UNIT_ASSERT_VALUES_EQUAL(*loadResponseB->Get()->Data, "dataB"); - } - - Y_UNIT_TEST(Delete) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto sender = runtime.AllocateEdgeActor(0); - runtime.Register(CreateStatisticsTableCreator(std::make_unique()), - 0, 0, TMailboxType::Simple, 0, sender); - runtime.GrabEdgeEvent(sender); - - TPathId pathId(1, 1); - ui64 statType = 1; - std::vector 
columnTags = {1, 2}; - std::vector data = {"dataA", "dataB"}; - - runtime.Register(CreateSaveStatisticsQuery( - pathId, statType, std::move(columnTags), std::move(data)), - 0, 0, TMailboxType::Simple, 0, sender); - auto saveResponse = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(saveResponse->Get()->Success); - - runtime.Register(CreateDeleteStatisticsQuery(pathId), - 0, 0, TMailboxType::Simple, 0, sender); - auto deleteResponse = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(deleteResponse->Get()->Success); - - runtime.Register(CreateLoadStatisticsQuery(pathId, statType, 1, 1), - 0, 0, TMailboxType::Simple, 0, sender); - auto loadResponseA = runtime.GrabEdgeEvent(sender); - UNIT_ASSERT(!loadResponseA->Get()->Success); - } -} - -} // NKikimr::NStat diff --git a/ydb/core/statistics/ut/ut_statistics.cpp b/ydb/core/statistics/ut/ut_statistics.cpp deleted file mode 100644 index ab7846b21fe9..000000000000 --- a/ydb/core/statistics/ut/ut_statistics.cpp +++ /dev/null @@ -1,255 +0,0 @@ -#include "ut_common.h" - -#include - -#include -#include - -#include -#include -#include - -#include - -namespace NKikimr { -namespace NStat { - -using namespace NYdb; -using namespace NYdb::NTable; -using namespace NYdb::NScheme; - -namespace { - -void CreateTable(TTestEnv& env, const TString& databaseName, const TString& tableName, size_t rowCount) { - TTableClient client(env.GetDriver()); - auto session = client.CreateSession().GetValueSync().GetSession(); - - auto result = session.ExecuteSchemeQuery(Sprintf(R"( - CREATE TABLE `Root/%s/%s` ( - Key Uint64, - Value Uint64, - PRIMARY KEY (Key) - ); - )", databaseName.c_str(), tableName.c_str())).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - TStringBuilder replace; - replace << Sprintf("REPLACE INTO `Root/%s/%s` (Key, Value) VALUES ", - databaseName.c_str(), tableName.c_str()); - for (ui32 i = 0; i < rowCount; ++i) { - if (i > 0) { - replace << ", "; - } - replace << Sprintf("(%uu, %uu)", i, i); - } - 
replace << ";"; - result = session.ExecuteDataQuery(replace, TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); -} - -void ValidateRowCount(TTestActorRuntime& runtime, ui32 nodeIndex, TPathId pathId, size_t expectedRowCount) { - auto statServiceId = NStat::MakeStatServiceID(runtime.GetNodeId(nodeIndex)); - ui64 rowCount = 0; - while (rowCount == 0) { - NStat::TRequest req; - req.PathId = pathId; - - auto evGet = std::make_unique(); - evGet->StatType = NStat::EStatType::SIMPLE; - evGet->StatRequests.push_back(req); - - auto sender = runtime.AllocateEdgeActor(nodeIndex); - runtime.Send(statServiceId, sender, evGet.release(), nodeIndex, true); - auto evResult = runtime.GrabEdgeEventRethrow(sender); - - UNIT_ASSERT(evResult); - UNIT_ASSERT(evResult->Get()); - UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); - - auto rsp = evResult->Get()->StatResponses[0]; - auto stat = rsp.Simple; - - rowCount = stat.RowCount; - - if (rowCount != 0) { - UNIT_ASSERT(stat.RowCount == expectedRowCount); - break; - } - - runtime.SimulateSleep(TDuration::Seconds(5)); - } -} - -} // namespace - -Y_UNIT_TEST_SUITE(Statistics) { - - Y_UNIT_TEST(Simple) { - TTestEnv env(1, 1); - - auto init = [&] () { - CreateDatabase(env, "Database"); - CreateTable(env, "Database", "Table", 5); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto pathId = ResolvePathId(runtime, "/Root/Database/Table"); - ValidateRowCount(runtime, 1, pathId, 5); - } - - Y_UNIT_TEST(TwoNodes) { - TTestEnv env(1, 2); - - auto init = [&] () { - CreateDatabase(env, "Database", 2); - CreateTable(env, "Database", "Table", 5); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table"); - 
ValidateRowCount(runtime, 1, pathId1, 5); - ValidateRowCount(runtime, 2, pathId1, 5); - } - - Y_UNIT_TEST(TwoTables) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Database"); - CreateTable(env, "Database", "Table1", 5); - CreateTable(env, "Database", "Table2", 6); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto pathId1 = ResolvePathId(runtime, "/Root/Database/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Database/Table2"); - ValidateRowCount(runtime, 1, pathId1, 5); - ValidateRowCount(runtime, 1, pathId2, 6); - } - - Y_UNIT_TEST(TwoDatabases) { - TTestEnv env(1, 2); - auto init = [&] () { - CreateDatabase(env, "Database1"); - CreateDatabase(env, "Database2"); - CreateTable(env, "Database1", "Table1", 5); - CreateTable(env, "Database2", "Table2", 6); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - auto pathId1 = ResolvePathId(runtime, "/Root/Database1/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Database2/Table2"); - ValidateRowCount(runtime, 2, pathId1, 5); - ValidateRowCount(runtime, 1, pathId2, 6); - } - - Y_UNIT_TEST(Serverless) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Shared"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey; - ResolvePathId(runtime, "/Root/Shared", &domainKey); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless", domainKey); - CreateTable(env, "Serverless", "Table", 5); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - auto pathId = ResolvePathId(runtime, "/Root/Serverless/Table"); - ValidateRowCount(runtime, 1, pathId, 5); - } - - 
Y_UNIT_TEST(TwoServerlessDbs) { - TTestEnv env(1, 1); - auto init = [&] () { - CreateDatabase(env, "Shared"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey; - ResolvePathId(runtime, "/Root/Shared", &domainKey); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless1", domainKey); - CreateServerlessDatabase(env, "Serverless2", domainKey); - CreateTable(env, "Serverless1", "Table1", 5); - CreateTable(env, "Serverless2", "Table2", 6); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); - ValidateRowCount(runtime, 1, pathId1, 5); - ValidateRowCount(runtime, 1, pathId2, 6); - } - - Y_UNIT_TEST(TwoServerlessTwoSharedDbs) { - TTestEnv env(1, 2); - auto init = [&] () { - CreateDatabase(env, "Shared1"); - CreateDatabase(env, "Shared2"); - }; - std::thread initThread(init); - - auto& runtime = *env.GetServer().GetRuntime(); - runtime.SimulateSleep(TDuration::Seconds(5)); - initThread.join(); - - TPathId domainKey1, domainKey2; - ResolvePathId(runtime, "/Root/Shared1", &domainKey1); - ResolvePathId(runtime, "/Root/Shared2", &domainKey2); - - auto init2 = [&] () { - CreateServerlessDatabase(env, "Serverless1", domainKey1); - CreateServerlessDatabase(env, "Serverless2", domainKey2); - CreateTable(env, "Serverless1", "Table1", 5); - CreateTable(env, "Serverless2", "Table2", 6); - }; - std::thread init2Thread(init2); - - runtime.SimulateSleep(TDuration::Seconds(5)); - init2Thread.join(); - - auto pathId1 = ResolvePathId(runtime, "/Root/Serverless1/Table1"); - auto pathId2 = ResolvePathId(runtime, "/Root/Serverless2/Table2"); - ValidateRowCount(runtime, 2, pathId1, 5); - ValidateRowCount(runtime, 1, pathId2, 6); - } - -} - -} // NSysView -} 
// NKikimr diff --git a/ydb/core/statistics/ut/ya.make b/ydb/core/statistics/ut/ya.make deleted file mode 100644 index d2f04f691779..000000000000 --- a/ydb/core/statistics/ut/ya.make +++ /dev/null @@ -1,29 +0,0 @@ -UNITTEST_FOR(ydb/core/statistics) - -FORK_SUBTESTS() - -IF (WITH_VALGRIND) - TIMEOUT(3600) - SIZE(LARGE) - TAG(ya:fat) -ELSE() - TIMEOUT(600) - SIZE(MEDIUM) -ENDIF() - -YQL_LAST_ABI_VERSION() - -PEERDIR( - library/cpp/testing/unittest - ydb/core/testlib/default -) - -SRCS( - ut_common.h - ut_common.cpp - ut_aggregator.cpp - ut_statistics.cpp - ut_save_load_stats.cpp -) - -END() diff --git a/ydb/core/statistics/ut_common/ut_common.cpp b/ydb/core/statistics/ut_common/ut_common.cpp new file mode 100644 index 000000000000..18c2ec9fd5fb --- /dev/null +++ b/ydb/core/statistics/ut_common/ut_common.cpp @@ -0,0 +1,516 @@ +#include "ut_common.h" + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +using namespace NYdb; +using namespace NYdb::NTable; +using namespace NYdb::NScheme; + +namespace NKikimr { +namespace NStat { + +TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, bool useRealThreads) + : CSController(NYDBTest::TControllers::RegisterCSControllerGuard()) +{ + auto mbusPort = PortManager.GetPort(); + auto grpcPort = PortManager.GetPort(); + + Settings = new Tests::TServerSettings(mbusPort); + Settings->SetDomainName("Root"); + Settings->SetNodeCount(staticNodes); + Settings->SetDynamicNodeCount(dynamicNodes); + Settings->SetUseRealThreads(useRealThreads); + Settings->AddStoragePoolType("hdd1"); + Settings->AddStoragePoolType("hdd2"); + + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableStatistics(true); + featureFlags.SetEnableColumnStatistics(true); + Settings->SetFeatureFlags(featureFlags); + + Server = new Tests::TServer(*Settings); + Server->EnableGRpc(grpcPort); + + auto sender = Server->GetRuntime()->AllocateEdgeActor(); + 
Server->SetupRootStoragePools(sender); + + Client = MakeHolder(*Settings); + + Tenants = MakeHolder(Server); + + Endpoint = "localhost:" + ToString(grpcPort); + DriverConfig = NYdb::TDriverConfig().SetEndpoint(Endpoint); + Driver = MakeHolder(DriverConfig); + + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + CSController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + CSController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + + Server->GetRuntime()->SetLogPriority(NKikimrServices::STATISTICS, NActors::NLog::PRI_DEBUG); +} + +TTestEnv::~TTestEnv() { + Driver->Stop(true); +} + +TString CreateDatabase(TTestEnv& env, const TString& databaseName, + size_t nodeCount, bool isShared, const TString& poolName) +{ + auto& runtime = *env.GetServer().GetRuntime(); + auto fullDbName = Sprintf("/Root/%s", databaseName.c_str()); + + using TEvCreateDatabaseRequest = NKikimr::NGRpcService::TGrpcRequestOperationCall< + Ydb::Cms::CreateDatabaseRequest, + Ydb::Cms::CreateDatabaseResponse>; + + Ydb::Cms::CreateDatabaseRequest request; + request.set_path(fullDbName); + if (isShared) { + auto* resources = request.mutable_shared_resources(); + auto* storage = resources->add_storage_units(); + storage->set_unit_kind(poolName); + storage->set_count(1); + } else { + auto* resources = request.mutable_resources(); + auto* storage = resources->add_storage_units(); + storage->set_unit_kind(poolName); + storage->set_count(1); + } + + auto future = NRpcService::DoLocalRpc( + std::move(request), "", "", runtime.GetActorSystem(0)); + auto response = runtime.WaitFuture(std::move(future)); + UNIT_ASSERT(response.operation().ready()); + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + + env.GetTenants().Run(fullDbName, nodeCount); + + if (!env.GetServer().GetSettings().UseRealThreads) { + runtime.SimulateSleep(TDuration::Seconds(1)); + } + + return fullDbName; +} + +TString CreateServerlessDatabase(TTestEnv& 
env, const TString& databaseName, const TString& sharedName) { + auto& runtime = *env.GetServer().GetRuntime(); + auto fullDbName = Sprintf("/Root/%s", databaseName.c_str()); + + using TEvCreateDatabaseRequest = NKikimr::NGRpcService::TGrpcRequestOperationCall< + Ydb::Cms::CreateDatabaseRequest, + Ydb::Cms::CreateDatabaseResponse>; + + Ydb::Cms::CreateDatabaseRequest request; + request.set_path(fullDbName); + request.mutable_serverless_resources()->set_shared_database_path(sharedName); + + auto future = NRpcService::DoLocalRpc( + std::move(request), "", "", runtime.GetActorSystem(0)); + auto response = runtime.WaitFuture(std::move(future)); + UNIT_ASSERT(response.operation().ready()); + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + + env.GetTenants().Run(fullDbName, 0); + + if (!env.GetServer().GetSettings().UseRealThreads) { + runtime.SimulateSleep(TDuration::Seconds(1)); + } + + return fullDbName; +} + +TPathId ResolvePathId(TTestActorRuntime& runtime, const TString& path, TPathId* domainKey, ui64* saTabletId) { + auto sender = runtime.AllocateEdgeActor(); + + using TNavigate = NSchemeCache::TSchemeCacheNavigate; + using TEvRequest = TEvTxProxySchemeCache::TEvNavigateKeySet; + using TEvResponse = TEvTxProxySchemeCache::TEvNavigateKeySetResult; + + auto request = std::make_unique(); + auto& entry = request->ResultSet.emplace_back(); + entry.Path = SplitPath(path); + entry.RequestType = TNavigate::TEntry::ERequestType::ByPath; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + entry.ShowPrivatePath = true; + runtime.Send(MakeSchemeCacheID(), sender, new TEvRequest(request.release())); + + auto ev = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(ev); + UNIT_ASSERT(ev->Get()); + std::unique_ptr response(ev->Get()->Request.Release()); + UNIT_ASSERT(response->ResultSet.size() == 1); + auto& resultEntry = response->ResultSet[0]; + + if (domainKey) { + *domainKey = resultEntry.DomainInfo->DomainKey; + } + + 
if (saTabletId) { + if (resultEntry.DomainInfo->Params.HasStatisticsAggregator()) { + *saTabletId = resultEntry.DomainInfo->Params.GetStatisticsAggregator(); + } else { + auto resourcesDomainKey = resultEntry.DomainInfo->ResourcesDomainKey; + auto request = std::make_unique(); + auto& entry = request->ResultSet.emplace_back(); + entry.TableId = TTableId(resourcesDomainKey.OwnerId, resourcesDomainKey.LocalPathId); + entry.RequestType = TNavigate::TEntry::ERequestType::ByTableId; + entry.Operation = TNavigate::EOp::OpPath; + entry.RedirectRequired = false; + runtime.Send(MakeSchemeCacheID(), sender, new TEvRequest(request.release())); + + auto ev = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(ev); + UNIT_ASSERT(ev->Get()); + std::unique_ptr response(ev->Get()->Request.Release()); + UNIT_ASSERT(response->ResultSet.size() == 1); + auto& secondResultEntry = response->ResultSet[0]; + + if (secondResultEntry.DomainInfo->Params.HasStatisticsAggregator()) { + *saTabletId = secondResultEntry.DomainInfo->Params.GetStatisticsAggregator(); + } + } + } + + return resultEntry.TableId.PathId; +} + +NKikimrScheme::TEvDescribeSchemeResult DescribeTable(TTestActorRuntime& runtime, TActorId sender, const TString& path) { + TAutoPtr handle; + + auto request = MakeHolder(); + request->Record.MutableDescribePath()->SetPath(path); + request->Record.MutableDescribePath()->MutableOptions()->SetShowPrivateTable(true); + runtime.Send(new IEventHandle(MakeTxProxyID(), sender, request.Release())); + auto reply = runtime.GrabEdgeEventRethrow(handle); + + return *reply->MutableRecord(); +} + +TVector GetTableShards(TTestActorRuntime& runtime, TActorId sender, const TString& path) { + TVector shards; + auto lsResult = DescribeTable(runtime, sender, path); + for (auto &part : lsResult.GetPathDescription().GetTablePartitions()) + shards.push_back(part.GetDatashardId()); + + return shards; +} + +TVector GetColumnTableShards(TTestActorRuntime& runtime, TActorId sender, const TString& path) { + 
TVector shards; + auto lsResult = DescribeTable(runtime, sender, path); + for (auto &part : lsResult.GetPathDescription().GetColumnTableDescription().GetSharding().GetColumnShards()) + shards.push_back(part); + + return shards; +} + +Ydb::StatusIds::StatusCode ExecuteYqlScript(TTestEnv& env, const TString& script, bool mustSucceed) { + auto& runtime = *env.GetServer().GetRuntime(); + + using TEvExecuteYqlRequest = NGRpcService::TGrpcRequestOperationCall< + Ydb::Scripting::ExecuteYqlRequest, + Ydb::Scripting::ExecuteYqlResponse>; + + Ydb::Scripting::ExecuteYqlRequest request; + request.set_script(script); + + auto future = NRpcService::DoLocalRpc( + std::move(request), "", "", runtime.GetActorSystem(0)); + auto response = runtime.WaitFuture(std::move(future)); + + UNIT_ASSERT(response.operation().ready()); + if (mustSucceed) { + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + } + return response.operation().status(); +} + +void CreateUniformTable(TTestEnv& env, const TString& databaseName, const TString& tableName) { + ExecuteYqlScript(env, Sprintf(R"( + CREATE TABLE `Root/%s/%s` ( + Key Uint64, + Value Uint64, + PRIMARY KEY (Key) + ) + WITH ( UNIFORM_PARTITIONS = 4 ); + )", databaseName.c_str(), tableName.c_str())); + + TStringBuilder replace; + replace << Sprintf("REPLACE INTO `Root/%s/%s` (Key, Value) VALUES ", + databaseName.c_str(), tableName.c_str()); + for (ui32 i = 0; i < 4; ++i) { + if (i > 0) { + replace << ", "; + } + ui64 value = 4000000000000000000ull * (i + 1); + replace << Sprintf("(%" PRIu64 "ul, %" PRIu64 "ul)", value, value); + } + replace << ";"; + ExecuteYqlScript(env, replace); +} + +void CreateColumnStoreTable(TTestEnv& env, const TString& databaseName, const TString& tableName, + int shardCount) +{ + auto fullTableName = Sprintf("Root/%s/%s", databaseName.c_str(), tableName.c_str()); + auto& runtime = *env.GetServer().GetRuntime(); + + ExecuteYqlScript(env, Sprintf(R"( + CREATE TABLE `%s` ( + Key Uint64 NOT 
NULL, + Value Uint64, + PRIMARY KEY (Key) + ) + PARTITION BY HASH(Key) + WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d + ); + )", fullTableName.c_str(), shardCount)); + runtime.SimulateSleep(TDuration::Seconds(1)); + + ExecuteYqlScript(env, Sprintf(R"( + ALTER OBJECT `%s` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_key, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['Key']}`); + )", fullTableName.c_str())); + runtime.SimulateSleep(TDuration::Seconds(1)); + + ExecuteYqlScript(env, Sprintf(R"( + ALTER OBJECT `%s` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_value, TYPE=COUNT_MIN_SKETCH, + FEATURES=`{"column_names" : ['Value']}`); + )", fullTableName.c_str())); + runtime.SimulateSleep(TDuration::Seconds(1)); + + using TEvBulkUpsertRequest = NGRpcService::TGrpcRequestOperationCall< + Ydb::Table::BulkUpsertRequest, + Ydb::Table::BulkUpsertResponse>; + + Ydb::Table::BulkUpsertRequest request; + request.set_table(fullTableName); + auto* rows = request.mutable_rows(); + + auto* reqRowType = rows->mutable_type()->mutable_list_type()->mutable_item()->mutable_struct_type(); + auto* reqKeyType = reqRowType->add_members(); + reqKeyType->set_name("Key"); + reqKeyType->mutable_type()->set_type_id(Ydb::Type::UINT64); + auto* reqValueType = reqRowType->add_members(); + reqValueType->set_name("Value"); + reqValueType->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::UINT64); + + auto* reqRows = rows->mutable_value(); + for (size_t i = 0; i < ColumnTableRowsNumber; ++i) { + auto* row = reqRows->add_items(); + row->add_items()->set_uint64_value(i); + row->add_items()->set_uint64_value(i); + } + + auto future = NRpcService::DoLocalRpc( + std::move(request), "", "", runtime.GetActorSystem(0)); + auto response = runtime.WaitFuture(std::move(future)); + + UNIT_ASSERT(response.operation().ready()); + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + + 
env.GetController()->WaitActualization(TDuration::Seconds(1)); +} + +std::vector GatherColumnTablesInfo(TTestEnv& env, const TString& fullDbName, ui8 tableCount) { + auto& runtime = *env.GetServer().GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + std::vector ret; + for (ui8 tableId = 1; tableId <= tableCount; tableId++) { + TTableInfo tableInfo; + tableInfo.Path = Sprintf("%s/Table%u", fullDbName.c_str(), tableId); + tableInfo.ShardIds = GetColumnTableShards(runtime, sender, tableInfo.Path); + tableInfo.PathId = ResolvePathId(runtime, tableInfo.Path, &tableInfo.DomainKey, &tableInfo.SaTabletId); + ret.emplace_back(tableInfo); + } + return ret; +} + +TDatabaseInfo CreateDatabaseColumnTables(TTestEnv& env, ui8 tableCount, ui8 shardCount) { + auto fullDbName = CreateDatabase(env, "Database"); + + for (ui8 tableId = 1; tableId <= tableCount; tableId++) { + CreateColumnStoreTable(env, "Database", Sprintf("Table%u", tableId), shardCount); + } + + return { + .FullDatabaseName = fullDbName, + .Tables = GatherColumnTablesInfo(env, fullDbName, tableCount) + }; +} + +TDatabaseInfo CreateServerlessDatabaseColumnTables(TTestEnv& env, ui8 tableCount, ui8 shardCount) { + auto fullServerlessDbName = CreateDatabase(env, "Shared", 1, true); + auto fullDbName = CreateServerlessDatabase(env, "Database", "/Root/Shared"); + + for (ui8 tableId = 1; tableId <= tableCount; tableId++) { + CreateColumnStoreTable(env, "Database", Sprintf("Table%u", tableId), shardCount); + } + + return { + .FullDatabaseName = fullServerlessDbName, + .Tables = GatherColumnTablesInfo(env, fullDbName, tableCount) + }; +} + +void DropTable(TTestEnv& env, const TString& databaseName, const TString& tableName) { + ExecuteYqlScript(env, Sprintf(R"( + DROP TABLE `Root/%s/%s`; + )", databaseName.c_str(), tableName.c_str())); +} + +std::shared_ptr ExtractCountMin(TTestActorRuntime& runtime, const TPathId& pathId, ui64 columnTag) { + auto statServiceId = NStat::MakeStatServiceID(runtime.GetNodeId(1)); + + 
NStat::TRequest req; + req.PathId = pathId; + req.ColumnTag = columnTag; + + auto evGet = std::make_unique(); + evGet->StatType = NStat::EStatType::COUNT_MIN_SKETCH; + evGet->StatRequests.push_back(req); + + auto sender = runtime.AllocateEdgeActor(1); + runtime.Send(statServiceId, sender, evGet.release(), 1, true); + auto evResult = runtime.GrabEdgeEventRethrow(sender); + + UNIT_ASSERT(evResult); + UNIT_ASSERT(evResult->Get()); + UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); + + auto rsp = evResult->Get()->StatResponses[0]; + auto stat = rsp.CountMinSketch; + UNIT_ASSERT(rsp.Success); + UNIT_ASSERT(stat.CountMin); + + return stat.CountMin; +} + +void ValidateCountMinColumnshard(TTestActorRuntime& runtime, const TPathId& pathId, ui64 expectedProbe) { + auto countMin = ExtractCountMin(runtime, pathId); + + ui32 value = 1; + auto actualProbe = countMin->Probe((const char *)&value, sizeof(value)); + UNIT_ASSERT_VALUES_EQUAL(actualProbe, expectedProbe); +} + +void ValidateCountMinDatashard(TTestActorRuntime& runtime, TPathId pathId) { + auto countMin = ExtractCountMin(runtime, pathId); + + for (ui32 i = 0; i < 4; ++i) { + ui64 value = 4000000000000000000ull * (i + 1); + auto probe = countMin->Probe((const char *)&value, sizeof(ui64)); + UNIT_ASSERT_VALUES_EQUAL(probe, 1); + } +} + +void ValidateCountMinDatashardAbsense(TTestActorRuntime& runtime, TPathId pathId) { + auto statServiceId = NStat::MakeStatServiceID(runtime.GetNodeId(1)); + + NStat::TRequest req; + req.PathId = pathId; + req.ColumnTag = 1; + + auto evGet = std::make_unique(); + evGet->StatType = NStat::EStatType::COUNT_MIN_SKETCH; + evGet->StatRequests.push_back(req); + + auto sender = runtime.AllocateEdgeActor(1); + runtime.Send(statServiceId, sender, evGet.release(), 1, true); + auto evResult = runtime.GrabEdgeEventRethrow(sender); + + UNIT_ASSERT(evResult); + UNIT_ASSERT(evResult->Get()); + UNIT_ASSERT(evResult->Get()->StatResponses.size() == 1); + + auto rsp = 
evResult->Get()->StatResponses[0]; + UNIT_ASSERT(!rsp.Success); +} + +TAnalyzedTable::TAnalyzedTable(const TPathId& pathId) + : PathId(pathId) +{} + +TAnalyzedTable::TAnalyzedTable(const TPathId& pathId, const std::vector& columnTags) + : PathId(pathId) + , ColumnTags(columnTags) +{} + +void TAnalyzedTable::ToProto(NKikimrStat::TTable& tableProto) const { + PathIdFromPathId(PathId, tableProto.MutablePathId()); + tableProto.MutableColumnTags()->Add(ColumnTags.begin(), ColumnTags.end()); +} + +std::unique_ptr MakeAnalyzeRequest(const std::vector& tables, const TString operationId) { + auto ev = std::make_unique(); + NKikimrStat::TEvAnalyze& record = ev->Record; + record.SetOperationId(operationId); + record.AddTypes(NKikimrStat::EColumnStatisticType::TYPE_COUNT_MIN_SKETCH); + for (const TAnalyzedTable& table : tables) + table.ToProto(*record.AddTables()); + return ev; +} + +void Analyze(TTestActorRuntime& runtime, ui64 saTabletId, const std::vector& tables, const TString operationId) { + auto ev = MakeAnalyzeRequest(tables, operationId); + + auto sender = runtime.AllocateEdgeActor(); + runtime.SendToPipe(saTabletId, sender, ev.release()); + auto evResponse = runtime.GrabEdgeEventRethrow(sender); + + const auto& record = evResponse->Get()->Record; + UNIT_ASSERT_VALUES_EQUAL(record.GetOperationId(), operationId); + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrStat::TEvAnalyzeResponse::STATUS_SUCCESS); +} + +void AnalyzeTable(TTestActorRuntime& runtime, ui64 shardTabletId, const TAnalyzedTable& table) { + auto ev = std::make_unique(); + auto& record = ev->Record; + table.ToProto(*record.MutableTable()); + record.AddTypes(NKikimrStat::EColumnStatisticType::TYPE_COUNT_MIN_SKETCH); + + auto sender = runtime.AllocateEdgeActor(); + runtime.SendToPipe(shardTabletId, sender, ev.release()); + runtime.GrabEdgeEventRethrow(sender); +} + +void AnalyzeStatus(TTestActorRuntime& runtime, TActorId sender, ui64 saTabletId, const TString operationId, const 
NKikimrStat::TEvAnalyzeStatusResponse::EStatus expectedStatus) { + auto analyzeStatusRequest = std::make_unique(); + analyzeStatusRequest->Record.SetOperationId(operationId); + runtime.SendToPipe(saTabletId, sender, analyzeStatusRequest.release()); + + auto analyzeStatusResponse = runtime.GrabEdgeEventRethrow(sender); + UNIT_ASSERT(analyzeStatusResponse); + UNIT_ASSERT_VALUES_EQUAL(analyzeStatusResponse->Get()->Record.GetOperationId(), operationId); + UNIT_ASSERT_VALUES_EQUAL(analyzeStatusResponse->Get()->Record.GetStatus(), expectedStatus); +} + +void WaitForSavedStatistics(TTestActorRuntime& runtime, const TPathId& pathId) { + TWaitForFirstEvent waiter(runtime, [pathId](const auto& ev){ + return ev->Get()->PathId == pathId; + }); + + waiter.Wait(); +} + + + +} // NStat +} // NKikimr diff --git a/ydb/core/statistics/ut_common/ut_common.h b/ydb/core/statistics/ut_common/ut_common.h new file mode 100644 index 000000000000..df756e53a99c --- /dev/null +++ b/ydb/core/statistics/ut_common/ut_common.h @@ -0,0 +1,123 @@ +#pragma once + +#include + +#include + +#include +#include + +namespace NKikimrStat { + class TTable; +} + +namespace NKikimr { +namespace NStat { + +static constexpr ui32 ColumnTableRowsNumber = 1000; + +class TTestEnv { +public: + TTestEnv(ui32 staticNodes = 1, ui32 dynamicNodes = 1, bool useRealThreads = false); + ~TTestEnv(); + + Tests::TServer& GetServer() const { + return *Server; + } + + Tests::TClient& GetClient() const { + return *Client; + } + + Tests::TTenants& GetTenants() const { + return *Tenants; + } + + NYdb::TDriver& GetDriver() const { + return *Driver; + } + + const TString& GetEndpoint() const { + return Endpoint; + } + + const Tests::TServerSettings::TPtr GetSettings() const { + return Settings; + } + + auto& GetController() { + return CSController; + } + +private: + TPortManager PortManager; + + Tests::TServerSettings::TPtr Settings; + Tests::TServer::TPtr Server; + THolder Client; + THolder Tenants; + + TString Endpoint; + 
NYdb::TDriverConfig DriverConfig; + THolder Driver; + NYDBTest::TControllers::TGuard CSController; +}; + +Ydb::StatusIds::StatusCode ExecuteYqlScript(TTestEnv& env, const TString& script, bool mustSucceed = true); + +TString CreateDatabase(TTestEnv& env, const TString& databaseName, + size_t nodeCount = 1, bool isShared = false, const TString& poolName = "hdd1"); + +TString CreateServerlessDatabase(TTestEnv& env, const TString& databaseName, const TString& sharedName); + +struct TTableInfo { + std::vector ShardIds; + ui64 SaTabletId; + TPathId DomainKey; + TPathId PathId; + TString Path; +}; + +struct TDatabaseInfo { + TString FullDatabaseName; + std::vector Tables; +}; + +TDatabaseInfo CreateDatabaseColumnTables(TTestEnv& env, ui8 tableCount, ui8 shardCount); +TDatabaseInfo CreateServerlessDatabaseColumnTables(TTestEnv& env, ui8 tableCount, ui8 shardCount); + +TPathId ResolvePathId(TTestActorRuntime& runtime, const TString& path, TPathId* domainKey = nullptr, ui64* saTabletId = nullptr); + + +TVector GetTableShards(TTestActorRuntime& runtime, TActorId sender, const TString &path); +TVector GetColumnTableShards(TTestActorRuntime& runtime, TActorId sender,const TString &path); + +void CreateUniformTable(TTestEnv& env, const TString& databaseName, const TString& tableName); +void CreateColumnStoreTable(TTestEnv& env, const TString& databaseName, const TString& tableName, int shardCount); +void DropTable(TTestEnv& env, const TString& databaseName, const TString& tableName); + +std::shared_ptr ExtractCountMin(TTestActorRuntime& runtime, const TPathId& pathId, ui64 columnTag = 1); +void ValidateCountMinColumnshard(TTestActorRuntime& runtime, const TPathId& pathId, ui64 expectedProbe); + +void ValidateCountMinDatashard(TTestActorRuntime& runtime, TPathId pathId); +void ValidateCountMinDatashardAbsense(TTestActorRuntime& runtime, TPathId pathId); + +struct TAnalyzedTable { + TPathId PathId; + std::vector ColumnTags; + + TAnalyzedTable(const TPathId& pathId); + 
TAnalyzedTable(const TPathId& pathId, const std::vector& columnTags); + void ToProto(NKikimrStat::TTable& tableProto) const; +}; + +std::unique_ptr MakeAnalyzeRequest(const std::vector& tables, const TString operationId = "operationId"); + +void Analyze(TTestActorRuntime& runtime, ui64 saTabletId, const std::vector& table, const TString operationId = "operationId"); +void AnalyzeTable(TTestActorRuntime& runtime, ui64 shardTabletId, const TAnalyzedTable& table); +void AnalyzeStatus(TTestActorRuntime& runtime, TActorId sender, ui64 saTabletId, const TString operationId, const NKikimrStat::TEvAnalyzeStatusResponse::EStatus expectedStatus); + +void WaitForSavedStatistics(TTestActorRuntime& runtime, const TPathId& pathId); + +} // namespace NStat +} // namespace NKikimr diff --git a/ydb/core/statistics/ut_common/ya.make b/ydb/core/statistics/ut_common/ya.make new file mode 100644 index 000000000000..94514bd506fe --- /dev/null +++ b/ydb/core/statistics/ut_common/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + ut_common.cpp + ut_common.h +) + +PEERDIR( + ydb/core/tx/columnshard/hooks/testing + ydb/core/testlib +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/statistics/ya.make b/ydb/core/statistics/ya.make index d19742d6ee4b..ec4a6bd7a38f 100644 --- a/ydb/core/statistics/ya.make +++ b/ydb/core/statistics/ya.make @@ -1,11 +1,8 @@ LIBRARY() SRCS( + common.h events.h - stat_service.h - stat_service.cpp - save_load_stats.h - save_load_stats.cpp ) PEERDIR( @@ -21,8 +18,13 @@ END() RECURSE( aggregator + database + service + ut_common ) RECURSE_FOR_TESTS( - ut + aggregator/ut + database/ut + service/ut ) diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index c11266041cf1..81542a645969 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -415,7 +415,7 @@ struct Schema : NIceDb::Schema { struct BlobId : Column<10, NScheme::NTypeIds::Utf8> {}; struct BlobRangeOffset : Column<11, 
NScheme::NTypeIds::Uint64> {}; struct BlobRangeSize : Column<12, NScheme::NTypeIds::Uint64> {}; - struct Activity : Column<13, NScheme::NTypeIds::Bool> {}; + struct Activity : Column<13, NScheme::NTypeIds::Uint8> {}; struct TierName: Column<14, NScheme::NTypeIds::Utf8> {}; struct EntityType: Column<15, NScheme::NTypeIds::Utf8> {}; @@ -525,9 +525,10 @@ struct Schema : NIceDb::Schema { struct ColumnBlobBytes: Column<7, NScheme::NTypeIds::Uint64> {}; struct IndexBlobBytes: Column<8, NScheme::NTypeIds::Uint64> {}; struct PortionId: Column<9, NScheme::NTypeIds::Uint64> {}; - struct Activity: Column<10, NScheme::NTypeIds::Bool> {}; + struct Activity: Column<10, NScheme::NTypeIds::Uint8> {}; struct TierName: Column<11, NScheme::NTypeIds::Utf8> {}; struct Stats: Column<12, NScheme::NTypeIds::Utf8> {}; + struct Optimized: Column<13, NScheme::NTypeIds::Uint8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -542,7 +543,8 @@ struct Schema : NIceDb::Schema { PortionId, Activity, TierName, - Stats + Stats, + Optimized >; }; diff --git a/ydb/core/sys_view/sessions/sessions.cpp b/ydb/core/sys_view/sessions/sessions.cpp index f9e81b1c27ab..c9593fecaba6 100644 --- a/ydb/core/sys_view/sessions/sessions.cpp +++ b/ydb/core/sys_view/sessions/sessions.cpp @@ -178,6 +178,7 @@ class TSessionsScan : public NKikimr::NSysView::TScanActorBase { const auto& nodeId = PendingNodes.front(); auto kqpProxyId = NKqp::MakeKqpProxyID(nodeId); auto req = std::make_unique(); + req->Record.SetTenantName(TenantName); if (!ContinuationToken.empty()) { req->Record.SetSessionIdStart(ContinuationToken); req->Record.SetSessionIdStartInclusive(true); diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index 9a919a7fa0d7..d418c3667e6f 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -999,7 +999,7 @@ Y_UNIT_TEST_SUITE(SystemView) { check.String("Default"); // Kind check.Uint64(env.GetServer().GetRuntime()->GetNodeId(0)); // NodeId check.Uint64(1u); 
// PDiskId - check.String("ERROR"); // Status + check.Null(); // Status check.Uint64(0u); // VDisk check.Uint64(1000u); // VSlotId } diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp index f9ffcfb1fd73..900f0e1134e0 100644 --- a/ydb/core/tablet/node_whiteboard.cpp +++ b/ydb/core/tablet/node_whiteboard.cpp @@ -392,6 +392,117 @@ class TNodeWhiteboardService : public TActorBootstrapped return modified; } + static void CopyField(::google::protobuf::Message& protoTo, + const ::google::protobuf::Message& protoFrom, + const ::google::protobuf::Reflection& reflectionTo, + const ::google::protobuf::Reflection& reflectionFrom, + const ::google::protobuf::FieldDescriptor* field) { + using namespace ::google::protobuf; + if (field->is_repeated()) { + FieldDescriptor::CppType type = field->cpp_type(); + int size = reflectionFrom.FieldSize(protoFrom, field); + if (size != 0) { + reflectionTo.ClearField(&protoTo, field); + for (int i = 0; i < size; ++i) { + switch (type) { + case FieldDescriptor::CPPTYPE_INT32: + reflectionTo.AddInt32(&protoTo, field, reflectionFrom.GetRepeatedInt32(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_INT64: + reflectionTo.AddInt64(&protoTo, field, reflectionFrom.GetRepeatedInt64(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_UINT32: + reflectionTo.AddUInt32(&protoTo, field, reflectionFrom.GetRepeatedUInt32(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_UINT64: + reflectionTo.AddUInt64(&protoTo, field, reflectionFrom.GetRepeatedUInt64(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + reflectionTo.AddDouble(&protoTo, field, reflectionFrom.GetRepeatedDouble(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + reflectionTo.AddFloat(&protoTo, field, reflectionFrom.GetRepeatedFloat(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_BOOL: + reflectionTo.AddBool(&protoTo, field, 
reflectionFrom.GetRepeatedBool(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_ENUM: + reflectionTo.AddEnum(&protoTo, field, reflectionFrom.GetRepeatedEnum(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_STRING: + reflectionTo.AddString(&protoTo, field, reflectionFrom.GetRepeatedString(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + reflectionTo.AddMessage(&protoTo, field)->CopyFrom(reflectionFrom.GetRepeatedMessage(protoFrom, field, i)); + break; + } + } + } + } else { + if (reflectionFrom.HasField(protoFrom, field)) { + FieldDescriptor::CppType type = field->cpp_type(); + switch (type) { + case FieldDescriptor::CPPTYPE_INT32: + reflectionTo.SetInt32(&protoTo, field, reflectionFrom.GetInt32(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_INT64: + reflectionTo.SetInt64(&protoTo, field, reflectionFrom.GetInt64(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_UINT32: + reflectionTo.SetUInt32(&protoTo, field, reflectionFrom.GetUInt32(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_UINT64: + reflectionTo.SetUInt64(&protoTo, field, reflectionFrom.GetUInt64(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + reflectionTo.SetDouble(&protoTo, field, reflectionFrom.GetDouble(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + reflectionTo.SetFloat(&protoTo, field, reflectionFrom.GetFloat(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_BOOL: + reflectionTo.SetBool(&protoTo, field, reflectionFrom.GetBool(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_ENUM: + reflectionTo.SetEnum(&protoTo, field, reflectionFrom.GetEnum(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_STRING: + reflectionTo.SetString(&protoTo, field, reflectionFrom.GetString(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + reflectionTo.MutableMessage(&protoTo, field)->CopyFrom(reflectionFrom.GetMessage(protoFrom, 
field)); + break; + } + } + } + } + + static void SelectiveCopy(::google::protobuf::Message& protoTo, const ::google::protobuf::Message& protoFrom, const ::google::protobuf::RepeatedField& fields) { + using namespace ::google::protobuf; + const Descriptor& descriptor = *protoTo.GetDescriptor(); + const Reflection& reflectionTo = *protoTo.GetReflection(); + const Reflection& reflectionFrom = *protoFrom.GetReflection(); + for (auto fieldNumber : fields) { + const FieldDescriptor* field = descriptor.FindFieldByNumber(fieldNumber); + if (field) { + CopyField(protoTo, protoFrom, reflectionTo, reflectionFrom, field); + } + } + } + + template + static void Copy(TMessage& to, const TMessage& from, const TRequest& request) { + if (request.FieldsRequiredSize() > 0) { + if (request.FieldsRequiredSize() == 1 && request.GetFieldsRequired(0) == -1) { // all fields + to.CopyFrom(from); + } else { + SelectiveCopy(to, from, request.GetFieldsRequired()); + } + } else { + SelectiveCopy(to, from, GetDefaultWhiteboardFields()); + } + } + void SetRole(TStringBuf roleName) { for (const auto& role : SystemStateInfo.GetRoles()) { if (role == roleName) { @@ -686,14 +797,6 @@ class TNodeWhiteboardService : public TActorBootstrapped } } - static void CopyTabletStateInfo( - NKikimrWhiteboard::TTabletStateInfo& dst, - const NKikimrWhiteboard::TTabletStateInfo& src, - const NKikimrWhiteboard::TEvTabletStateRequest&) - { - dst = src; - } - void Handle(TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx) { auto now = TMonotonic::Now(); const auto& request = ev->Get()->Record; @@ -716,7 +819,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : TabletStateInfo) { if (pr.second.changetime() >= changedSince) { NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - CopyTabletStateInfo(tabletStateInfo, pr.second, request); + Copy(tabletStateInfo, pr.second, request); } } } else { @@ -725,12 +828,12 @@ class 
TNodeWhiteboardService : public TActorBootstrapped if (it != TabletStateInfo.end()) { if (it->second.changetime() >= changedSince) { NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - CopyTabletStateInfo(tabletStateInfo, it->second, request); + Copy(tabletStateInfo, it->second, request); } } } } - } else if (request.groupby() == "Type,State") { // the only supported group-by for now + } else if (request.groupby() == "Type,State" || request.groupby() == "NodeId,Type,State") { // the only supported group-by for now std::unordered_map, NKikimrWhiteboard::TTabletStateInfo> stateGroupBy; for (const auto& [id, stateInfo] : TabletStateInfo) { @@ -761,7 +864,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : NodeStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TNodeStateInfo &nodeStateInfo = *record.AddNodeStateInfo(); - nodeStateInfo.CopyFrom(pr.second); + Copy(nodeStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -792,7 +895,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : PDiskStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TPDiskStateInfo &pDiskStateInfo = *record.AddPDiskStateInfo(); - pDiskStateInfo.CopyFrom(pr.second); + Copy(pDiskStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -816,7 +919,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : VDiskStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TVDiskStateInfo &vDiskStateInfo = *record.AddVDiskStateInfo(); - vDiskStateInfo.CopyFrom(pr.second); + Copy(vDiskStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -831,7 +934,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : BSGroupStateInfo) { if 
(pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TBSGroupStateInfo &bSGroupStateInfo = *record.AddBSGroupStateInfo(); - bSGroupStateInfo.CopyFrom(pr.second); + Copy(bSGroupStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -845,7 +948,7 @@ class TNodeWhiteboardService : public TActorBootstrapped auto& record = response->Record; if (SystemStateInfo.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TSystemStateInfo &systemStateInfo = *record.AddSystemStateInfo(); - systemStateInfo.CopyFrom(SystemStateInfo); + Copy(systemStateInfo, SystemStateInfo, request); } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); ctx.Send(ev->Sender, response.Release(), 0, ev->Cookie); @@ -999,6 +1102,30 @@ class TNodeWhiteboardService : public TActorBootstrapped } }; +template +::google::protobuf::RepeatedField InitDefaultWhiteboardFields() { + using namespace ::google::protobuf; + const Descriptor& descriptor = *TMessage::GetDescriptor(); + ::google::protobuf::RepeatedField defaultFields; + int fieldCount = descriptor.field_count(); + for (int index = 0; index < fieldCount; ++index) { + const FieldDescriptor* field = descriptor.field(index); + const auto& options(field->options()); + if (options.HasExtension(NKikimrWhiteboard::DefaultField)) { + if (options.GetExtension(NKikimrWhiteboard::DefaultField)) { + defaultFields.Add(field->number()); + } + } + } + return defaultFields; +} + +template +::google::protobuf::RepeatedField GetDefaultWhiteboardFields() { + static ::google::protobuf::RepeatedField defaultFields = InitDefaultWhiteboardFields(); + return defaultFields; +} + IActor* CreateNodeWhiteboardService() { return new TNodeWhiteboardService(); } diff --git a/ydb/core/tablet/resource_broker.cpp b/ydb/core/tablet/resource_broker.cpp index 86d08828ffcd..7f4d73d78c19 100644 --- a/ydb/core/tablet/resource_broker.cpp +++ b/ydb/core/tablet/resource_broker.cpp @@ -1,6 +1,7 @@ #include "resource_broker_impl.h" 
#include +#include #include @@ -1269,10 +1270,10 @@ NKikimrResourceBroker::TResourceBrokerConfig MakeDefaultConfig() const ui64 KqpRmQueueCPU = 4; const ui64 KqpRmQueueMemory = 10ULL << 30; - const ui64 CSTTLCompactionMemoryLimit = 1ULL << 30; - const ui64 CSInsertCompactionMemoryLimit = 1ULL << 30; - const ui64 CSGeneralCompactionMemoryLimit = 3ULL << 30; - const ui64 CSScanMemoryLimit = 3ULL << 30; + const ui64 CSTTLCompactionMemoryLimit = NOlap::TGlobalLimits::TTLCompactionMemoryLimit; + const ui64 CSInsertCompactionMemoryLimit = NOlap::TGlobalLimits::InsertCompactionMemoryLimit; + const ui64 CSGeneralCompactionMemoryLimit = NOlap::TGlobalLimits::GeneralCompactionMemoryLimit; + const ui64 CSScanMemoryLimit = NOlap::TGlobalLimits::ScanMemoryLimit; const ui64 TotalCPU = 20; const ui64 TotalMemory = 16ULL << 30; diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index bf77a032fa90..28e7497d2529 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -159,7 +159,7 @@ class TTabletMon { } for (ui32 i = 0, e = labeledCounters->GetCounters().Size(); i < e; ++i) { - if(!strlen(labeledCounters->GetCounterName(i))) + if(!strlen(labeledCounters->GetCounterName(i))) continue; const ui64& value = labeledCounters->GetCounters()[i].Get(); const ui64& id = labeledCounters->GetIds()[i].Get(); @@ -765,10 +765,16 @@ class TTabletMon { TCounterPtr DatashardSizeBytes; TCounterPtr DatashardCacheHitBytes; TCounterPtr DatashardCacheMissBytes; + TCounterPtr ColumnShardReadRows_; + TCounterPtr ColumnShardReadBytes_; TCounterPtr ColumnShardScanRows_; TCounterPtr ColumnShardScanBytes_; + TCounterPtr ColumnShardWriteRows_; + TCounterPtr ColumnShardWriteBytes_; TCounterPtr ColumnShardBulkUpsertRows_; TCounterPtr ColumnShardBulkUpsertBytes_; + TCounterPtr ColumnShardEraseRows_; + TCounterPtr ColumnShardEraseBytes_; TCounterPtr ResourcesStorageUsedBytes; TCounterPtr 
ResourcesStorageUsedBytesOnSsd; TCounterPtr ResourcesStorageUsedBytesOnHdd; @@ -787,6 +793,7 @@ class TTabletMon { TCounterPtr ResourcesStreamReservedStorageLimit; THistogramPtr ShardCpuUtilization; + THistogramPtr ColumnShardCpuUtilization; TCounterPtr RowUpdates; TCounterPtr RowUpdateBytes; @@ -808,8 +815,11 @@ class TTabletMon { TCounterPtr ColumnShardScannedBytes_; TCounterPtr ColumnShardScannedRows_; - TCounterPtr ColumnShardUpsertBlobsWritten_; - TCounterPtr ColumnShardUpsertBytesWritten_; + TCounterPtr ColumnShardOperationsRowsWritten_; + TCounterPtr ColumnShardOperationsBytesWritten_; + TCounterPtr ColumnShardErasedBytes_; + TCounterPtr ColumnShardErasedRows_; + THistogramPtr ColumnShardConsumedCpuHistogram; TCounterPtr DiskSpaceTablesTotalBytes; TCounterPtr DiskSpaceTablesTotalBytesOnSsd; @@ -859,14 +869,26 @@ class TTabletMon { DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name", "table.datashard.cache_miss.bytes", true); + ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.rows", true); + ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.bytes", true); ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.rows", true); ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.bytes", true); + ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.rows", true); + ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.bytes", true); ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.rows", true); ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.bytes", true); + ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.rows", true); + ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.bytes", true); ResourcesStorageUsedBytes = 
ydbGroup->GetNamedCounter("name", "resources.storage.used_bytes", false); @@ -908,6 +930,8 @@ class TTabletMon { ShardCpuUtilization = ydbGroup->GetNamedHistogram("name", "table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); + ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name", + "table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); }; void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) { @@ -943,8 +967,11 @@ class TTabletMon { ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes"); ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows"); - ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten"); - ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten"); + ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten"); + ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten"); + ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased"); + ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased"); + ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)"); } if (hasSchemeshard && !DiskSpaceTablesTotalBytes) { @@ -990,10 +1017,20 @@ class TTabletMon { } if (ColumnShardScannedBytes_) { + ColumnShardReadRows_->Set(0); + ColumnShardReadBytes_->Set(0); ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val()); ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val()); - ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val()); - ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val()); + ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + 
ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val()); + ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val()); + + if (ColumnShardConsumedCpuHistogram) { + TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram); + } } if (DiskSpaceTablesTotalBytes) { @@ -1464,7 +1501,8 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCo continue; } if (groupNames[j] == "Client") { - group = group->GetSubgroup("ConsumerPath", NPersQueue::ConvertOldConsumerName(groups[j], ctx)); + group = group->GetSubgroup("ConsumerPath", + NPersQueue::ConvertOldConsumerName(groups[j], AppData(ctx)->PQConfig)); continue; } } @@ -2058,7 +2096,7 @@ class TClusterLabeledCountersAggregatorActorV3 : public TClusterLabeledCountersA if (groups.size() == 1) { //topic case ff = groups[0]; } else if (groups.size() == 3) { //client important topic - res = NPersQueue::ConvertOldConsumerName(groups[0], ctx) + "|" + groups[1] + "|"; + res = NPersQueue::ConvertOldConsumerName(groups[0], AppData(ctx)->PQConfig) + "|" + groups[1] + "|"; ff = groups[2]; } else { continue; diff --git a/ydb/core/tablet_flat/flat_database.cpp b/ydb/core/tablet_flat/flat_database.cpp index b87ee8056d59..ef50fab90283 100644 --- a/ydb/core/tablet_flat/flat_database.cpp +++ b/ydb/core/tablet_flat/flat_database.cpp @@ -494,7 +494,8 @@ ui64 TDatabase::GetTableMemOpsCount(ui32 tableId) const { } ui64 TDatabase::GetTableIndexSize(ui32 tableId) const { - return Require(tableId)->Stat().Parts.IndexBytes; + const auto& partStats = Require(tableId)->Stat().Parts; + return partStats.FlatIndexBytes + partStats.BTreeIndexBytes; } ui64 TDatabase::GetTableSearchHeight(ui32 tableId) const { diff --git a/ydb/core/tablet_flat/flat_dbase_misc.h b/ydb/core/tablet_flat/flat_dbase_misc.h index 1163eb05f9ba..4c3fe35fd252 100644 --- 
a/ydb/core/tablet_flat/flat_dbase_misc.h +++ b/ydb/core/tablet_flat/flat_dbase_misc.h @@ -14,7 +14,7 @@ namespace NTable { void Describe(IOutputStream &out) const noexcept { - const ui64 sys = Parts.IndexBytes + Parts.ByKeyBytes + Parts.OtherBytes; + const ui64 sys = Parts.FlatIndexBytes + Parts.BTreeIndexBytes + Parts.ByKeyBytes + Parts.OtherBytes; out << "DBase{" << Tables << "t " << Parts.PartsCount << "p" diff --git a/ydb/core/tablet_flat/flat_executor.cpp b/ydb/core/tablet_flat/flat_executor.cpp index 8d73dd4cc04a..152064a6d907 100644 --- a/ydb/core/tablet_flat/flat_executor.cpp +++ b/ydb/core/tablet_flat/flat_executor.cpp @@ -3556,23 +3556,23 @@ void TExecutor::Handle(NOps::TEvResult *ops, TProdCompact *msg, bool cancelled) } void TExecutor::UpdateUsedTabletMemory() { - UsedTabletMemory = 0; - // Estimate memory usage for internal executor structures. - UsedTabletMemory += 50 << 10; // 50kb - // Count the number of bytes exclusive to private cache. + // Estimate memory usage for internal executor structures: + UsedTabletMemory = 50 << 10; // 50kb + + // Count the number of bytes kept in private cache (can't be offloaded right now): if (PrivatePageCache) { - UsedTabletMemory += PrivatePageCache->GetStats().TotalExclusive; + UsedTabletMemory += PrivatePageCache->GetStats().TotalPinnedBody; + UsedTabletMemory += PrivatePageCache->GetStats().PinnedLoadSize; } - // Estimate memory used by database structures. + + // Estimate memory used by internal database structures: auto &counters = Database->Counters(); UsedTabletMemory += counters.MemTableWaste; UsedTabletMemory += counters.MemTableBytes; - UsedTabletMemory += counters.Parts.IndexBytes; UsedTabletMemory += counters.Parts.OtherBytes; - UsedTabletMemory += counters.Parts.ByKeyBytes; UsedTabletMemory += Stats->PacksMetaBytes; - // Add tablet memory usage. 
+ // Add tablet memory usage: UsedTabletMemory += Owner->GetMemoryUsage(); } @@ -3590,7 +3590,9 @@ void TExecutor::UpdateCounters(const TActorContext &ctx) { { /* Memory consumption of common for leader and follower components */ Counters->Simple()[TExecutorCounters::DB_WARM_BYTES].Set(dbCounters.MemTableBytes); Counters->Simple()[TExecutorCounters::DB_META_BYTES].Set(Stats->PacksMetaBytes); - Counters->Simple()[TExecutorCounters::DB_INDEX_BYTES].Set(dbCounters.Parts.IndexBytes); + Counters->Simple()[TExecutorCounters::DB_FLAT_INDEX_BYTES].Set(dbCounters.Parts.FlatIndexBytes); + Counters->Simple()[TExecutorCounters::DB_B_TREE_INDEX_BYTES].Set(dbCounters.Parts.BTreeIndexBytes); + Counters->Simple()[TExecutorCounters::DB_INDEX_BYTES].Set(dbCounters.Parts.FlatIndexBytes + dbCounters.Parts.BTreeIndexBytes); Counters->Simple()[TExecutorCounters::DB_OTHER_BYTES].Set(dbCounters.Parts.OtherBytes); Counters->Simple()[TExecutorCounters::DB_BYKEY_BYTES].Set(dbCounters.Parts.ByKeyBytes); Counters->Simple()[TExecutorCounters::USED_TABLET_MEMORY].Set(UsedTabletMemory); diff --git a/ydb/core/tablet_flat/flat_executor_counters.h b/ydb/core/tablet_flat/flat_executor_counters.h index 914fdad59995..22a1dc7b03b2 100644 --- a/ydb/core/tablet_flat/flat_executor_counters.h +++ b/ydb/core/tablet_flat/flat_executor_counters.h @@ -62,6 +62,8 @@ namespace NTabletFlatExecutor { XX(CONSUMED_STORAGE, "ConsumedStorage") \ XX(CONSUMED_MEMORY, "ConsumedMemory") \ XX(COMPACTION_READ_IN_FLY, "CompactionReadInFly") \ + XX(DB_FLAT_INDEX_BYTES, "DbFlatIndexBytes") \ + XX(DB_B_TREE_INDEX_BYTES, "DbBTreeIndexBytes") \ // don't change order! 
#define FLAT_EXECUTOR_CUMULATIVE_COUNTERS_MAP(XX) \ diff --git a/ydb/core/tablet_flat/flat_executor_ut.cpp b/ydb/core/tablet_flat/flat_executor_ut.cpp index 5f9552f6f833..7583f10e790e 100644 --- a/ydb/core/tablet_flat/flat_executor_ut.cpp +++ b/ydb/core/tablet_flat/flat_executor_ut.cpp @@ -6228,13 +6228,15 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { } }; - Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses b-tree index + Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses flat index TMyEnvBase env; TRowsModel rows; auto &appData = env->GetAppData(); UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.HasEnableLocalDBBtreeIndex(), false); UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.HasEnableLocalDBFlatIndex(), false); + UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.GetEnableLocalDBBtreeIndex(), false); + UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.GetEnableLocalDBFlatIndex(), true); auto counters = MakeIntrusive(env->GetDynamicCounters()); int readRows = 0, failedAttempts = 0; @@ -6250,8 +6252,8 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); env.WaitFor(); - // all pages are always kept in shared cache (except flat index) - UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 334); + // all pages are always kept in shared cache + UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 290); env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }); UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); @@ -6264,7 +6266,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { // after restart we have no pages in private cache env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }, true); UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); - UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 330); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 288); } Y_UNIT_TEST(EnableLocalDBBtreeIndex_True) { // uses b-tree index @@ -6305,6 +6307,45 @@ 
Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 330); } + Y_UNIT_TEST(EnableLocalDBBtreeIndex_False) { // uses flat index + TMyEnvBase env; + TRowsModel rows; + + auto &appData = env->GetAppData(); + + appData.FeatureFlags.SetEnableLocalDBBtreeIndex(false); + auto counters = MakeIntrusive(env->GetDynamicCounters()); + int readRows = 0, failedAttempts = 0; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + auto policy = MakeIntrusive(); + policy->MinBTreeIndexNodeSize = 128; + env.SendSync(rows.MakeScheme(std::move(policy))); + + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(1000, 950)); + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(1000, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor(); + + // all pages are always kept in shared cache + UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 290); + + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // after restart we have no pages in private cache + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 288); + } + Y_UNIT_TEST(EnableLocalDBBtreeIndex_True_EnableLocalDBFlatIndex_False) { // uses b-tree index TMyEnvBase env; TRowsModel rows; diff --git a/ydb/core/tablet_flat/flat_part_loader.cpp b/ydb/core/tablet_flat/flat_part_loader.cpp index b031c698cce2..33c69b131c87 100644 --- a/ydb/core/tablet_flat/flat_part_loader.cpp +++ b/ydb/core/tablet_flat/flat_part_loader.cpp @@ -210,17 +210,22 @@ TAutoPtr TLoader::StageCreatePartView() noexcept TEpoch epoch = Epoch != TEpoch::Max() ? 
Epoch : TEpoch(Root.GetEpoch()); // TODO: put index size to stat? - // TODO: include history indexes bytes size_t indexesRawSize = 0; if (BTreeGroupIndexes) { for (const auto &meta : BTreeGroupIndexes) { indexesRawSize += meta.IndexSize; } + for (const auto &meta : BTreeHistoricIndexes) { + indexesRawSize += meta.IndexSize; + } // Note: although we also have flat index, it shouldn't be loaded; so let's not count it here } else { for (auto indexPage : FlatGroupIndexes) { indexesRawSize += Packs[0]->GetPageSize(indexPage); } + for (auto indexPage : FlatHistoricIndexes) { + indexesRawSize += Packs[0]->GetPageSize(indexPage); + } } auto *partStore = new TPartStore( diff --git a/ydb/core/tablet_flat/flat_stat_table.cpp b/ydb/core/tablet_flat/flat_stat_table.cpp index 08a850256e90..afacbe7f3747 100644 --- a/ydb/core/tablet_flat/flat_stat_table.cpp +++ b/ydb/core/tablet_flat/flat_stat_table.cpp @@ -17,9 +17,6 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u } } - // TODO: enable b-tree index after benchmarks - mixedIndex = true; - return mixedIndex ? 
BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler) : BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp new file mode 100644 index 000000000000..e25f160cfef3 --- /dev/null +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp @@ -0,0 +1,217 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_stat_table_btree_index.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; + +ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + return 0; + } + if (rowId >= meta.GetRowCount()) { + return meta.GetDataSize(); + } + + TPageId pageId = meta.GetPageId(); + ui64 prevDataSize = 0; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(rowId); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); + } + } + + return prevDataSize; +} + +ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { + Y_ABORT_UNLESS(groupId == TGroupId(0, true)); + + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + historicRowId = 0; + return 0; + } + if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { + historicRowId = meta.GetRowCount(); + return meta.GetDataSize(); + } + + TPageId pageId 
= meta.GetPageId(); + ui64 prevDataSize = 0; + historicRowId = 0; + + // Minimum key is (startRowId, max, max) + ui64 startStep = Max(); + ui64 startTxId = Max(); + TCell key1Cells[3] = { + TCell::Make(rowId), + TCell::Make(startStep), + TCell::Make(startTxId), + }; + TCells key1{ key1Cells, 3 }; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + const auto& prevChild = node.GetShortChild(pos - 1); + prevDataSize = prevChild.GetDataSize(); + historicRowId = prevChild.GetRowCount(); + } + } + + return prevDataSize; +} + +void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) noexcept { + ui32 page = frames->Lower(beginRowId, 0, Max()); + + while (auto &rel = frames->Relation(page)) { + if (rel.Row < endRowId) { + auto channel = part->GetPageChannel(lob, page); + stats.Add(rel.Size, channel); + ++page; + } else if (!rel.IsHead()) { + Y_ABORT("Got unaligned TFrames head record"); + } else { + break; + } + } +} + +bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { + bool ready = true; + + if (!part.Slices || part.Slices->empty()) { + return true; + } + + if (part->GroupsCount) { // main group + TGroupId groupId{}; + auto channel = part->GetGroupChannel(groupId); + + for (const auto& slice : *part.Slices) { + yieldHandler(); + + stats.RowCount += slice.EndRowId() - slice.BeginRowId(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && 
endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + + if (part->Small) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); + } + if (part->Large) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); + } + } + } + + for (ui32 groupIndex : xrange(1, part->GroupsCount)) { + TGroupId groupId{groupIndex}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + TVector> historicSlices; + + if (part->HistoricGroupsCount) { // main historic group + TGroupId groupId{0, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + TRowId beginRowId, endRowId; + bool readySlice = true; + ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); + ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); + ready &= readySlice; + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + if (readySlice && endRowId > beginRowId) { + historicSlices.emplace_back(beginRowId, endRowId); + } + } + } + + for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { + TGroupId groupId{groupIndex, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : historicSlices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); + 
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + return ready; +} + +} + +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { + stats.Clear(); + + bool ready = true; + for (const auto& part : subset.Flatten) { + stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); + ready &= AddDataSize(part, stats, env, yieldHandler); + } + + if (!ready) { + return false; + } + + ready &= BuildStatsHistogramsBTreeIndex(subset, stats, + stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount, + env, yieldHandler); + + return ready; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.h b/ydb/core/tablet_flat/flat_stat_table_btree_index.h index a82c8e70a41f..a01a92d8890b 100644 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index.h +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.h @@ -2,216 +2,12 @@ #include "flat_stat_table.h" #include "flat_table_subset.h" -#include "flat_stat_table_btree_index_histogram.h" namespace NKikimr::NTable { -namespace { +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler); -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler); -ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - 
return 0; - } - if (rowId >= meta.GetRowCount()) { - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(rowId); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); - } - } - - return prevDataSize; -} - -ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { - Y_ABORT_UNLESS(groupId == TGroupId(0, true)); - - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - historicRowId = 0; - return 0; - } - if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { - historicRowId = meta.GetRowCount(); - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - historicRowId = 0; - - // Minimum key is (startRowId, max, max) - ui64 startStep = Max(); - ui64 startTxId = Max(); - TCell key1Cells[3] = { - TCell::Make(rowId), - TCell::Make(startStep), - TCell::Make(startTxId), - }; - TCells key1{ key1Cells, 3 }; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - const auto& prevChild = node.GetShortChild(pos - 1); - prevDataSize = prevChild.GetDataSize(); - historicRowId = prevChild.GetRowCount(); - } - } - - return prevDataSize; -} - -void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) 
noexcept { - ui32 page = frames->Lower(beginRowId, 0, Max()); - - while (auto &rel = frames->Relation(page)) { - if (rel.Row < endRowId) { - auto channel = part->GetPageChannel(lob, page); - stats.Add(rel.Size, channel); - ++page; - } else if (!rel.IsHead()) { - Y_ABORT("Got unaligned TFrames head record"); - } else { - break; - } - } -} - -bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - if (!part.Slices || part.Slices->empty()) { - return true; - } - - if (part->GroupsCount) { // main group - TGroupId groupId{}; - auto channel = part->GetGroupChannel(groupId); - - for (const auto& slice : *part.Slices) { - yieldHandler(); - - stats.RowCount += slice.EndRowId() - slice.BeginRowId(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - - if (part->Small) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); - } - if (part->Large) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); - } - } - } - - for (ui32 groupIndex : xrange(1, part->GroupsCount)) { - TGroupId groupId{groupIndex}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - TVector> historicSlices; - - if (part->HistoricGroupsCount) { // main historic group - 
TGroupId groupId{0, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - TRowId beginRowId, endRowId; - bool readySlice = true; - ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); - ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); - ready &= readySlice; - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - if (readySlice && endRowId > beginRowId) { - historicSlices.emplace_back(beginRowId, endRowId); - } - } - } - - for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { - TGroupId groupId{groupIndex, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : historicSlices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - return ready; -} - -} - -inline bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - stats.Clear(); - - bool ready = true; - for (const auto& part : subset.Flatten) { - stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); - ready &= AddDataSize(part, stats, env, yieldHandler); - } - - if (!ready) { - return false; - } - - ready &= BuildStatsHistogramsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); - - return ready; -} } diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp new file mode 100644 index 000000000000..4c060d594780 --- /dev/null +++ 
b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp @@ -0,0 +1,533 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_page_btree_index_writer.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; +using TCellsIterable = TBtreeIndexNode::TCellsIterable; +using TCellsIter = TBtreeIndexNode::TCellsIter; + +const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); + +enum class ENodeState : ui8 { + Initial, + Opened, + Closed, + Ignored, +}; + +class TTableHistogramBuilderBtreeIndex { + struct TNodeState { + const TPart* Part; + TPageId PageId; + ui32 Level; + TRowId BeginRowId, EndRowId; + ui64 BeginDataSize, EndDataSize; + TCellsIterable BeginKey, EndKey; + ENodeState State = ENodeState::Initial; + + TNodeState(const TPart* part, TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TRowId beginDataSize, TRowId endDataSize, TCellsIterable beginKey, TCellsIterable endKey) + : Part(part) + , PageId(pageId) + , Level(level) + , BeginRowId(beginRowId) + , EndRowId(endRowId) + , BeginDataSize(beginDataSize) + , EndDataSize(endDataSize) + , BeginKey(beginKey) + , EndKey(endKey) + { + } + + TRowId GetRowCount() const noexcept { + return EndRowId - BeginRowId; + } + + ui64 GetDataSize() const noexcept { + return EndDataSize - BeginDataSize; + } + + // usually a node state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. Closed - after processing TEvent.IsBegin = false + // if an opened node is being loaded, its state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. 
Ignored - after having been loaded + // in a case when a node EndKey <= BeginKey a node state goes in order: + // (which is a theoretically possible scenario because of slice bounds) + // 1. Initial + // 2. Closed - after processing TEvent.IsBegin = false + + bool Open(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Initial)) { + State = ENodeState::Opened; + openedRowCount += GetRowCount(); + openedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool Close(ui64& openedRowCount, ui64& closedRowCount, ui64& openedDataSize, ui64& closedDataSize) noexcept { + if (State == ENodeState::Opened) { + State = ENodeState::Closed; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + closedRowCount += rowCount; + closedDataSize += dataSize; + return true; + } else if (Y_UNLIKELY(State == ENodeState::Initial)) { + State = ENodeState::Closed; + closedRowCount += GetRowCount(); + closedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool IgnoreOpened(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Opened)) { + State = ENodeState::Ignored; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + return true; + } + return false; + } + }; + + struct TEvent { + TCellsIterable Key; + bool IsBegin; + TNodeState* Node; + }; + + struct TNodeEventKeyGreater { + const TKeyCellDefaults& KeyDefaults; + + bool operator ()(const TEvent& a, const TEvent& b) const noexcept { + return Compare(a, b) > 0; + } + + i8 Compare(const TEvent& a, const TEvent& b) const noexcept { + // events go in order: + // - Key = {}, IsBegin = true + // - ... 
+ // - Key = {'c'}, IsBegin = false + // - Key = {'c'}, IsBegin = true + // - ... + // - Key = {'d'}, IsBegin = false + // - Key = {'d'}, IsBegin = true + // - ... + // - Key = {}, IsBegin = false + + if (a.Key && b.Key) { // compare by keys + auto cmp = CompareKeys(a.Key, b.Key, KeyDefaults); + if (cmp != 0) { + return cmp; + } + // keys are the same, compare by begin flag, end events first: + return Compare(a.IsBegin ? 1 : -1, b.IsBegin ? 1 : -1); + } + + // category = -1 for Key = { }, IsBegin = true + // category = 0 for Key = {*}, IsBegin = * + // category = +1 for Key = { }, IsBegin = false + return Compare(GetCategory(a), GetCategory(b)); + } + + private: + static i8 GetCategory(const TEvent& a) noexcept { + if (a.Key) { + return 0; + } + return a.IsBegin ? -1 : +1; + } + + static i8 Compare(i8 a, i8 b) noexcept { + if (a < b) return -1; + if (a > b) return +1; + return 0; + } + }; + + struct TNodeRowCountLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetRowCount() < b->GetRowCount(); + } + }; + + struct TNodeDataSizeLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetDataSize() < b->GetDataSize(); + } + }; + +public: + TTableHistogramBuilderBtreeIndex(const TSubset& subset, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) + : Subset(subset) + , KeyDefaults(*Subset.Scheme->Keys) + , RowCountResolution(rowCountResolution) + , DataSizeResolution(dataSizeResolution) + , RowCountResolutionGap(RowCountResolution / 2) + , DataSizeResolutionGap(DataSizeResolution / 2) + , Env(env) + , YieldHandler(yieldHandler) + , NodeEventKeyGreater{KeyDefaults} + , FutureEvents(NodeEventKeyGreater) + { + } + + bool Build(TStats& stats) { + bool ready = true; + + for (auto index : xrange(Subset.Flatten.size())) { + auto& part = Subset.Flatten[index]; + auto& meta = part->IndexPages.GetBTree({}); + TCellsIterable beginKey = EmptyKey; 
+ if (part.Slices && part.Slices->front().FirstKey.GetCells()) { + beginKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->front().FirstKey); + } + TCellsIterable endKey = EmptyKey; + if (part.Slices && part.Slices->back().LastKey.GetCells()) { + endKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->back().LastKey); + } + LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetDataSize(), beginKey, endKey); + ready &= SlicePart(*part.Slices, LoadedStateNodes.back()); + } + + if (!ready) { + return false; + } + + ready &= BuildIterate(stats); + + FutureEvents.clear(); + LoadedBTreeNodes.clear(); + LoadedStateNodes.clear(); + + return ready; + } + +private: + bool SlicePart(const TSlices& slices, TNodeState& node) { + YieldHandler(); + + // TODO: avoid binary search for each call (we may intersect slices with nodes in linear time actually) + auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); + + if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { + // skip the node + return true; + } + + if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { + // take the node + AddFutureEvents(node); + return true; + } + + // split the node + + if (node.Level == 0) { + // can't split, decide by node.EndRowId - 1 + // TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too + if (it->Has(node.EndRowId - 1)) { + AddFutureEvents(node); + } + return true; + } + + bool ready = true; + + const auto addNode = [&](TNodeState& child) { + ready &= SlicePart(slices, child); + }; + if (!TryLoadNode(node, addNode)) { + return false; + } + + return ready; + } + + bool BuildIterate(TStats& stats) { + // The idea is the following: + // - move a key pointer through all parts simultaneously + // keeping all nodes that contain current key pointer in opened heaps (sorted by size descending) + // all 
nodes that ended before current key pointer are considered as closed + // - keep an invariant that the total size of closed and opened nodes doesn't exceed next histogram bucket values + // otherwise, load opened nodes + // - because histogram is approximate each of its values is allowed to be in a range + // [next value - gap, next value + gap] + + // next histogram values that are being looked for: + ui64 nextHistogramRowCount = RowCountResolution, nextHistogramDataSize = DataSizeResolution; + + // closed nodes stats: + ui64 closedRowCount = 0, closedDataSize = 0; + + // opened nodes stats and heaps: + ui64 openedRowCount = 0, openedDataSize = 0; + TPriorityQueue, TNodeRowCountLess> openedSortedByRowCount; + TPriorityQueue, TNodeDataSizeLess> openedSortedByDataSize; + + // will additionally save list of all nodes that start at current key pointer: + TVector currentKeyPointerOpens; + + while (FutureEvents && (nextHistogramRowCount != Max() || nextHistogramDataSize != Max())) { + YieldHandler(); + + auto currentKeyPointer = FutureEvents.top(); + currentKeyPointerOpens.clear(); + + auto processEvent = [&](const TEvent& event) { + Y_DEBUG_ABORT_UNLESS(NodeEventKeyGreater.Compare(event, currentKeyPointer) <= 0, "Can't process future events"); + if (event.IsBegin) { + if (event.Node->Open(openedRowCount, openedDataSize)) { + openedSortedByRowCount.push(event.Node); + openedSortedByDataSize.push(event.Node); + } + } else { + event.Node->Close(openedRowCount, closedRowCount, openedDataSize, closedDataSize); + } + }; + + // process all events with the same key and type as current key pointer: + do { + const TEvent& event = FutureEvents.top(); + processEvent(event); + if (event.IsBegin) { + currentKeyPointerOpens.push_back(event.Node); + } + FutureEvents.pop(); + } while (FutureEvents && NodeEventKeyGreater.Compare(FutureEvents.top(), currentKeyPointer) == 0); + + const auto addEvent = [&](TEvent event) { + // TODO: skip all closed nodes and don't process them here + // TODO: don't compare each 
node key and replace it with parentNode.Seek(currentKeyPointer) + auto cmp = NodeEventKeyGreater.Compare(event, currentKeyPointer); + if (cmp <= 0) { // event happened + processEvent(event); + if (cmp == 0) { + currentKeyPointerOpens.push_back(event.Node); + } + } else { // event didn't yet happen + FutureEvents.push(event); + } + }; + const auto addNode = [&](TNodeState& node) { + addEvent(TEvent{node.BeginKey, true, &node}); + addEvent(TEvent{node.EndKey, false, &node}); + }; + + // may safely skip current key pointer and go further only if at the next iteration + // sum of sizes of closed and opened nodes don't exceed next histogram bucket values (plus their gaps) + // otherwise, load opened nodes right now + // in that case, next level nodes will be converted to begin and end events + // and then either processed or been postponed to future events according to current key pointer position + while (nextHistogramRowCount != Max() && closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap && openedSortedByRowCount) { + auto node = openedSortedByRowCount.top(); + openedSortedByRowCount.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + while (nextHistogramDataSize != Max() && closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap && openedSortedByDataSize) { + auto node = openedSortedByDataSize.top(); + openedSortedByDataSize.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + + // add current key pointer to a histogram if we either: + // - failed to split opened nodes and may exceed a next histogram 
bucket value (plus its gaps) + // - have enough closed nodes (more than a next histogram bucket value (minus its gap)) + // current key pointer value is calculated as follows: + // - size of all closed nodes + // - opened nodes that start at current key pointer are not counted + // - plus half of size of all other opened nodes (as their exact position is unknown) + // also check that current key pointer value is greater than the last value present in a histogram + if (currentKeyPointer.Key) { + if (nextHistogramRowCount != Max()) { + if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) { + ui64 currentKeyRowCountOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyRowCountOpens += node->GetRowCount(); + } + } + Y_ABORT_UNLESS(currentKeyRowCountOpens <= openedRowCount); + ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2; + if ((stats.RowCountHistogram.empty() ? 
0 : stats.RowCountHistogram.back().Value) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount) { + AddKey(stats.RowCountHistogram, currentKeyPointer.Key, currentKeyPointerRowCount); + nextHistogramRowCount = Max(currentKeyPointerRowCount + 1, nextHistogramRowCount + RowCountResolution); + if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount) { + nextHistogramRowCount = Max(); + } + } + } + } + if (nextHistogramDataSize != Max()) { + if (closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap || closedDataSize > nextHistogramDataSize - DataSizeResolutionGap) { + ui64 currentKeyDataSizeOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyDataSizeOpens += node->GetDataSize(); + } + } + Y_ABORT_UNLESS(currentKeyDataSizeOpens <= openedDataSize); + ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2; + if ((stats.DataSizeHistogram.empty() ? 
0 : stats.DataSizeHistogram.back().Value) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize.Size) { + AddKey(stats.DataSizeHistogram, currentKeyPointer.Key, currentKeyPointerDataSize); + nextHistogramDataSize = Max(currentKeyPointerDataSize + 1, nextHistogramDataSize + DataSizeResolution); + if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize.Size) { + nextHistogramDataSize = Max(); + } + } + } + } + } + } + + return true; + } + + void AddKey(THistogram& histogram, TCellsIterable& key, ui64 value) { + TVector keyCells; + + // add columns that are present in the part: + auto iter = key.Iter(); + for (TPos pos : xrange(iter.Count())) { + Y_UNUSED(pos); + keyCells.push_back(iter.Next()); + } + + // extend with default values if needed: + for (TPos index = keyCells.size(); index < KeyDefaults.Defs.size(); ++index) { + keyCells.push_back(KeyDefaults.Defs[index]); + } + + TString serializedKey = TSerializedCellVec::Serialize(keyCells); + + histogram.push_back({serializedKey, value}); + } + + bool TryLoadNode(const TNodeState& parent, const auto& addNode) { + Y_ABORT_UNLESS(parent.Level); + + auto page = Env->TryGetPage(parent.Part, parent.PageId, {}); + if (!page) { + return false; + } + + LoadedBTreeNodes.emplace_back(*page); + auto &bTreeNode = LoadedBTreeNodes.back(); + auto& groupInfo = parent.Part->Scheme->GetLayout({}); + + for (auto pos : xrange(bTreeNode.GetChildrenCount())) { + auto& child = bTreeNode.GetChild(pos); + + LoadedStateNodes.emplace_back(parent.Part, child.GetPageId(), parent.Level - 1, + pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), + pos ? bTreeNode.GetChild(pos - 1).GetTotalDataSize() : parent.BeginDataSize, child.GetTotalDataSize(), + pos ? bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, + pos < bTreeNode.GetKeysCount() ? 
bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey); + + addNode(LoadedStateNodes.back()); + } + + return true; + } + + void AddFutureEvents(TNodeState& node) { + FutureEvents.push(TEvent{node.BeginKey, true, &node}); + FutureEvents.push(TEvent{node.EndKey, false, &node}); + } + +private: + TCellsIterable MakeCellsIterableKey(const TPart* part, TSerializedCellVec serializedKey) { + // Note: this method is only called for root nodes and isn't worth optimizing + // so let's simply create a new fake b-tree index node with a given key + NPage::TBtreeIndexNodeWriter writer(part->Scheme, {}); + writer.AddChild({1, 1, 1, 0, 0}); + writer.AddKey(serializedKey.GetCells()); + writer.AddChild({2, 2, 2, 0, 0}); + TSharedData serializedNode = writer.Finish(); + LoadedBTreeNodes.emplace_back(serializedNode); + return LoadedBTreeNodes.back().GetKeyCellsIterable(0, part->Scheme->GetLayout({}).ColsKeyData); + } + + static int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_, const TKeyCellDefaults& keyDefaults) { + Y_ABORT_UNLESS(left_); + Y_ABORT_UNLESS(right_); + + auto left = left_.Iter(), right = right_.Iter(); + size_t end = Max(left.Count(), right.Count()); + Y_ABORT_UNLESS(end <= keyDefaults.Size(), "Key schema is smaller than compared keys"); + + for (size_t pos = 0; pos < end; ++pos) { + const auto& leftCell = pos < left.Count() ? left.Next() : keyDefaults.Defs[pos]; + const auto& rightCell = pos < right.Count() ? 
right.Next() : keyDefaults.Defs[pos]; + if (int cmp = CompareTypedCells(leftCell, rightCell, keyDefaults.Types[pos])) { + return cmp; + } + } + + return 0; + } + +private: + const TSubset& Subset; + const TKeyCellDefaults& KeyDefaults; + ui64 RowCountResolution, DataSizeResolution; + ui64 RowCountResolutionGap, DataSizeResolutionGap; + IPages* const Env; + TBuildStatsYieldHandler YieldHandler; + TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable references + TDeque LoadedStateNodes; // keep nodes to use their references + TNodeEventKeyGreater NodeEventKeyGreater; + TPriorityQueue, TNodeEventKeyGreater> FutureEvents; +}; + +} + +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) { + TTableHistogramBuilderBtreeIndex builder(subset, rowCountResolution, dataSizeResolution, env, yieldHandler); + + if (!builder.Build(stats)) { + return false; + } + + return true; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h deleted file mode 100644 index 78bb64b69ccb..000000000000 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h +++ /dev/null @@ -1,525 +0,0 @@ -#pragma once - -#include "flat_stat_table.h" -#include "flat_table_subset.h" - -namespace NKikimr::NTable { - -namespace { - -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; -using TCellsIterable = TBtreeIndexNode::TCellsIterable; -using TCellsIter = TBtreeIndexNode::TCellsIter; - -const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); - -class TTableHistogramBuilderBtreeIndex { -public: - struct TNodeState : public TIntrusiveListItem { - TPageId PageId; - ui32 Level; - TRowId 
BeginRowId, EndRowId; - TCellsIterable BeginKey, EndKey; - ui64 BeginSize, EndSize; - - TNodeState(TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, TCellsIterable endKey, TRowId beginSize, TRowId endSize) - : PageId(pageId) - , Level(level) - , BeginRowId(beginRowId) - , EndRowId(endRowId) - , BeginKey(beginKey) - , EndKey(endKey) - , BeginSize(beginSize) - , EndSize(endSize) - { - } - - ui64 GetSize() const noexcept { - return EndSize - BeginSize; - } - }; - - struct TGetRowCount { - static ui64 Get(const TChild& child) noexcept { - return child.GetRowCount(); - } - }; - - struct TGetDataSize { - static ui64 Get(const TChild& child) noexcept { - return child.GetTotalDataSize(); - } - }; - -private: - struct TPartNodes { - TPartNodes(const TPart* part, size_t index) - : Part(part) - , Index(index) - { - } - - const TPart* GetPart() const noexcept { - return Part; - } - - size_t GetIndex() const noexcept { - return Index; - } - - size_t GetCount() const noexcept { - return Count; - } - - ui64 GetSize() const noexcept { - return Size; - } - - const TIntrusiveList& GetNodes() const noexcept { - return Nodes; - } - - TNodeState* PopFront() noexcept { - auto result = Nodes.PopFront(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - TNodeState* PopBack() noexcept { - auto result = Nodes.PopBack(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - void PushFront(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushFront(item); - } - - void PushBack(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushBack(item); - } - - bool operator < (const TPartNodes& other) const noexcept { - return Size < other.Size; - } - - private: - const TPart* Part; - size_t Index; - size_t Count = 0; - ui64 Size = 0; - TIntrusiveList Nodes; - }; - -public: - TTableHistogramBuilderBtreeIndex(const TSubset& subset, IPages* env, ui32 histogramBucketsCount, 
TBuildStatsYieldHandler yieldHandler) - : Subset(subset) - , KeyDefaults(*Subset.Scheme->Keys) - , Env(env) - , HistogramBucketsCount(histogramBucketsCount) - , YieldHandler(yieldHandler) - { - } - - template - bool Build(THistogram& histogram, ui64 statTotalSize) { - if (!HistogramBucketsCount) { - return true; - } - - Resolution = statTotalSize / HistogramBucketsCount; - StatTotalSize = statTotalSize; - - bool ready = true; - ui64 endSize = 0; - TVector parts; - - for (auto index : xrange(Subset.Flatten.size())) { - auto& part = Subset.Flatten[index]; - auto& meta = part->IndexPages.GetBTree({}); - parts.emplace_back(part.Part.Get(), index); - LoadedStateNodes.emplace_back(meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), EmptyKey, EmptyKey, 0, TGetSize::Get(meta)); - ready &= SlicePart(parts.back(), *part.Slices, LoadedStateNodes.back()); - endSize += parts.back().GetSize(); - } - - if (!ready) { - return false; - } - - if (endSize) { - ready &= BuildHistogramRecursive(histogram, parts, 0, endSize, 0); - } - - LoadedBTreeNodes.clear(); - LoadedStateNodes.clear(); - - return ready; - } - -private: - template - bool SlicePart(TPartNodes& part, const TSlices& slices, TNodeState& node) { - YieldHandler(); - - auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); - - if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { - // skip the node - return true; - } - - if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { - // take the node - part.PushBack(&node); - return true; - } - - // split the node - - if (node.Level == 0) { - // can't split, decide by node.EndRowId - 1 - if (it->Has(node.EndRowId - 1)) { - part.PushBack(&node); - } - return true; - } - - bool ready = true; - - const auto addNode = [&](TNodeState& child) { - ready &= SlicePart(part, slices, child); - }; - if (!TryLoadNode(part.GetPart(), node, addNode)) { - return false; - } - - return ready; - } - - template - 
bool BuildHistogramRecursive(THistogram& histogram, TVector& parts, ui64 beginSize, ui64 endSize, ui32 depth) { - const static ui32 MaxDepth = 100; - - YieldHandler(); - -#ifndef NDEBUG - { - Y_DEBUG_ABORT_UNLESS(beginSize < endSize); - ui64 size = 0; - for (const auto& part : parts) { - size += part.GetSize(); - } - Y_DEBUG_ABORT_UNLESS(size == endSize - beginSize); - } -#endif - - if (SafeDiff(endSize, beginSize) <= Resolution || depth > MaxDepth) { - Y_DEBUG_ABORT_UNLESS(depth <= MaxDepth, "Shouldn't normally happen"); - return true; - } - - auto biggestPart = std::max_element(parts.begin(), parts.end()); - if (Y_UNLIKELY(biggestPart == parts.end())) { - Y_DEBUG_ABORT("Invalid part states"); - return true; - } - Y_ABORT_UNLESS(biggestPart->GetCount()); - - if (biggestPart->GetCount() == 1 && biggestPart->GetNodes().Front()->Level > 0) { - const auto addNode = [&biggestPart](TNodeState& child) { - biggestPart->PushBack(&child); - }; - if (!TryLoadNode(biggestPart->GetPart(), *biggestPart->PopFront(), addNode)) { - return false; - } - } - TCellsIterable splitKey = biggestPart->GetCount() > 1 - ? 
FindMedianPartKey(*biggestPart) - : FindMedianTableKey(parts); - - if (!splitKey) { - return true; - } - - ui64 leftSize = 0, middleSize = 0, rightSize = 0; - TVector leftParts, middleParts, rightParts; - - for (auto& part : parts) { - auto& leftNodes = PushNextPartNodes(part, leftParts); - auto& middleNodes = PushNextPartNodes(part, middleParts); - auto& rightNodes = PushNextPartNodes(part, rightParts); - - while (part.GetCount()) { - auto& node = *part.PopFront(); - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodes.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - } - - if (middleSize > Resolution / 2) { - std::make_heap(middleParts.begin(), middleParts.end()); - - while (middleSize > Resolution / 2 && middleParts.size()) { - std::pop_heap(middleParts.begin(), middleParts.end()); - auto& middleNodes = middleParts.back(); - auto& leftNodes = GetNextPartNodes(middleNodes, leftParts); - auto& rightNodes = GetNextPartNodes(middleNodes, rightParts); - TIntrusiveList rightNodesBuffer; - - leftSize -= leftNodes.GetSize(); - middleSize -= middleNodes.GetSize(); - rightSize -= rightNodes.GetSize(); - - auto count = middleNodes.GetCount(); - bool hasChanges = false; - for (auto index : xrange(count)) { - Y_UNUSED(index); - auto& node = *middleNodes.PopFront(); - if (!node.Level) { // can't be splitted, return as-is - middleNodes.PushBack(&node); - continue; - } - const auto addNode = [&](TNodeState& node) { - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodesBuffer.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - }; - if 
(!TryLoadNode(middleNodes.GetPart(), node, addNode)) { - return false; - } - hasChanges = true; - } - - while (!rightNodesBuffer.Empty()) { // reverse right part new nodes - rightNodes.PushFront(rightNodesBuffer.PopBack()); - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - - if (hasChanges) { // return updated nodes to the heap - std::push_heap(middleParts.begin(), middleParts.end()); - } else { // can't be splitted, ignore - middleParts.pop_back(); - } - } - } - - if (middleSize == 0 && (leftSize == 0 || rightSize == 0)) { - // no progress, don't continue - return true; - } - - bool ready = true; - - if (leftSize) { - ready &= BuildHistogramRecursive(histogram, leftParts, beginSize, beginSize + leftSize, depth + 1); - } - - ui64 splitSize = beginSize + leftSize + middleSize / 2; - // Note: due to different calculation approaches splitSize may exceed StatTotalSize, ignore them - if (beginSize < splitSize && splitSize < Min(endSize, StatTotalSize)) { - AddBucket(histogram, splitKey, splitSize); - } - - if (rightSize) { - ready &= BuildHistogramRecursive(histogram, rightParts, SafeDiff(endSize, rightSize), endSize, depth + 1); - } - - return ready; - } - - TCellsIterable FindMedianPartKey(const TPartNodes& part) { - Y_ABORT_UNLESS(part.GetCount() > 1, "It's impossible to split part with only one node"); - - TCellsIterable splitKey = EmptyKey; - ui64 splitSize = 0, currentSize = 0; - const ui64 middleSize = part.GetSize() / 2; - - for (const auto& node : part.GetNodes()) { - if (currentSize) { // can't split with the first key, skip it - if (!splitSize || AbsDifference(currentSize, middleSize) < AbsDifference(splitSize, middleSize)) { - splitKey = node.BeginKey; - splitSize = currentSize; - } - } - - currentSize += node.GetSize(); - } - - Y_ABORT_UNLESS(splitKey); - - return splitKey; - } - - TCellsIterable FindMedianTableKey(const TVector& parts) 
{ - TVector keys; - for (const auto& part : parts) { - for (const auto& node : part.GetNodes()) { - if (node.BeginKey) { - keys.push_back(node.BeginKey); - } - } - } - - auto median = keys.begin() + (keys.size() + 1) / 2; - - if (median == keys.end()) { - return EmptyKey; - } - - // Note: may work badly in case when all begin keys are the same - // however such cases are rare and don't worth optimizing with sort+unique complex code - // also this method is only called when we couldn't split the biggest part - std::nth_element(keys.begin(), median, keys.end(), [this](const TCellsIterable& left, const TCellsIterable& right) { - return CompareKeys(left, right) < 0; - }); - - return *median; - } - - void AddBucket(THistogram& histogram, TCellsIterable key, ui64 size) { - TVector splitKeyCells; - - // Add columns that are present in the part - auto iter = key.Iter(); - for (TPos pos : xrange(iter.Count())) { - Y_UNUSED(pos); - splitKeyCells.push_back(iter.Next()); - } - - // Extend with default values if needed - for (TPos index = splitKeyCells.size(); index < KeyDefaults.Defs.size(); ++index) { - splitKeyCells.push_back(KeyDefaults.Defs[index]); - } - - TString serializedSplitKey = TSerializedCellVec::Serialize(splitKeyCells); - - histogram.push_back({serializedSplitKey, size}); - } - - template - bool TryLoadNode(const TPart* part, const TNodeState& parent, const auto& addNode) { - Y_ABORT_UNLESS(parent.Level); - - auto page = Env->TryGetPage(part, parent.PageId, {}); - if (!page) { - return false; - } - - LoadedBTreeNodes.emplace_back(*page); - auto &bTreeNode = LoadedBTreeNodes.back(); - auto& groupInfo = part->Scheme->GetLayout({}); - - for (auto pos : xrange(bTreeNode.GetChildrenCount())) { - auto& child = bTreeNode.GetChild(pos); - - LoadedStateNodes.emplace_back(child.GetPageId(), parent.Level - 1, - pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), - pos ? 
bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, - pos < bTreeNode.GetKeysCount() ? bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey, - pos ? TGetSize::Get(bTreeNode.GetChild(pos - 1)) : parent.BeginSize, TGetSize::Get(child)); - - addNode(LoadedStateNodes.back()); - } - - return true; - } - - TPartNodes& PushNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetIndex() == list.size()); - list.emplace_back(part.GetPart(), part.GetIndex()); - return list.back(); - } - - TPartNodes& GetNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetPart() == list[part.GetIndex()].GetPart()); - return list[part.GetIndex()]; - } - -private: - int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_) const { - Y_DEBUG_ABORT_UNLESS(left_); - Y_DEBUG_ABORT_UNLESS(right_); - - auto left = left_.Iter(), right = right_.Iter(); - size_t end = Max(left.Count(), right.Count()); - Y_DEBUG_ABORT_UNLESS(end <= KeyDefaults.Size(), "Key schema is smaller than compared keys"); - - - for (size_t pos = 0; pos < end; ++pos) { - const auto& leftCell = pos < left.Count() ? left.Next() : KeyDefaults.Defs[pos]; - const auto& rightCell = pos < right.Count() ? 
right.Next() : KeyDefaults.Defs[pos]; - if (int cmp = CompareTypedCells(leftCell, rightCell, KeyDefaults.Types[pos])) { - return cmp; - } - } - - return 0; - } - - ui64 AbsDifference(ui64 a, ui64 b) const { - return static_cast(std::abs(static_cast(a) - static_cast(b))); - } - - ui64 SafeDiff(ui64 a, ui64 b) const { - return a - Min(a, b); - } - -private: - const TSubset& Subset; - const TKeyCellDefaults& KeyDefaults; - IPages* const Env; - ui32 HistogramBucketsCount; - TBuildStatsYieldHandler YieldHandler; - ui64 Resolution, StatTotalSize; - TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable key refs - TDeque LoadedStateNodes; // keep nodes to use TIntrusiveList -}; - -} - -inline bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - TTableHistogramBuilderBtreeIndex builder(subset, env, histogramBucketsCount, yieldHandler); - - ready &= builder.Build(stats.RowCountHistogram, stats.RowCount); - ready &= builder.Build(stats.DataSizeHistogram, stats.DataSize.Size); - - return ready; -} - -} diff --git a/ydb/core/tablet_flat/flat_table.cpp b/ydb/core/tablet_flat/flat_table.cpp index 354bccbcaefc..ca0b9e58533d 100644 --- a/ydb/core/tablet_flat/flat_table.cpp +++ b/ydb/core/tablet_flat/flat_table.cpp @@ -1415,7 +1415,11 @@ void TTable::SetTableObserver(TIntrusivePtr ptr) noexcept void TPartStats::Add(const TPartView& partView) { PartsCount += 1; - IndexBytes += partView->IndexesRawSize; + if (partView->IndexPages.HasBTree()) { + BTreeIndexBytes += partView->IndexesRawSize; + } else { + FlatIndexBytes += partView->IndexesRawSize; + } ByKeyBytes += partView->ByKey ? 
partView->ByKey->Raw.size() : 0; PlainBytes += partView->Stat.Bytes; CodedBytes += partView->Stat.Coded; @@ -1434,7 +1438,11 @@ void TPartStats::Add(const TPartView& partView) bool TPartStats::Remove(const TPartView& partView) { NUtil::SubSafe(PartsCount, ui64(1)); - NUtil::SubSafe(IndexBytes, partView->IndexesRawSize); + if (partView->IndexPages.HasBTree()) { + NUtil::SubSafe(BTreeIndexBytes, partView->IndexesRawSize); + } else { + NUtil::SubSafe(FlatIndexBytes, partView->IndexesRawSize); + } NUtil::SubSafe(ByKeyBytes, partView->ByKey ? partView->ByKey->Raw.size() : 0); NUtil::SubSafe(PlainBytes, partView->Stat.Bytes); NUtil::SubSafe(CodedBytes, partView->Stat.Coded); @@ -1463,7 +1471,8 @@ bool TPartStats::Remove(const TPartView& partView) TPartStats& TPartStats::operator+=(const TPartStats& rhs) { PartsCount += rhs.PartsCount; - IndexBytes += rhs.IndexBytes; + FlatIndexBytes += rhs.FlatIndexBytes; + BTreeIndexBytes += rhs.BTreeIndexBytes; OtherBytes += rhs.OtherBytes; ByKeyBytes += rhs.ByKeyBytes; PlainBytes += rhs.PlainBytes; @@ -1480,7 +1489,8 @@ TPartStats& TPartStats::operator+=(const TPartStats& rhs) TPartStats& TPartStats::operator-=(const TPartStats& rhs) { NUtil::SubSafe(PartsCount, rhs.PartsCount); - NUtil::SubSafe(IndexBytes, rhs.IndexBytes); + NUtil::SubSafe(FlatIndexBytes, rhs.FlatIndexBytes); + NUtil::SubSafe(BTreeIndexBytes, rhs.BTreeIndexBytes); NUtil::SubSafe(OtherBytes, rhs.OtherBytes); NUtil::SubSafe(ByKeyBytes, rhs.ByKeyBytes); NUtil::SubSafe(PlainBytes, rhs.PlainBytes); diff --git a/ydb/core/tablet_flat/flat_table_stats.h b/ydb/core/tablet_flat/flat_table_stats.h index 172c777ac3dd..4d735d3fccdc 100644 --- a/ydb/core/tablet_flat/flat_table_stats.h +++ b/ydb/core/tablet_flat/flat_table_stats.h @@ -8,7 +8,8 @@ namespace NTable { struct TPartStats { ui64 PartsCount = 0; /* Total used TPart units in db */ - ui64 IndexBytes = 0; + ui64 FlatIndexBytes = 0; + ui64 BTreeIndexBytes = 0; ui64 OtherBytes = 0; /* Other metadata and sys. 
indexes */ ui64 ByKeyBytes = 0; ui64 PlainBytes = 0; /* Plain data pages size */ diff --git a/ydb/core/tablet_flat/shared_cache_clock_pro.h b/ydb/core/tablet_flat/shared_cache_clock_pro.h new file mode 100644 index 000000000000..91ae2f45c804 --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_clock_pro.h @@ -0,0 +1,423 @@ +#pragma once +#include "defs.h" +#include +#include +#include +#include + +namespace NKikimr::NCache { + +// TODO: remove template args and make some page base class + +// TODO: metrics + +enum class EClockProPageLocation { + None, + Hot, + Cold +}; + +template +class TClockProCache : public ICacheCache { + using TPageKey = typename TPageTraits::TPageKey; + + struct TPageEntry : public TIntrusiveListItem { + TPageKey Key; + TPage* Page; + ui64 Size; + + TPageEntry(const TPageKey& key, TPage* page, ui64 size) + : Key(key) + , Page(page) + , Size(size) + {} + }; + + struct TPageKeyHash { + using is_transparent = void; + + inline size_t operator()(const TPageEntry& entry) const { + return TPageTraits::GetHash(entry.Key); + } + + inline size_t operator()(const TPageKey& key) const { + return TPageTraits::GetHash(key); + } + }; + + struct TPageKeyEqual { + using is_transparent = void; + + inline bool operator()(const TPageEntry& left, const TPageEntry& right) const { + return TPageTraits::Equals(left.Key, right.Key); + } + + inline bool operator()(const TPageEntry& left, const TPageKey& right) const { + return TPageTraits::Equals(left.Key, right); + } + }; + +public: + TClockProCache(ui64 limit) + : Limit(limit) + , ColdTarget(limit) + {} + + TIntrusiveList EvictNext() override { + if (GetSize() == 0) { + return {}; + } + + ui64 savedLimit = std::exchange(Limit, SizeHot + SizeCold - 1); + ui64 savedColdTarget = std::exchange(ColdTarget, Min(ColdTarget, Limit)); + + TIntrusiveList evictedList = EvictWhileFull(); + + Limit = savedLimit; + ColdTarget = savedColdTarget; + + return evictedList; + } + + TIntrusiveList Touch(TPage* page) override { + if 
(TPageTraits::GetLocation(page) != EClockProPageLocation::None) { + // touch a 'Cold resident' or a 'Hot' page: + TPageTraits::SetReferenced(page, true); + return {}; + } else if (auto it = Entries.find(TPageTraits::GetKey(page)); it != Entries.end()) { + // transforms a 'Cold non-resident' ('Test') page to a 'Hot' page: + TPageEntry* entry = AsEntry(it); + Y_ABORT_UNLESS(!entry->Page); + return Fill(entry, page); + } else { + // adds a 'Cold resident' page + return Add(page); + } + } + + void Erase(TPage* page) override { + if (auto it = Entries.find(TPageTraits::GetKey(page)); it != Entries.end()) { + TPageEntry* entry = AsEntry(it); + + EraseEntry(entry); + + Entries.erase(it); + } else { + Y_ABORT_UNLESS(TPageTraits::GetLocation(page) == EClockProPageLocation::None); + Y_ABORT_UNLESS(!TPageTraits::GetReferenced(page)); + } + } + + void UpdateLimit(ui64 limit) override { + if (ColdTarget == Limit) { + Limit = limit; + ColdTarget = limit; + } else { + Limit = limit; + ColdTarget = Min(ColdTarget, Limit); + } + } + + ui64 GetSize() const override { + return SizeHot + SizeCold; + } + + TString Dump() const override { + TStringBuilder result; + + size_t count = 0; + ui64 sizeHot = 0, sizeCold = 0, sizeTest = 0; + + auto ptr = HandHot; + while (ptr != nullptr) { + TPageEntry* entry = ptr->Node(); + auto it = Entries.find(entry->Key); + Y_DEBUG_ABORT_UNLESS(it != Entries.end()); + Y_DEBUG_ABORT_UNLESS(AsEntry(it) == entry); + + if (count != 0) result << ", "; + if (entry == HandHot) result << "Hot>"; + if (entry == HandCold) result << "Cold>"; + if (entry == HandTest) result << "Test>"; + + result << "{" << TPageTraits::ToString(entry->Key) << " "; + + count++; + if (entry->Page) { + auto location = TPageTraits::GetLocation(entry->Page); + switch (location) { + case EClockProPageLocation::Hot: + result << "H "; + sizeHot += entry->Size; + break; + case EClockProPageLocation::Cold: + result << "C "; + sizeCold += entry->Size; + break; + default: + Y_ABORT("Unknown 
location"); + } + } else { + result << "T "; + sizeTest += entry->Size; + } + + if (entry->Page) { + result << TPageTraits::GetReferenced(entry->Page) << "r "; + } + result << entry->Size << "b}"; + + Advance(ptr); + if (ptr == HandHot) break; + } + + Y_DEBUG_ABORT_UNLESS(sizeHot == SizeHot); + Y_DEBUG_ABORT_UNLESS(sizeCold == SizeCold); + Y_DEBUG_ABORT_UNLESS(sizeTest == SizeTest); + Y_DEBUG_ABORT_UNLESS(count == Entries.size()); + if (count == 0) { + Y_DEBUG_ABORT_UNLESS(!HandHot); + Y_DEBUG_ABORT_UNLESS(!HandCold); + Y_DEBUG_ABORT_UNLESS(!HandTest); + } + + if (count) result << "; "; + result << "ColdTarget: " << ColdTarget; + + return result; + } + +private: + TIntrusiveList Add(TPage* page) { + Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) == EClockProPageLocation::None); + + auto inserted = Entries.emplace(TPageTraits::GetKey(page), page, TPageTraits::GetSize(page)); + Y_ABORT_UNLESS(inserted.second); + TPageEntry* entry = AsEntry(inserted.first); + + LinkEntry(entry); + + TPageTraits::SetLocation(entry->Page, EClockProPageLocation::Cold); + SizeCold += entry->Size; + + return EvictWhileFull(); + } + + TIntrusiveList Fill(TPageEntry* entry, TPage* page) { + Y_DEBUG_ABORT_UNLESS(!entry->Page); + Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) == EClockProPageLocation::None); + Y_ABORT_UNLESS(!TPageTraits::GetReferenced(page)); + Y_ABORT_UNLESS(TPageTraits::GetSize(page) == entry->Size); + + Y_ABORT_UNLESS(SizeTest >= entry->Size); + SizeTest -= entry->Size; + + UnlinkEntry(entry); + + entry->Page = page; + TPageTraits::SetLocation(page, EClockProPageLocation::Hot); + SizeHot += entry->Size; + + LinkEntry(entry); + + ColdTarget = Min(ColdTarget + entry->Size, Limit); + + return EvictWhileFull(); + } + + TIntrusiveList EvictWhileFull() { + TIntrusiveList evictedList; + + while (SizeHot + SizeCold > Limit) { + RunHandCold(evictedList); + } + + return evictedList; + } + + void RunHandCold(TIntrusiveList& evictedList) { + Y_ABORT_UNLESS(HandCold); + 
TPageEntry* entry = HandCold->Node(); + + if (IsCold(entry)) { + if (TPageTraits::GetReferenced(entry->Page)) { + TPageTraits::SetReferenced(entry->Page, false); + + Y_ABORT_UNLESS(SizeCold >= entry->Size); + SizeCold -= entry->Size; + + TPageTraits::SetLocation(entry->Page, EClockProPageLocation::Hot); + SizeHot += entry->Size; + } else { + Y_ABORT_UNLESS(SizeCold >= entry->Size); + SizeCold -= entry->Size; + + TPageTraits::SetLocation(entry->Page, EClockProPageLocation::None); + evictedList.PushBack(entry->Page); + entry->Page = nullptr; + + SizeTest += entry->Size; + + // TODO: should we advance HandCold before that call? + while (SizeTest > Limit) { + RunHandTest(evictedList); + } + } + } + + Advance(HandCold); + + while (SizeHot > Limit - ColdTarget) { + RunHandHot(evictedList); + } + } + + void RunHandHot(TIntrusiveList& evictedList) { + Y_ABORT_UNLESS(HandHot); + + if (HandHot == HandTest) { + RunHandTest(evictedList); + if (!HandHot) { + return; + } + } + + TPageEntry* entry = HandHot->Node(); + + if (IsHot(entry)) { + if (TPageTraits::GetReferenced(entry->Page)) { + TPageTraits::SetReferenced(entry->Page, false); + } else { + Y_ABORT_UNLESS(SizeHot >= entry->Size); + SizeHot -= entry->Size; + + TPageTraits::SetLocation(entry->Page, EClockProPageLocation::Cold); + + SizeCold += entry->Size; + } + } + + Advance(HandHot); + } + + void RunHandTest(TIntrusiveList& evictedList) { + Y_ABORT_UNLESS(HandTest); + + if (HandTest == HandCold) { + RunHandCold(evictedList); + if (!HandTest) { + return; + } + } + + TPageEntry* entry = HandTest->Node(); + + if (IsTest(entry)) { + Y_ABORT_UNLESS(SizeTest >= entry->Size); + SizeTest -= entry->Size; + + ColdTarget -= Min(ColdTarget, entry->Size); + + UnlinkEntry(entry); + + auto it = Entries.find(entry->Key); + Y_ABORT_UNLESS(it != Entries.end()); + Y_ABORT_UNLESS(AsEntry(it) == entry); + Entries.erase(it); + } + + Advance(HandTest); + } + + void LinkEntry(TPageEntry* entry) { + if (HandHot == nullptr) { // first element + 
HandHot = HandCold = HandTest = entry; + } else { + entry->LinkBefore(HandHot); + } + + if (HandHot == HandCold) { + HandCold = HandCold->Prev()->Node(); + } + } + + void UnlinkEntry(TPageEntry* entry) { + if (entry->Empty()) { // the last entry in the cache + HandHot = HandCold = HandTest = nullptr; + } else { + if (entry == HandHot) { + HandHot = HandHot->Prev()->Node(); + } + if (entry == HandCold) { + HandCold = HandCold->Prev()->Node(); + } + if (entry == HandTest) { + HandTest = HandTest->Prev()->Node(); + } + entry->Unlink(); + } + } + + void EraseEntry(TPageEntry* entry) { + if (entry->Page) { + switch (TPageTraits::GetLocation(entry->Page)) { + case EClockProPageLocation::Hot: + Y_ABORT_UNLESS(SizeHot >= entry->Size); + SizeHot -= entry->Size; + break; + case EClockProPageLocation::Cold: + Y_ABORT_UNLESS(SizeCold >= entry->Size); + SizeCold -= entry->Size; + break; + default: + Y_ABORT("Unexpected page location"); + } + + TPageTraits::SetReferenced(entry->Page, false); + TPageTraits::SetLocation(entry->Page, EClockProPageLocation::None); + } else { + Y_ABORT_UNLESS(SizeTest >= entry->Size); + SizeTest -= entry->Size; + } + + UnlinkEntry(entry); + } + + bool IsHot(TPageEntry* entry) const { + return entry->Page && TPageTraits::GetLocation(entry->Page) == EClockProPageLocation::Hot; + } + + bool IsCold(TPageEntry* entry) const { + return entry->Page && TPageTraits::GetLocation(entry->Page) == EClockProPageLocation::Cold; + } + + bool IsTest(TPageEntry* entry) const { + return entry->Page == nullptr; + } + + void Advance(TPageEntry*& ptr) const { + if (ptr) { + ptr = ptr->Next()->Node(); + } + } + + TPageEntry* AsEntry(typename THashSet::iterator it) const { + return const_cast(&*it); + } + +private: + ui64 Limit; + ui64 ColdTarget; + + // TODO: unify this with TPageMap + THashSet Entries; + + TPageEntry* HandHot = nullptr; + TPageEntry* HandCold = nullptr; + TPageEntry* HandTest = nullptr; + ui64 SizeHot = 0; + ui64 SizeCold = 0; + ui64 SizeTest = 0; +}; + 
+} diff --git a/ydb/core/tablet_flat/shared_cache_clock_pro_ut.cpp b/ydb/core/tablet_flat/shared_cache_clock_pro_ut.cpp new file mode 100644 index 000000000000..fad0993e5a9a --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_clock_pro_ut.cpp @@ -0,0 +1,281 @@ +#include +#include +#include "shared_cache_clock_pro.h" + +namespace NKikimr::NCache { + +namespace { + + struct TPage : public TIntrusiveListItem { + ui32 Id; + size_t Size; + + TPage(ui32 id, size_t size) + : Id(id), Size(size) + {} + + ui32 CacheFlags1 : 4 = 0; + ui32 CacheFlags2 : 4 = 0; + }; + + struct TPageTraits { + struct TPageKey { + ui32 Id; + + TPageKey(ui32 id) + : Id(id) + {} + }; + + static ui64 GetSize(const TPage* page) { + return page->Size; + } + + static TPageKey GetKey(const TPage* page) { + return {page->Id}; + } + + static size_t GetHash(const TPageKey& key) { + return std::hash()(key.Id); + } + + static bool Equals(const TPageKey& left, const TPageKey& right) { + return left.Id == right.Id; + } + + static TString ToString(const TPageKey& key) { + return std::to_string(key.Id); + } + + static TString GetKeyToString(const TPage* page) { + return ToString(GetKey(page)); + } + + static EClockProPageLocation GetLocation(const TPage* page) { + return static_cast(page->CacheFlags1); + } + + static void SetLocation(TPage* page, EClockProPageLocation location) { + ui32 location_ = static_cast(location); + Y_ABORT_UNLESS(location_ < (1 << 4)); + page->CacheFlags1 = location_; + } + + static bool GetReferenced(const TPage* page) { + return page->CacheFlags2; + } + + static void SetReferenced(TPage* page, bool referenced) { + page->CacheFlags2 = referenced; + } + }; + +} + +Y_UNIT_TEST_SUITE(TClockProCache) { + + TVector Touch(auto& cache, TPage& page) { + auto evicted = cache.Touch(&page); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags1, 0); + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags2, 0); + result.push_back(p.Id); + } + return result; + } + + TVector 
EvictNext(auto& cache) { + auto evicted = cache.EvictNext(); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags1, 0); + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags2, 0); + result.push_back(p.Id); + } + return result; + } + + Y_UNIT_TEST(Touch) { + TClockProCache cache(10); + + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 10"); + + TPage page1{1, 2}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Cold>Test>{1 C 0r 2b}; ColdTarget: 10"); + + TPage page2{2, 3}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 2b}, Cold>{2 C 0r 3b}; ColdTarget: 10"); + + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 2b}, Cold>{2 C 0r 3b}, {3 C 0r 4b}; ColdTarget: 10"); + + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 2b}, Cold>{2 C 0r 3b}, {3 C 0r 4b}, {4 C 0r 1b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 1r 2b}, Cold>{2 C 0r 3b}, {3 C 1r 4b}, {4 C 0r 1b}; ColdTarget: 10"); + + TPage page5{5, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), TVector{2}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 1r 2b}, {2 T 3b}, Cold>{3 C 1r 4b}, {4 C 0r 1b}, {5 C 0r 1b}; ColdTarget: 10"); + } + + Y_UNIT_TEST(Lifecycle) { + TClockProCache cache(10); + + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 10"); + + TPage page1{1, 1}; + TPage page2{2, 2}; + TPage page3{3, 3}; + TPage page4{4, 4}; + TPage page5{5, 5}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), 
TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 1b}, Cold>{2 C 0r 2b}, {3 C 0r 3b}, {4 C 0r 4b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 1b}, Cold>{2 C 1r 2b}, {3 C 0r 3b}, {4 C 0r 4b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), (TVector{3, 4})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{3 T 3b}, {4 T 4b}, Cold>{5 C 0r 5b}, {1 C 0r 1b}, {2 C 0r 2b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), (TVector{5})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{5 T 5b}, Cold>{1 C 0r 1b}, {3 H 0r 3b}, {2 C 0r 2b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{5 T 5b}, Cold>{1 C 0r 1b}, {3 H 1r 3b}, {2 C 0r 2b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{5 T 5b}, Cold>{1 C 0r 1b}, {3 H 1r 3b}, {2 C 0r 2b}, {4 C 0r 4b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), (TVector{1, 2, 4})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{4 T 4b}, Cold>{3 C 0r 3b}, {5 C 0r 5b}; ColdTarget: 7"); + } + + Y_UNIT_TEST(EvictNext) { + TClockProCache cache(10); + + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 10"); + + TPage page1{1, 1}; + TPage page2{2, 2}; + TPage page3{3, 3}; + TPage page4{4, 4}; + TPage page5{5, 5}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 1b}, Cold>{2 C 0r 2b}, {3 C 0r 3b}, {4 C 0r 4b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + 
UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 1b}, Cold>{2 C 1r 2b}, {3 C 0r 3b}, {4 C 0r 4b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), (TVector{3, 4})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{3 T 3b}, {4 T 4b}, Cold>{5 C 0r 5b}, {1 C 0r 1b}, {2 C 0r 2b}; ColdTarget: 10"); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), (TVector{5})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{5 T 5b}, Cold>{1 C 0r 1b}, {3 H 0r 3b}, {2 C 0r 2b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{1}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>{2 C 0r 2b}, Test>{1 T 1b}, Cold>{3 H 0r 3b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{2}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{2 T 2b}, Cold>{3 C 0r 3b}; ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{3}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 6"); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 6"); + } + + Y_UNIT_TEST(UpdateLimit) { + TClockProCache cache(10); + + TPage page1{1, 1}; + TPage page2{2, 2}; + TPage page3{3, 3}; + TPage page4{4, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 1b}, Cold>{2 C 0r 2b}, {3 C 0r 3b}, {4 C 0r 4b}; ColdTarget: 10"); + + cache.UpdateLimit(5); + TPage page5{5, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), (TVector{2, 3, 4})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>{1 C 0r 1b}, Test>{4 T 4b}, Cold>{5 C 0r 1b}; ColdTarget: 0"); + + cache.UpdateLimit(0); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), (TVector{5, 1, 2})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 0"); + } + + Y_UNIT_TEST(Erase) { + 
TClockProCache cache(10); + + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 10"); + + TPage page1{1, 2}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Cold>Test>{1 C 0r 2b}; ColdTarget: 10"); + + TPage page2{2, 3}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 2b}, Cold>{2 C 0r 3b}; ColdTarget: 10"); + + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{1 C 0r 2b}, Cold>{2 C 0r 3b}, {3 C 0r 4b}; ColdTarget: 10"); + + cache.Erase(&page1); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Test>{3 C 0r 4b}, Cold>{2 C 0r 3b}; ColdTarget: 10"); + + cache.Erase(&page2); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Cold>Test>{3 C 0r 4b}; ColdTarget: 10"); + + TPage page42{42, 1}; + cache.Erase(&page42); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "Hot>Cold>Test>{3 C 0r 4b}; ColdTarget: 10"); + + cache.Erase(&page3); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "ColdTarget: 10"); + } + + Y_UNIT_TEST(Random) { + TClockProCache cache(100); + + TVector> pages; + for (ui32 pageId : xrange(500)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + ui32 hits = 0, misses = 0; + + for (ui32 i = 0; i < 100000; i++) { + ui32 pageId = std::sqrt(RandomNumber(pages.size() * pages.size())); + TPage* page = pages[pageId].Get(); + if (TPageTraits::GetLocation(page) != EClockProPageLocation::None) { + hits++; + } else { + misses++; + } + cache.Touch(page); + } + + Cerr << 1.0 * hits / (hits + misses) << Endl; + } +} + +} diff --git a/ydb/core/tablet_flat/shared_cache_s3fifo.h b/ydb/core/tablet_flat/shared_cache_s3fifo.h new file mode 100644 index 000000000000..81c5f7ff5e27 --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_s3fifo.h @@ -0,0 +1,300 @@ +#pragma once +#include "defs.h" +#include +#include +#include +#include + +namespace NKikimr::NCache { + +// TODO: remove template 
args and make some page base class + +enum class ES3FIFOPageLocation { + None, + SmallQueue, + MainQueue +}; + +template +class TS3FIFOGhostPageQueue { + using TPageKey = typename TPageTraits::TPageKey; + +public: + bool Add(const TPageKey& key) { + size_t hash = TPageTraits::GetHash(key); + + if (GhostsSet.insert(hash).second) { + GhostsQueue.push_back(hash); + return true; + } + + return false; + } + + void Limit(size_t limit) { + while (GhostsQueue.size() > limit) { + bool erased = GhostsSet.erase(GhostsQueue.front()); + Y_DEBUG_ABORT_UNLESS(erased); + GhostsQueue.pop_front(); + } + } + + bool Contains(const TPageKey& key) { + size_t hash = TPageTraits::GetHash(key); + return GhostsSet.contains(hash); + } + + TString Dump() const { + TStringBuilder result; + size_t count = 0; + for (size_t hash : GhostsQueue) { + Y_DEBUG_ABORT_UNLESS(GhostsSet.contains(hash)); + if (count != 0) result << ", "; + result << hash; + count++; + } + Y_DEBUG_ABORT_UNLESS(GhostsSet.size() == count); + return result; + } + +private: + // Note: only hashes are stored, all the collisions just ignored + THashSet GhostsSet; + TDeque GhostsQueue; +}; + +template +class TS3FIFOCache : public ICacheCache { + using TPageKey = typename TPageTraits::TPageKey; + + static const ui32 MaxMainQueueReinserts = 20; + + struct TLimit { + ui64 TotalLimit; + ui64 SmallQueueLimit; + ui64 MainQueueLimit; + + TLimit(ui64 limit) + : TotalLimit(limit) + , SmallQueueLimit(limit / 10) + , MainQueueLimit(limit - SmallQueueLimit) + {} + }; + + struct TQueue { + TQueue(ES3FIFOPageLocation location) + : Location(location) + {} + + ES3FIFOPageLocation Location; + TIntrusiveList Queue; + ui64 Count = 0; + ui64 Size = 0; + }; + +public: + TS3FIFOCache(ui64 limit) + : Limit(limit) + , SmallQueue(ES3FIFOPageLocation::SmallQueue) + , MainQueue(ES3FIFOPageLocation::MainQueue) + {} + + TIntrusiveList EvictNext() override { + if (SmallQueue.Queue.Empty() && MainQueue.Queue.Empty()) { + return {}; + } + + // TODO: account 
passive pages inside the cache + TLimit savedLimit = std::exchange(Limit, TLimit(SmallQueue.Size + MainQueue.Size - 1)); + + TIntrusiveList evictedList; + if (TPage* evictedPage = EvictOneIfFull()) { + evictedList.PushBack(evictedPage); + } else { + Y_DEBUG_ABORT("Unexpected empty eviction"); + } + + Limit = savedLimit; + + return evictedList; + } + + TIntrusiveList Touch(TPage* page) override { + const ES3FIFOPageLocation location = TPageTraits::GetLocation(page); + switch (location) { + case ES3FIFOPageLocation::SmallQueue: + case ES3FIFOPageLocation::MainQueue: { + TouchFast(page); + return {}; + } + case ES3FIFOPageLocation::None: + return Insert(page); + default: + Y_ABORT("Unknown page location"); + } + } + + void Erase(TPage* page) override { + const ES3FIFOPageLocation location = TPageTraits::GetLocation(page); + switch (location) { + case ES3FIFOPageLocation::None: + break; + case ES3FIFOPageLocation::SmallQueue: + Erase(SmallQueue, page); + break; + case ES3FIFOPageLocation::MainQueue: + Erase(MainQueue, page); + break; + default: + Y_ABORT("Unknown page location"); + } + + TPageTraits::SetFrequency(page, 0); + } + + void UpdateLimit(ui64 limit) override { + Limit = limit; + } + + ui64 GetSize() const override { + return SmallQueue.Size + MainQueue.Size; + } + + TString Dump() const override { + TStringBuilder result; + + auto dump = [&](const TQueue& queue) { + size_t count = 0; + ui64 size = 0; + for (auto it = queue.Queue.begin(); it != queue.Queue.end(); it++) { + const TPage* page = &*it; + if (count != 0) result << ", "; + result << "{" << TPageTraits::GetKeyToString(page) << " " << TPageTraits::GetFrequency(page) << "f " << TPageTraits::GetSize(page) << "b}"; + count++; + size += TPageTraits::GetSize(page); + } + Y_DEBUG_ABORT_UNLESS(queue.Count == count); + Y_DEBUG_ABORT_UNLESS(queue.Size == size); + }; + + result << "SmallQueue: "; + dump(SmallQueue); + result << Endl << "MainQueue: "; + dump(MainQueue); + result << Endl << "GhostQueue: "; + 
result << GhostQueue.Dump(); + + return result; + } + +private: + TPage* EvictOneIfFull() { + ui32 mainQueueReinserts = 0; + + while (GetSize() > Limit.TotalLimit) { + if (SmallQueue.Size > Limit.SmallQueueLimit) { + TPage* page = Pop(SmallQueue); + if (ui32 frequency = TPageTraits::GetFrequency(page); frequency > 1) { // load inserts, first read touches, second read touches + TPageTraits::SetFrequency(page, 0); + Push(MainQueue, page); + } else { + if (frequency) { // the page is used only once + TPageTraits::SetFrequency(page, 0); + } + AddGhost(page); + return page; + } + } else { + TPage* page = Pop(MainQueue); + if (ui32 frequency = TPageTraits::GetFrequency(page); frequency > 0 && mainQueueReinserts < MaxMainQueueReinserts) { + mainQueueReinserts++; + TPageTraits::SetFrequency(page, frequency - 1); + Push(MainQueue, page); + } else { + if (frequency) { // reinserts limit exceeded + TPageTraits::SetFrequency(page, 0); + } + return page; + } + } + } + + return nullptr; + } + + void TouchFast(TPage* page) { + Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) != ES3FIFOPageLocation::None); + + ui32 frequency = TPageTraits::GetFrequency(page); + if (frequency < 3) { + TPageTraits::SetFrequency(page, frequency + 1); + } + } + + TIntrusiveList Insert(TPage* page) { + Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) == ES3FIFOPageLocation::None); + + Push(IsGhost(page) ? 
MainQueue : SmallQueue, page); + TPageTraits::SetFrequency(page, 0); + + TIntrusiveList evictedList; + while (TPage* evictedPage = EvictOneIfFull()) { + evictedList.PushBack(evictedPage); + } + + return evictedList; + } + + TPage* Pop(TQueue& queue) { + Y_ABORT_UNLESS(!queue.Queue.Empty()); + Y_ABORT_UNLESS(TPageTraits::GetLocation(queue.Queue.Front()) == queue.Location); + Y_ABORT_UNLESS(queue.Count > 0); + Y_ABORT_UNLESS(queue.Size >= TPageTraits::GetSize(queue.Queue.Front())); + + TPage* page = queue.Queue.PopFront(); + queue.Count--; + queue.Size -= TPageTraits::GetSize(page); + TPageTraits::SetLocation(page, ES3FIFOPageLocation::None); + + return page; + } + + void Push(TQueue& queue, TPage* page) { + Y_ABORT_UNLESS(TPageTraits::GetLocation(page) == ES3FIFOPageLocation::None); + + queue.Queue.PushBack(page); + queue.Count++; + queue.Size += TPageTraits::GetSize(page); + TPageTraits::SetLocation(page, queue.Location); + } + + void Erase(TQueue& queue, TPage* page) { + Y_ABORT_UNLESS(TPageTraits::GetLocation(page) == queue.Location); + Y_ABORT_UNLESS(queue.Count > 0); + Y_ABORT_UNLESS(queue.Size >= TPageTraits::GetSize(page)); + + page->Unlink(); + queue.Count--; + queue.Size -= TPageTraits::GetSize(page); + TPageTraits::SetLocation(page, ES3FIFOPageLocation::None); + } + + void AddGhost(const TPage* page) { + if (GhostQueue.Add(TPageTraits::GetKey(page))) { + GhostQueue.Limit(SmallQueue.Count + MainQueue.Count); + } + } + + bool IsGhost(const TPage* page) { + return GhostQueue.Contains(TPageTraits::GetKey(page)); + } + +private: + TLimit Limit; + TQueue SmallQueue; + TQueue MainQueue; + TS3FIFOGhostPageQueue GhostQueue; + +}; + +} diff --git a/ydb/core/tablet_flat/shared_cache_s3fifo_ut.cpp b/ydb/core/tablet_flat/shared_cache_s3fifo_ut.cpp new file mode 100644 index 000000000000..150c9b929358 --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_s3fifo_ut.cpp @@ -0,0 +1,366 @@ +#include +#include +#include "shared_cache_s3fifo.h" + +namespace NKikimr::NCache { 
+ +namespace { + + struct TPage : public TIntrusiveListItem { + ui32 Id; + size_t Size; + + TPage(ui32 id, size_t size) + : Id(id), Size(size) + {} + + ui32 CacheFlags1 : 4 = 0; + ui32 CacheFlags2 : 4 = 0; + }; + + struct TPageTraits { + struct TPageKey { + ui32 Id; + + TPageKey(ui32 id) + : Id(id) + {} + }; + + static ui64 GetSize(const TPage* page) { + return page->Size; + } + + static TPageKey GetKey(const TPage* page) { + return {page->Id}; + } + + static size_t GetHash(const TPageKey& key) { + return key.Id; + } + + static TString ToString(const TPageKey& key) { + return std::to_string(key.Id); + } + + static TString GetKeyToString(const TPage* page) { + return ToString(GetKey(page)); + } + + static ES3FIFOPageLocation GetLocation(const TPage* page) { + return static_cast(page->CacheFlags1); + } + + static void SetLocation(TPage* page, ES3FIFOPageLocation location) { + ui32 location_ = static_cast(location); + Y_ABORT_UNLESS(location_ < (1 << 4)); + page->CacheFlags1 = location_; + } + + static ui32 GetFrequency(const TPage* page) { + return page->CacheFlags2; + } + + static void SetFrequency(TPage* page, ui32 frequency) { + Y_ABORT_UNLESS(frequency < (1 << 4)); + page->CacheFlags2 = frequency; + } + }; + +} + +Y_UNIT_TEST_SUITE(TS3FIFOGhostQueue) { + + Y_UNIT_TEST(Basics) { + TS3FIFOGhostPageQueue queue; + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), ""); + + // TODO: test Add, Limit, Contains + + UNIT_ASSERT(queue.Add(1)); + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), "1"); + + UNIT_ASSERT(queue.Add(2)); + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), "1, 2"); + + UNIT_ASSERT(queue.Add(3)); + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), "1, 2, 3"); + UNIT_ASSERT(queue.Contains(1)); + UNIT_ASSERT(queue.Contains(2)); + UNIT_ASSERT(queue.Contains(3)); + UNIT_ASSERT(!queue.Contains(4)); + + queue.Limit(2); + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), "2, 3"); + UNIT_ASSERT(!queue.Contains(1)); + UNIT_ASSERT(queue.Contains(2)); + UNIT_ASSERT(queue.Contains(3)); + 
UNIT_ASSERT(!queue.Contains(4)); + + UNIT_ASSERT(!queue.Add(2)); + UNIT_ASSERT_VALUES_EQUAL(queue.Dump(), "2, 3"); + UNIT_ASSERT(!queue.Contains(1)); + UNIT_ASSERT(queue.Contains(2)); + UNIT_ASSERT(queue.Contains(3)); + UNIT_ASSERT(!queue.Contains(4)); + } + +} + +Y_UNIT_TEST_SUITE(TS3FIFOCache) { + + TVector Touch(auto& cache, TPage& page) { + auto evicted = cache.Touch(&page); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags1, 0); + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags2, 0); + result.push_back(p.Id); + } + return result; + } + + TVector EvictNext(auto& cache) { + auto evicted = cache.EvictNext(); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags1, 0); + UNIT_ASSERT_VALUES_EQUAL(p.CacheFlags2, 0); + result.push_back(p.Id); + } + return result; + } + + void Erase(auto& cache, TPage& page) { + cache.Erase(&page); + UNIT_ASSERT_VALUES_EQUAL(page.CacheFlags1, 0); + UNIT_ASSERT_VALUES_EQUAL(page.CacheFlags2, 0); + } + + Y_UNIT_TEST(Touch) { + TS3FIFOCache cache(20); + + TPage page1{1, 2}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {1 0f 2b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + TPage page2{2, 3}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {1 0f 2b}, {2 0f 3b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {1 0f 2b}, {2 0f 3b}, {3 0f 4b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {1 0f 2b}, {2 0f 3b}, {3 0f 4b}, 
{4 0f 1b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {1 2f 2b}, {2 1f 3b}, {3 2f 4b}, {4 1f 1b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + TPage page5{5, 12}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), TVector{2}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {3 2f 4b}, {4 1f 1b}, {5 0f 12b}" << Endl + << "MainQueue: {1 0f 2b}" << Endl + << "GhostQueue: 2")); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), (TVector{4, 5})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: " << Endl + << "MainQueue: {1 0f 2b}, {2 0f 3b}, {3 0f 4b}" << Endl + << "GhostQueue: 2, 4, 5")); + + TPage page6{6, 2}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page6), (TVector{})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {6 0f 2b}" << Endl + << "MainQueue: {1 0f 2b}, {2 0f 3b}, {3 0f 4b}" << Endl + << "GhostQueue: 2, 4, 5")); + } + + Y_UNIT_TEST(Touch_MainQueue) { + TS3FIFOCache cache(10); + + TVector> pages; + for (ui32 pageId : xrange(20)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + for (ui32 pageId : xrange(10)) { + for (ui32 times = 0; times <= pageId % 4; times++) { + Touch(cache, *pages[pageId]); + } + } + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {0 0f 1b}, {1 1f 1b}, {2 2f 1b}, {3 3f 1b}, {4 0f 1b}, {5 1f 1b}, {6 2f 1b}, {7 3f 1b}, {8 0f 1b}, {9 1f 1b}" << Endl + << "MainQueue: " << Endl + << "GhostQueue: ")); + + for (ui32 pageId : 
xrange(10)) { + Touch(cache, *pages[10 + pageId]); + } + Touch(cache, *pages[3]); + Touch(cache, *pages[6]); + Touch(cache, *pages[6]); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {14 0f 1b}, {15 0f 1b}, {16 0f 1b}, {17 0f 1b}, {18 0f 1b}, {19 0f 1b}" << Endl + << "MainQueue: {2 0f 1b}, {3 1f 1b}, {6 2f 1b}, {7 0f 1b}" << Endl + << "GhostQueue: 0, 1, 4, 5, 8, 9, 10, 11, 12, 13")); + + for (ui32 pageId : xrange(10)) { + Touch(cache, *pages[pageId]); + } + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {19 0f 1b}" << Endl + << "MainQueue: {1 0f 1b}, {4 0f 1b}, {5 0f 1b}, {8 0f 1b}, {9 0f 1b}, {2 0f 1b}, {3 1f 1b}, {6 2f 1b}, {7 0f 1b}" << Endl + << "GhostQueue: 9, 10, 11, 12, 13, 14, 15, 16, 17, 18")); + + for (ui32 pageId : xrange(10)) { + Touch(cache, *pages[10 + pageId]); + } + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {19 1f 1b}" << Endl + << "MainQueue: {12 0f 1b}, {13 0f 1b}, {14 0f 1b}, {15 0f 1b}, {16 0f 1b}, {3 0f 1b}, {6 1f 1b}, {17 0f 1b}, {18 0f 1b}" << Endl + << "GhostQueue: 9, 10, 11, 12, 13, 14, 15, 16, 17, 18")); + } + + Y_UNIT_TEST(EvictNext) { + TS3FIFOCache cache(10); + + TVector> pages; + for (ui32 pageId : xrange(30)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + for (ui32 pageId : xrange(30)) { + for (ui32 times = 0; times <= pageId % 4; times++) { + Touch(cache, *pages[pageId]); + } + } + Touch(cache, *pages[14]); + Touch(cache, *pages[15]); + Touch(cache, *pages[15]); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {29 1f 1b}" << Endl + << "MainQueue: {11 0f 1b}, {14 1f 1b}, {15 2f 1b}, {18 0f 1b}, {19 0f 1b}, {22 0f 1b}, {23 0f 1b}, {26 0f 1b}, {27 0f 1b}" << Endl + << "GhostQueue: 9, 12, 13, 16, 17, 20, 21, 24, 25, 28")); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{29}); + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{11}); + 
UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{18}); + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{19}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: " << Endl + << "MainQueue: {22 0f 1b}, {23 0f 1b}, {26 0f 1b}, {27 0f 1b}, {14 0f 1b}, {15 1f 1b}" << Endl + << "GhostQueue: 13, 16, 17, 20, 21, 24, 25, 28, 29")); + } + + Y_UNIT_TEST(UpdateLimit) { + TS3FIFOCache cache(10); + + TVector> pages; + for (ui32 pageId : xrange(30)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + for (ui32 pageId : xrange(30)) { + for (ui32 times = 0; times <= pageId % 4; times++) { + Touch(cache, *pages[pageId]); + } + } + Touch(cache, *pages[14]); + Touch(cache, *pages[15]); + Touch(cache, *pages[15]); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {29 1f 1b}" << Endl + << "MainQueue: {11 0f 1b}, {14 1f 1b}, {15 2f 1b}, {18 0f 1b}, {19 0f 1b}, {22 0f 1b}, {23 0f 1b}, {26 0f 1b}, {27 0f 1b}" << Endl + << "GhostQueue: 9, 12, 13, 16, 17, 20, 21, 24, 25, 28")); + + cache.UpdateLimit(6); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages[0]), (TVector{29, 0, 11, 18, 19})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: " << Endl + << "MainQueue: {22 0f 1b}, {23 0f 1b}, {26 0f 1b}, {27 0f 1b}, {14 0f 1b}, {15 1f 1b}" << Endl + << "GhostQueue: 16, 17, 20, 21, 24, 25, 28, 29, 0")); + } + + Y_UNIT_TEST(Erase) { + TS3FIFOCache cache(10); + + TVector> pages; + for (ui32 pageId : xrange(30)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + for (ui32 pageId : xrange(30)) { + for (ui32 times = 0; times <= pageId % 4; times++) { + Touch(cache, *pages[pageId]); + } + } + Touch(cache, *pages[14]); + Touch(cache, *pages[15]); + Touch(cache, *pages[15]); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: {29 1f 1b}" << Endl + << "MainQueue: {11 0f 1b}, {14 1f 1b}, {15 2f 1b}, {18 0f 1b}, {19 0f 1b}, {22 0f 1b}, {23 0f 1b}, {26 0f 1b}, {27 0f 
1b}" << Endl + << "GhostQueue: 9, 12, 13, 16, 17, 20, 21, 24, 25, 28")); + + Erase(cache, *pages[29]); + Erase(cache, *pages[22]); + Erase(cache, *pages[26]); + Erase(cache, *pages[17]); + Cerr << cache.Dump() << Endl; + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), (TString)(TStringBuilder() + << "SmallQueue: " << Endl + << "MainQueue: {11 0f 1b}, {14 1f 1b}, {15 2f 1b}, {18 0f 1b}, {19 0f 1b}, {23 0f 1b}, {27 0f 1b}" << Endl + << "GhostQueue: 9, 12, 13, 16, 17, 20, 21, 24, 25, 28")); + } + + Y_UNIT_TEST(Random) { + TS3FIFOCache cache(100); + + TVector> pages; + for (ui32 pageId : xrange(500)) { + pages.push_back(MakeHolder(pageId, 1)); + } + + ui32 hits = 0, misses = 0; + + for (ui32 i = 0; i < 100000; i++) { + ui32 pageId = std::sqrt(RandomNumber(pages.size() * pages.size())); + TPage* page = pages[pageId].Get(); + if (TPageTraits::GetLocation(page) != ES3FIFOPageLocation::None) { + hits++; + } else { + misses++; + } + cache.Touch(page); + } + + Cerr << 1.0 * hits / (hits + misses) << Endl; + } +} + +} diff --git a/ydb/core/tablet_flat/shared_cache_switchable.h b/ydb/core/tablet_flat/shared_cache_switchable.h new file mode 100644 index 000000000000..48d28cc90b41 --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_switchable.h @@ -0,0 +1,228 @@ +#pragma once +#include "defs.h" +#include +#include +#include + +namespace NKikimr::NCache { + +template +class TSwitchableCache : public ICacheCache { + using TCounterPtr = ::NMonitoring::TDynamicCounters::TCounterPtr; + + static const ui32 MaxCachesCount = 3; + static const ui32 RotatePagesPerCallCount = 10; + static_assert(MaxCachesCount < (1 << 4)); + + class TCacheHolder { + public: + TCacheHolder(ui32 id, THolder>&& cache, TCounterPtr& sizeCounter) + : Id(id) + , Cache(std::move(cache)) + , SizeCounter(sizeCounter) + { + Y_ABORT_UNLESS(GetSize() == 0); + } + + TIntrusiveList EvictNext() { + return ProcessEvictedList(Cache->EvictNext()); + } + + TIntrusiveList Touch(TPage* page) { + ui32 cacheId = 
TPageTraits::GetCacheId(page); + if (cacheId == 0) { + TPageTraits::SetCacheId(page, Id); + SizeCounter->Add(TPageTraits::GetSize(page)); + } else { + Y_ABORT_UNLESS(cacheId == Id); + } + + return ProcessEvictedList(Cache->Touch(page)); + } + + void Erase(TPage* page) { + ui32 cacheId = TPageTraits::GetCacheId(page); + if (cacheId != 0) { + Y_ABORT_UNLESS(cacheId == Id); + SizeCounter->Sub(TPageTraits::GetSize(page)); + TPageTraits::SetCacheId(page, 0); + } + + Cache->Erase(page); + } + + void UpdateLimit(ui64 limit) { + Cache->UpdateLimit(limit); + } + + ui64 GetSize() const { + return Cache->GetSize(); + } + + TString Dump() const { + return Cache->Dump(); + } + + private: + TIntrusiveList ProcessEvictedList(TIntrusiveList&& evictedList) { + ui64 evictedSize = 0; + + for (auto& page_ : evictedList) { + TPage* page = &page_; + Y_ABORT_UNLESS(TPageTraits::GetCacheId(page) == Id); + TPageTraits::SetCacheId(page, 0); + evictedSize += TPageTraits::GetSize(page); + } + + SizeCounter->Sub(evictedSize); + + return evictedList; + } + + public: + const ui32 Id; // in [1 .. 
MaxCachesCount] range + + private: + const THolder> Cache; + const TCounterPtr SizeCounter; + }; + +public: + TSwitchableCache(ui64 limit, THolder>&& cache, TCounterPtr sizeCounter) { + Caches.emplace_back(1, std::move(cache), sizeCounter); + UpdateLimit(limit); + } + + TIntrusiveList Switch(THolder>&& cache, TCounterPtr sizeCounter) Y_WARN_UNUSED_RESULT { + ui32 cacheId = Caches.back().Id + 1; + if (cacheId > MaxCachesCount) { + cacheId -= MaxCachesCount; + } + + Caches.emplace_back(cacheId, std::move(cache), sizeCounter) + .UpdateLimit(Limit); + + TIntrusiveList evictedList; + + while (Caches.size() > 1 && Caches.front().Id == cacheId) { // MaxCachesCount is exceeded + RotatePages(evictedList); + } + + return evictedList; + } + + TIntrusiveList EvictNext() override { + while (Y_UNLIKELY(Caches.size() > 1)) { + auto result = Caches.front().EvictNext(); + if (!result) { + Y_ABORT_UNLESS(Caches.front().GetSize() == 0); + Caches.pop_front(); + } else { + return result; + } + } + + return Caches.back().EvictNext(); + } + + TIntrusiveList Touch(TPage* page) override { + if (Y_LIKELY(Caches.size() == 1)) { + return Caches.back().Touch(page); + } + + ui32 cacheId = TPageTraits::GetCacheId(page); + if (cacheId > 0 && cacheId != Caches.back().Id) { + // rotate the current page first: + GetCache(cacheId).Erase(page); + } + + TIntrusiveList evictedList = Caches.back().Touch(page); + + RotatePages(evictedList); + + while (GetSize() > Limit && Caches.size() > 1) { + evictedList.Append(EvictNext()); + } + + return evictedList; + } + + void Erase(TPage* page) override { + if (Y_LIKELY(Caches.size() == 1)) { + Caches.back().Erase(page); + return; + } + + GetCache(TPageTraits::GetCacheId(page)) + .Erase(page); + } + + void UpdateLimit(ui64 limit) override { + Limit = limit; + for (auto& cache : Caches) { + cache.UpdateLimit(limit); + } + } + + ui64 GetSize() const override { + ui64 result = 0; + for (const auto& cache : Caches) { + result += cache.GetSize(); + } + return result; + 
} + + TString Dump() const override { + TStringBuilder result; + size_t count = 0; + + for (const auto& cache : Caches) { + if (count) result << "; "; + result << cache.Dump(); + count++; + } + + return result; + } + +private: + TCacheHolder& GetCache(ui32 cacheId) { + if (cacheId == 0) { + // use the most-recent cache by default + return Caches.back(); + } else { + // Note: this loop might be replaced with formula + // but it seems useless and error-prone + for (auto& cache : Caches) { + if (cache.Id == cacheId) { + return cache; + } + } + Y_ABORT("Failed to locate page cache"); + } + } + + void RotatePages(TIntrusiveList& evictedList) { + ui32 rotatedPagesCount = 0; + while (Caches.size() > 1 && rotatedPagesCount < RotatePagesPerCallCount) { + auto rotatedList = Caches.front().EvictNext(); + if (!rotatedList) { + Y_ABORT_UNLESS(Caches.front().GetSize() == 0); + Caches.pop_front(); + continue; + } + + while (!rotatedList.Empty()) { + TPage* page = rotatedList.PopFront(); + evictedList.Append(Caches.back().Touch(page)); + rotatedPagesCount++; + } + } + } + +private: + ui64 Limit; + TDeque Caches; +}; + +} diff --git a/ydb/core/tablet_flat/shared_cache_switchable_ut.cpp b/ydb/core/tablet_flat/shared_cache_switchable_ut.cpp new file mode 100644 index 000000000000..74c9b43867ed --- /dev/null +++ b/ydb/core/tablet_flat/shared_cache_switchable_ut.cpp @@ -0,0 +1,542 @@ +#include +#include +#include + +namespace NKikimr::NCache { + +namespace { + + using TCounterPtr = ::NMonitoring::TDynamicCounters::TCounterPtr; + + struct TPage : public TIntrusiveListItem { + ui32 Id; + size_t Size; + + TPage(ui32 id, size_t size) + : Id(id), Size(size) + {} + + ui32 CacheId : 4 = 0; + }; + + struct TPageTraits { + struct TPageKey { + ui32 Id; + + TPageKey(ui32 id) + : Id(id) + {} + }; + + static ui64 GetSize(const TPage* page) { + return page->Size; + } + + static ui32 GetCacheId(const TPage* page) { + return page->CacheId; + } + + static void SetCacheId(TPage* page, ui32 id) { + 
Y_ABORT_UNLESS(id < (1 << 4)); + page->CacheId = id; + } + }; + + class TSimpleCache : public ICacheCache { + public: + TIntrusiveList EvictNext() override { + TIntrusiveList result; + + if (!List.empty()) { + TPage* page = List.front(); + List.pop_front(); + Map.erase(page->Id); + result.PushBack(page); + }; + + return result; + } + + TIntrusiveList Touch(TPage* page) override { + if (Map.contains(page->Id)) { + List.erase(Map[page->Id]); + } + List.push_back(page); + Map[page->Id] = prev(List.end()); + + TIntrusiveList evictedList; + + while (GetSize() > Limit) { + TPage* page = List.front(); + List.pop_front(); + Map.erase(page->Id); + evictedList.PushBack(page); + } + + return evictedList; + } + + void Erase(TPage* page) override { + if (Map.contains(page->Id)) { + List.erase(Map[page->Id]); + Map.erase(page->Id); + } + } + + void UpdateLimit(ui64 limit) override { + Limit = limit; + } + + ui64 GetSize() const override { + ui64 size = 0; + for (auto page : List) { + size += page->Size; + } + return size; + } + + TString Dump() const override { + TStringBuilder result; + size_t count = 0; + for (auto it = List.begin(); it != List.end(); it++) { + TPage* page = *it; + if (count != 0) result << ", "; + result << "{" << page->Id << " " << page->Size << "b}"; + count++; + Y_ABORT_UNLESS(*Map.FindPtr(page->Id) == it); + } + Y_ABORT_UNLESS(Map.size() == count); + return result; + } + + private: + ui64 Limit = 0; + TList List; + THashMap::iterator> Map; + }; + +} + +Y_UNIT_TEST_SUITE(TSwitchableCache) { + + TVector Touch(auto& cache, TPage& page) { + auto evicted = cache.Touch(&page); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheId, 0); + result.push_back(p.Id); + } + return result; + } + + void Erase(auto& cache, TPage& page) { + cache.Erase(&page); + UNIT_ASSERT_VALUES_EQUAL(page.CacheId, 0); + } + + TVector EvictNext(auto& cache) { + auto evicted = cache.EvictNext(); + TVector result; + for (auto& p : evicted) { + 
UNIT_ASSERT_VALUES_EQUAL(p.CacheId, 0); + result.push_back(p.Id); + } + return result; + } + + TVector Switch(auto& cache, auto&& cache2, auto& counter) { + auto evicted = cache.Switch(std::move(cache2), counter); + TVector result; + for (auto& p : evicted) { + UNIT_ASSERT_VALUES_EQUAL(p.CacheId, 0); + result.push_back(p.Id); + } + return result; + } + + Y_UNIT_TEST(Touch) { + TCounterPtr counter = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + TPage page2{2, 3}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + TPage page5{5, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), (TVector{1, 2})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{3 4b}, {4 1b}, {5 4b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), (TVector{3})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{4 1b}, {5 4b}, {2 3b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page5), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{4 1b}, {2 3b}, {5 4b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + } + + Y_UNIT_TEST(Erase) { + TCounterPtr counter = new 
NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + Erase(cache, page2); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + TPage page5{5, 4}; + Erase(cache, page5); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + } + + Y_UNIT_TEST(EvictNext) { + TCounterPtr counter = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), (TVector{1})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{2 3b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), (TVector{2})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), (TVector{3})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{4 1b}"); + 
UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), (TVector{4})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), ""); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), (TVector{})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), ""); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + } + + Y_UNIT_TEST(UpdateLimit) { + TCounterPtr counter = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}, {4 1b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + + cache.UpdateLimit(6); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), (TVector{1, 3})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{4 1b}, {2 3b}"); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), cache.GetSize()); + } + + Y_UNIT_TEST(Switch_Touch_RotatePages_All) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter1); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + 
UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}; "); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + + TPage page4{4, 1}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page4), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{4 1b}, {1 2b}, {2 3b}, {3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 10); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 10); + } + + Y_UNIT_TEST(Switch_Touch_RotatePages_Parts) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(50, MakeHolder(), counter1); + + TVector> pages; + for (ui32 pageId : xrange(50)) { + pages.push_back(MakeHolder(pageId, 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{}); + } + + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{10}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 39); // [11 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); // [50, 0 .. 
9] + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{21}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 28); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 22); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{32}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 17); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 33); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{43}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 6); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 44); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{50}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 50); + } + + Y_UNIT_TEST(Switch_RotatePages_Force) { + TCounterPtr counter = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 9); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}; "); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 9); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), 
"{1 2b}, {2 3b}, {3 4b}; ; "); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 9); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 9); + } + + Y_UNIT_TEST(Switch_RotatePages_Evicts) { + TCounterPtr counter = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(10, MakeHolder(), counter); + + TPage page1{1, 2}; + TPage page2{2, 3}; + TPage page3{3, 4}; + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page1), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page2), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, page3), TVector{}); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), TVector{}); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{1 2b}, {2 3b}, {3 4b}; ; "); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 9); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 9); + + cache.UpdateLimit(5); + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter), (TVector{1, 2})); + UNIT_ASSERT_VALUES_EQUAL(cache.Dump(), "{3 4b}"); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 4); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 4); + } + + Y_UNIT_TEST(Switch_Touch) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(50, MakeHolder(), counter1); + + TVector> pages; + for (ui32 pageId : xrange(50)) { + pages.push_back(MakeHolder(pageId, 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{}); + } + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{10}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + 
UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 39); // [11 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); // [50, 0 .. 9] + + Touch(cache, *pages[23]); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 28); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 22); + + Touch(cache, *pages[7]); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 18); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 32); + } + + Y_UNIT_TEST(Switch_Erase) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(50, MakeHolder(), counter1); + + TVector> pages; + for (ui32 pageId : xrange(50)) { + pages.push_back(MakeHolder(pageId, 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{}); + } + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{10}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 39); // [11 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); // [50, 0 .. 
9] + + Erase(cache, *pages[23]); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 49); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 38); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); + + Erase(cache, *pages[7]); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 48); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 38); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 10); + } + + Y_UNIT_TEST(Switch_EvictNext) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(50, MakeHolder(), counter1); + + TVector> pages; + for (ui32 pageId : xrange(50)) { + pages.push_back(MakeHolder(pageId, 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{}); + } + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{10}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 39); // [11 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); // [50, 0 .. 
9] + + for (ui32 i : xrange(39)) { + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{i + 11}); + } + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 11); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); + + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{50}); + for (ui32 i : xrange(10)) { + UNIT_ASSERT_VALUES_EQUAL(EvictNext(cache), TVector{i}); + } + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + } + + Y_UNIT_TEST(Switch_UpdateLimit) { + TCounterPtr counter1 = new NMonitoring::TCounterForPtr; + TCounterPtr counter2 = new NMonitoring::TCounterForPtr; + TSwitchableCache cache(50, MakeHolder(), counter1); + + TVector> pages; + for (ui32 pageId : xrange(50)) { + pages.push_back(MakeHolder(pageId, 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{}); + } + + UNIT_ASSERT_VALUES_EQUAL(Switch(cache, MakeHolder(), counter2), TVector{}); + + pages.push_back(MakeHolder(pages.size(), 1)); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages.back()), TVector{10}); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 50); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 39); // [11 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 11); // [50, 0 .. 9] + + cache.UpdateLimit(40); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages[23]), (TVector{21, 22, 24, 25, 26, 27, 28, 29, 30, 31})); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 40); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 18); // [32 .. 49] + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 22); // [50, 0 .. 9, 23, 11 .. 
20] + + cache.UpdateLimit(7); + UNIT_ASSERT_VALUES_EQUAL(Touch(cache, *pages[7]).size(), 33); + UNIT_ASSERT_VALUES_EQUAL(cache.GetSize(), 7); + UNIT_ASSERT_VALUES_EQUAL(counter1->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 7); + } + +} + +} diff --git a/ydb/core/tablet_flat/shared_sausagecache.cpp b/ydb/core/tablet_flat/shared_sausagecache.cpp index 5aa3d5e5b507..6a3f204b96d5 100644 --- a/ydb/core/tablet_flat/shared_sausagecache.cpp +++ b/ydb/core/tablet_flat/shared_sausagecache.cpp @@ -3,6 +3,9 @@ #include "flat_bio_events.h" #include "flat_bio_actor.h" #include "util_fmt_logger.h" +#include +#include +#include #include #include #include @@ -15,7 +18,11 @@ namespace NKikimr { TSharedPageCacheCounters::TSharedPageCacheCounters(const TIntrusivePtr<::NMonitoring::TDynamicCounters> &group) - : MemLimitBytes(group->GetCounter("MemLimitBytes")) + : Counters(group) + , FreshBytes(group->GetCounter("fresh")) + , StagingBytes(group->GetCounter("staging")) + , WarmBytes(group->GetCounter("warm")) + , MemLimitBytes(group->GetCounter("MemLimitBytes")) , ConfigLimitBytes(group->GetCounter("ConfigLimitBytes")) , ActivePages(group->GetCounter("ActivePages")) , ActiveBytes(group->GetCounter("ActiveBytes")) @@ -35,11 +42,17 @@ TSharedPageCacheCounters::TSharedPageCacheCounters(const TIntrusivePtr<::NMonito , MemTableCompactedBytes(group->GetCounter("MemTableCompactedBytes", true)) { } +TSharedPageCacheCounters::TCounterPtr TSharedPageCacheCounters::ReplacementPolicySize(TReplacementPolicy policy) { + return Counters->GetCounter(TStringBuilder() << "ReplacementPolicySize/" << policy); +} + } namespace NKikimr { namespace NTabletFlatExecutor { +using namespace NCache; + static bool Satisfies(NLog::EPriority priority = NLog::PRI_DEBUG) { if (NLog::TSettings *settings = TlsActivationContext->LoggerSettings()) return settings->Satisfies(priority, NKikimrServices::TABLET_SAUSAGECACHE); @@ -213,9 +226,10 @@ class TSharedPageCache : public TActorBootstrapped { : public 
TSharedPageHandle , public TIntrusiveListItem { - ui32 State : 4; - ui32 CacheGeneration : 3; - ui32 InMemory : 1; + ui32 State : 4 = PageStateNo; + ui32 CacheId : 4 = 0; + ui32 CacheFlags1 : 4 = 0; + ui32 CacheFlags2 : 4 = 0; const ui32 PageId; const size_t Size; @@ -223,10 +237,7 @@ class TSharedPageCache : public TActorBootstrapped { TCollection* Collection; TPage(ui32 pageId, size_t size, TCollection* collection) - : State(PageStateNo) - , CacheGeneration(TCacheCacheConfig::CacheGenNone) - , InMemory(false) - , PageId(pageId) + : PageId(pageId) , Size(size) , Collection(collection) {} @@ -250,11 +261,139 @@ class TSharedPageCache : public TActorBootstrapped { State = PageStateLoaded; } - struct TWeight { - static ui64 Get(TPage *x) { - return sizeof(TPage) + (x->State == PageStateLoaded ? x->Size : 0); - } + void EnsureNoCacheFlags() { + Y_VERIFY_S(CacheId == 0, "Unexpected page " << CacheId << " cache id"); + Y_VERIFY_S(CacheFlags1 == 0, "Unexpected page " << CacheFlags1 << " cache flags 1"); + Y_VERIFY_S(CacheFlags2 == 0, "Unexpected page " << CacheFlags2 << " cache flags 2"); + } + }; + + static_assert(sizeof(TPage) == 104); + + struct TCacheCachePageTraits { + static ui64 GetWeight(const TPage* page) { + return sizeof(TPage) + page->Size; + } + + static ECacheCacheGeneration GetGeneration(const TPage *page) { + return static_cast(page->CacheFlags1); + } + + static void SetGeneration(TPage *page, ECacheCacheGeneration generation) { + ui32 generation_ = static_cast(generation); + Y_ABORT_UNLESS(generation_ < (1 << 4)); + page->CacheFlags1 = generation_; + } + }; + + struct TS3FIFOPageTraits { + struct TPageKey { + TLogoBlobID LogoBlobID; + ui32 PageId; + }; + + static ui64 GetSize(const TPage* page) { + return sizeof(TPage) + page->Size; + } + + static TPageKey GetKey(const TPage* page) { + return {page->Collection->MetaId, page->PageId}; + } + + static size_t GetHash(const TPageKey& key) { + return MultiHash(key.LogoBlobID.Hash(), key.PageId); + } + + static 
TString ToString(const TPageKey& key) { + return TStringBuilder() << "LogoBlobID: " << key.LogoBlobID.ToString() << " PageId: " << key.PageId; + } + + static TString GetKeyToString(const TPage* page) { + return ToString(GetKey(page)); + } + + static ES3FIFOPageLocation GetLocation(const TPage* page) { + return static_cast(page->CacheFlags1); + } + + static void SetLocation(TPage* page, ES3FIFOPageLocation location) { + ui32 location_ = static_cast(location); + Y_ABORT_UNLESS(location_ < (1 << 4)); + page->CacheFlags1 = location_; + } + + static ui32 GetFrequency(const TPage* page) { + return page->CacheFlags2; + } + + static void SetFrequency(TPage* page, ui32 frequency) { + Y_ABORT_UNLESS(frequency < (1 << 4)); + page->CacheFlags2 = frequency; + } + }; + + struct TClockProPageTraits { + struct TPageKey { + TLogoBlobID LogoBlobID; + ui32 PageId; }; + + static ui64 GetSize(const TPage* page) { + return sizeof(TPage) + page->Size; + } + + static TPageKey GetKey(const TPage* page) { + return {page->Collection->MetaId, page->PageId}; + } + + static size_t GetHash(const TPageKey& key) { + return MultiHash(key.LogoBlobID.Hash(), key.PageId); + } + + static bool Equals(const TPageKey& left, const TPageKey& right) { + return left.PageId == right.PageId && left.LogoBlobID == right.LogoBlobID; + } + + static TString ToString(const TPageKey& key) { + return TStringBuilder() << "LogoBlobID: " << key.LogoBlobID.ToString() << " PageId: " << key.PageId; + } + + static TString GetKeyToString(const TPage* page) { + return ToString(GetKey(page)); + } + + static EClockProPageLocation GetLocation(const TPage* page) { + return static_cast(page->CacheFlags1); + } + + static void SetLocation(TPage* page, EClockProPageLocation location) { + ui32 location_ = static_cast(location); + Y_ABORT_UNLESS(location_ < (1 << 4)); + page->CacheFlags1 = location_; + } + + static bool GetReferenced(const TPage* page) { + return page->CacheFlags2; + } + + static void SetReferenced(TPage* page, bool 
referenced) { + page->CacheFlags2 = static_cast(referenced); + } + }; + + struct TCompositeCachePageTraits { + static ui64 GetSize(const TPage* page) { + return sizeof(TPage) + page->Size; + } + + static ui32 GetCacheId(const TPage* page) { + return page->CacheId; + } + + static void SetCacheId(TPage* page, ui32 id) { + Y_ABORT_UNLESS(id < (1 << 4)); + page->CacheId = id; + } }; struct TRequest : public TSimpleRefCount { @@ -326,7 +465,7 @@ class TSharedPageCache : public TActorBootstrapped { TRequestQueue ScanRequests; THolder Config; - TCacheCache> Cache; + TSwitchableCache Cache; TControlWrapper SizeOverride; @@ -343,27 +482,41 @@ class TSharedPageCache : public TActorBootstrapped { // 0 means unlimited ui64 MemLimitBytes = 0; - ui64 ConfigLimitBytes; - void ActualizeCacheSizeLimit() { - if ((ui64)SizeOverride != Config->CacheConfig->Limit) { - Config->CacheConfig->SetLimit(SizeOverride); + THolder> CreateCache() { + // TODO: pass actual limit to cache config + // now it will be fixed by ActualizeCacheSizeLimit call + + switch (Config->ReplacementPolicy) { + case NKikimrSharedCache::S3FIFO: + return MakeHolder>(1); + case NKikimrSharedCache::ClockPro: + return MakeHolder>(1); + case NKikimrSharedCache::ThreeLeveledLRU: + default: { + TCacheCacheConfig cacheCacheConfig(1, Config->Counters->FreshBytes, Config->Counters->StagingBytes, Config->Counters->WarmBytes); + return MakeHolder>(std::move(cacheCacheConfig)); + } } + } - ConfigLimitBytes = Config->CacheConfig->Limit; + void ActualizeCacheSizeLimit() { + if ((ui64)SizeOverride != Config->LimitBytes) { + Config->LimitBytes = SizeOverride; + } - ui64 limit = ConfigLimitBytes; - if (MemLimitBytes && ConfigLimitBytes > MemLimitBytes) { + ui64 limit = Config->LimitBytes; + if (MemLimitBytes && Config->LimitBytes > MemLimitBytes) { limit = MemLimitBytes; } // limit of cache depends only on config and mem because passive pages may go in and out arbitrary // we may have some passive bytes, so if we fully fill this 
Cache we may exceed the limit // because of that DoGC should be called to ensure limits - Cache.UpdateCacheSize(limit); + Cache.UpdateLimit(limit); if (Config->Counters) { - Config->Counters->ConfigLimitBytes->Set(ConfigLimitBytes); + Config->Counters->ConfigLimitBytes->Set(Config->LimitBytes); Config->Counters->ActiveLimitBytes->Set(limit); } } @@ -373,16 +526,19 @@ class TSharedPageCache : public TActorBootstrapped { // update StatActiveBytes + StatPassiveBytes ProcessGCList(); - ui64 configActiveReservedBytes = ConfigLimitBytes * Config->ActivePagesReservationPercent / 100; + ui64 configActiveReservedBytes = Config->LimitBytes * Config->ActivePagesReservationPercent / 100; THashSet recheck; while (MemLimitBytes && GetStatAllBytes() > MemLimitBytes - || GetStatAllBytes() > ConfigLimitBytes && StatActiveBytes > configActiveReservedBytes) { - auto page = Cache.EvictNext(); - if (!page) { + || GetStatAllBytes() > Config->LimitBytes && StatActiveBytes > configActiveReservedBytes) { + TIntrusiveList pages = Cache.EvictNext(); + if (pages.Empty()) { break; } - EvictNow(page, recheck); + while (!pages.Empty()) { + TPage* page = pages.PopFront(); + EvictNow(page, recheck); + } } if (recheck) { CheckExpiredCollections(std::move(recheck)); @@ -416,12 +572,12 @@ class TSharedPageCache : public TActorBootstrapped { DoGC(); - if (MemLimitBytes && MemLimitBytes < ConfigLimitBytes) { + if (MemLimitBytes && MemLimitBytes < Config->LimitBytes) { // in normal scenario we expect that we can fill the whole shared cache - ui64 memTableReservedBytes = ConfigLimitBytes * Config->MemTableReservationPercent / 100; + ui64 memTableReservedBytes = Config->LimitBytes * Config->MemTableReservationPercent / 100; ui64 memTableTotal = MemTableTracker->GetTotalConsumption(); if (memTableTotal > memTableReservedBytes) { - ui64 toCompact = Min(ConfigLimitBytes - MemLimitBytes, memTableTotal - memTableReservedBytes); + ui64 toCompact = Min(Config->LimitBytes - MemLimitBytes, memTableTotal - 
memTableReservedBytes); auto registrations = MemTableTracker->SelectForCompaction(toCompact); for (auto registration : registrations) { Send(registration.first->Owner, new NSharedCache::TEvMemTableCompact(registration.first->Table, registration.second)); @@ -1101,7 +1257,7 @@ class TSharedPageCache : public TActorBootstrapped { const TLogoBlobID &pageCollectionId = collectionIt->first; if (auto logl = Logger->Log(ELnLev::Debug)) - logl << "droping pageCollection " << pageCollectionId; + logl << "dropping pageCollection " << pageCollectionId; for (auto &expe : collection.Expectants) { for (auto &xpair : expe.second.SourceRequests) { @@ -1120,8 +1276,8 @@ class TSharedPageCache : public TActorBootstrapped { for (const auto &kv : collection.PageMap) { auto* page = kv.second.Get(); - Cache.Evict(page); - page->CacheGeneration = TCacheCacheConfig::CacheGenNone; + Cache.Erase(page); + page->EnsureNoCacheFlags(); if (page->State == PageStateLoaded) { page->State = PageStateEvicted; @@ -1207,8 +1363,7 @@ class TSharedPageCache : public TActorBootstrapped { while (!pages.Empty()) { TPage* page = pages.PopFront(); - Y_VERIFY_S(page->CacheGeneration == TCacheCacheConfig::CacheGenEvicted, "unexpected " << page->CacheGeneration << " page cache generation"); - page->CacheGeneration = TCacheCacheConfig::CacheGenNone; + page->EnsureNoCacheFlags(); Y_VERIFY_S(page->State == PageStateLoaded, "unexpected " << page->State << " page state"); page->State = PageStateEvicted; @@ -1222,8 +1377,7 @@ class TSharedPageCache : public TActorBootstrapped { } void EvictNow(TPage* page, THashSet& recheck) { - Y_VERIFY_S(page->CacheGeneration == TCacheCacheConfig::CacheGenEvicted, "unexpected " << page->CacheGeneration << " page cache generation"); - page->CacheGeneration = TCacheCacheConfig::CacheGenNone; + page->EnsureNoCacheFlags(); Y_VERIFY_S(page->State == PageStateLoaded, "unexpected " << page->State << " page state"); page->State = PageStateEvicted; @@ -1239,9 +1393,9 @@ class 
TSharedPageCache : public TActorBootstrapped { const auto* msg = ev->Get(); if (msg->Record.GetMemoryLimit() != 0) { - Config->CacheConfig->SetLimit(msg->Record.GetMemoryLimit()); - SizeOverride = Config->CacheConfig->Limit; - // limit will be updated with ActualizeCacheSizeLimit call + Config->LimitBytes = msg->Record.GetMemoryLimit(); + SizeOverride = Config->LimitBytes; + ActualizeCacheSizeLimit(); } if (msg->Record.HasActivePagesReservationPercent()) { @@ -1260,6 +1414,18 @@ class TSharedPageCache : public TActorBootstrapped { ScanRequests.Limit = msg->Record.GetScanQueueInFlyLimit(); RequestFromQueue(ScanRequests); } + + if (msg->Record.GetReplacementPolicy() != Config->ReplacementPolicy) { + if (auto logl = Logger->Log(ELnLev::Info)) { + logl << "Replacement policy switch from " << Config->ReplacementPolicy << " to " << msg->Record.GetReplacementPolicy(); + } + Config->ReplacementPolicy = msg->Record.GetReplacementPolicy(); + Evict(Cache.Switch(CreateCache(), Config->Counters->ReplacementPolicySize(Config->ReplacementPolicy))); + DoGC(); + if (auto logl = Logger->Log(ELnLev::Info)) { + logl << "Replacement policy switch to " << Config->ReplacementPolicy << " finished"; + } + } } inline ui64 GetStatAllBytes() const { @@ -1322,9 +1488,8 @@ class TSharedPageCache : public TActorBootstrapped { : MemObserver(std::move(memObserver)) , MemTableTracker(std::make_shared(config->Counters)) , Config(std::move(config)) - , Cache(*Config->CacheConfig) - , SizeOverride(Config->CacheConfig->Limit, 1, Max()) - , ConfigLimitBytes(Config->CacheConfig->Limit) + , Cache(1, CreateCache(), Config->Counters->ReplacementPolicySize(Config->ReplacementPolicy)) + , SizeOverride(Config->LimitBytes, 1, Max()) { AsyncRequests.Limit = Config->TotalAsyncQueueInFlyLimit; ScanRequests.Limit = Config->TotalScanQueueInFlyLimit; diff --git a/ydb/core/tablet_flat/shared_sausagecache.h b/ydb/core/tablet_flat/shared_sausagecache.h index 6733510f73cc..8164049270c8 100644 --- 
a/ydb/core/tablet_flat/shared_sausagecache.h +++ b/ydb/core/tablet_flat/shared_sausagecache.h @@ -23,6 +23,13 @@ struct TEvSharedPageCache { struct TSharedPageCacheCounters final : public TAtomicRefCount { using TCounterPtr = ::NMonitoring::TDynamicCounters::TCounterPtr; + using TReplacementPolicy = NKikimrSharedCache::TReplacementPolicy; + + const TIntrusivePtr<::NMonitoring::TDynamicCounters> Counters; + + const TCounterPtr FreshBytes; + const TCounterPtr StagingBytes; + const TCounterPtr WarmBytes; const TCounterPtr MemLimitBytes; const TCounterPtr ConfigLimitBytes; @@ -44,16 +51,23 @@ struct TSharedPageCacheCounters final : public TAtomicRefCount &group); + + TCounterPtr ReplacementPolicySize(TReplacementPolicy policy); }; +// TODO: use protobuf configs struct TSharedPageCacheConfig { - TIntrusivePtr CacheConfig; + using TReplacementPolicy = NKikimrSharedCache::TReplacementPolicy; + + ui64 LimitBytes; ui64 TotalScanQueueInFlyLimit = 512 * 1024 * 1024; ui64 TotalAsyncQueueInFlyLimit = 512 * 1024 * 1024; TString CacheName = "SharedPageCache"; TIntrusivePtr Counters; ui32 ActivePagesReservationPercent = 50; ui32 MemTableReservationPercent = 20; + + TReplacementPolicy ReplacementPolicy = TReplacementPolicy::ThreeLeveledLRU; }; IActor* CreateSharedPageCache(THolder config, TIntrusivePtr memObserver); diff --git a/ydb/core/tablet_flat/test/libs/exec/runner.h b/ydb/core/tablet_flat/test/libs/exec/runner.h index b2b53302f15e..81a023864507 100644 --- a/ydb/core/tablet_flat/test/libs/exec/runner.h +++ b/ydb/core/tablet_flat/test/libs/exec/runner.h @@ -183,7 +183,7 @@ namespace NFake { { /*_ Shared page collection cache service, used by executor */ auto config = MakeHolder(); - config->CacheConfig = new TCacheCacheConfig(conf.Shared, nullptr, nullptr, nullptr); + config->LimitBytes = conf.Shared; config->TotalAsyncQueueInFlyLimit = conf.AsyncQueue; config->TotalScanQueueInFlyLimit = conf.ScanQueue; config->Counters = MakeIntrusive(Env.GetDynamicCounters()); diff --git 
a/ydb/core/tablet_flat/test/libs/table/test_mixer.h b/ydb/core/tablet_flat/test/libs/table/test_mixer.h index 458e4b7070ae..7c82abb30a2d 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_mixer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_mixer.h @@ -45,7 +45,7 @@ namespace NTest { if (CurrentBucketRemainingRows-- == 0) { // start next bucket with CurrentBucketRemainingRows rows ui64 one = (Skip && Skip > Random.Uniform(Buckets) ? 1 : 0); - CurrentBucketRemainingRows = RowsPerBucket + one, Skip -= one, CurrentBucket++; + CurrentBucketRemainingRows = RowsPerBucket + one - 1, Skip -= one, CurrentBucket++; } return Min(CurrentBucket, Buckets - 1); diff --git a/ydb/core/tablet_flat/test/libs/table/test_store.h b/ydb/core/tablet_flat/test/libs/table/test_store.h index e699fa92e811..761d5c939cbf 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_store.h +++ b/ydb/core/tablet_flat/test/libs/table/test_store.h @@ -24,8 +24,8 @@ namespace NTest { struct TEggs { bool Rooted; - TVector GroupIndexes; - TVector HistoricIndexes; + TVector FlatGroupIndexes; + TVector FlatHistoricIndexes; TVector BTreeGroupIndexes; TVector BTreeHistoricIndexes; TData *Scheme; diff --git a/ydb/core/tablet_flat/test/libs/table/test_writer.h b/ydb/core/tablet_flat/test/libs/table/test_writer.h index 70e916d7b65c..f710071fb6c9 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_writer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_writer.h @@ -77,8 +77,14 @@ namespace NTest { for (const auto &meta : eggs.BTreeGroupIndexes) { indexesRawSize += meta.IndexSize; } + for (const auto &meta : eggs.BTreeHistoricIndexes) { + indexesRawSize += meta.IndexSize; + } } else { - for (auto indexPage : eggs.GroupIndexes) { + for (auto indexPage : eggs.FlatGroupIndexes) { + indexesRawSize += Store->GetPageSize(0, indexPage); + } + for (auto indexPage : eggs.FlatHistoricIndexes) { indexesRawSize += Store->GetPageSize(0, indexPage); } } @@ -90,7 +96,7 @@ namespace NTest { { epoch, 
TPartScheme::Parse(*eggs.Scheme, eggs.Rooted), - { eggs.GroupIndexes, eggs.HistoricIndexes, eggs.BTreeGroupIndexes, eggs.BTreeHistoricIndexes }, + { eggs.FlatGroupIndexes, eggs.FlatHistoricIndexes, eggs.BTreeGroupIndexes, eggs.BTreeHistoricIndexes }, eggs.Blobs ? new TExtBlobs(*eggs.Blobs, { }) : nullptr, eggs.ByKey ? new TBloom(*eggs.ByKey) : nullptr, eggs.Large ? new TFrames(*eggs.Large) : nullptr, diff --git a/ydb/core/tablet_flat/ut/ut_db_iface.cpp b/ydb/core/tablet_flat/ut/ut_db_iface.cpp index b82252553f46..39fb1473ebe0 100644 --- a/ydb/core/tablet_flat/ut/ut_db_iface.cpp +++ b/ydb/core/tablet_flat/ut/ut_db_iface.cpp @@ -276,7 +276,8 @@ Y_UNIT_TEST_SUITE(DBase) { UNIT_ASSERT(me->Counters().Parts.RowsErase == 0); UNIT_ASSERT(me->Counters().Parts.PartsCount == 0); UNIT_ASSERT(me->Counters().Parts.PlainBytes == 0); - UNIT_ASSERT(me->Counters().Parts.IndexBytes == 0); + UNIT_ASSERT(me->Counters().Parts.FlatIndexBytes == 0); + UNIT_ASSERT(me->Counters().Parts.BTreeIndexBytes == 0); UNIT_ASSERT(me->Counters().Parts.OtherBytes == 0); } diff --git a/ydb/core/tablet_flat/ut/ut_shared_sausagecache.cpp b/ydb/core/tablet_flat/ut/ut_shared_sausagecache.cpp index 89837d06a418..af9709fc0e8c 100644 --- a/ydb/core/tablet_flat/ut/ut_shared_sausagecache.cpp +++ b/ydb/core/tablet_flat/ut/ut_shared_sausagecache.cpp @@ -15,16 +15,30 @@ enum : ui32 { ValueColumnId = 2, }; +using TRetriedCounters = TVector; +using namespace NSharedCache; + +void Increment(TRetriedCounters& retried, ui32 attempts) { + if (attempts >= retried.size()) { + retried.resize(attempts + 1); + } + retried.at(attempts)++; +} + struct TTxInitSchema : public ITransaction { bool Execute(TTransactionContext& txc, const TActorContext&) override { if (txc.DB.GetScheme().GetTableInfo(TableId)) return true; + TCompactionPolicy policy; + policy.MinBTreeIndexNodeSize = 128; + txc.DB.Alter() .AddTable("test" + ToString(ui32(TableId)), TableId) .AddColumn(TableId, "key", KeyColumnId, NScheme::TInt64::TypeId, false) 
.AddColumn(TableId, "value", ValueColumnId, NScheme::TString::TypeId, false) - .AddColumnToKey(TableId, KeyColumnId); + .AddColumnToKey(TableId, KeyColumnId) + .SetCompactionPolicy(TableId, policy); return true; } @@ -61,12 +75,18 @@ struct TTxWriteRow : public ITransaction { struct TTxReadRow : public ITransaction { i64 Key; + TRetriedCounters& Retried; + ui32 Attempts = 0; - explicit TTxReadRow(i64 key) + explicit TTxReadRow(i64 key, TRetriedCounters& retried) : Key(key) + , Retried(retried) { } bool Execute(TTransactionContext& txc, const TActorContext&) override { + Increment(Retried, Attempts); + Attempts++; + TVector rawKey; rawKey.emplace_back(&Key, sizeof(Key), NScheme::TTypeInfo(NScheme::TInt64::TypeId)); @@ -102,7 +122,27 @@ void WaitEvent(TMyEnvBase& env, ui32 eventType, ui32 requiredCount = 1) { env->DispatchEvents(options); } -Y_UNIT_TEST(PageCacheLimits) { +void RestartAndClearCache(TMyEnvBase& env) { + env.SendSync(new TEvents::TEvPoison, false, true); + + env->GetMemObserver()->NotifyStat({200*MB, 100*MB, 100*MB}); + WaitEvent(env, NSharedCache::EvMem); + + env->GetMemObserver()->NotifyStat({100*MB, 108*MB, 108*MB}); + WaitEvent(env, NSharedCache::EvMem); + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); +} + +void SwitchPolicy(TMyEnvBase& env, NKikimrSharedCache::TReplacementPolicy policy) { + auto configure = MakeHolder(); + configure->Record.SetReplacementPolicy(policy); + configure->Record.SetMemoryLimit(0); // no limit + env->Send(MakeSharedPageCacheId(), TActorId{}, configure.Release()); + WaitEvent(env, TEvSharedPageCache::EvConfigure); +} + +Y_UNIT_TEST(Limits) { TMyEnvBase env; auto counters = MakeIntrusive(env->GetDynamicCounters()); @@ -112,7 +152,14 @@ Y_UNIT_TEST(PageCacheLimits) { env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); env.SendSync(new NFake::TEvExecute{ new TTxInitSchema() }); - // write 300 rows, each ~100KB + auto configure = MakeHolder(); + configure->Record.SetMemoryLimit(8_MB); + 
env->Send(MakeSharedPageCacheId(), TActorId{}, configure.Release()); + WaitEvent(env, TEvSharedPageCache::EvConfigure); + + SwitchPolicy(env, NKikimrSharedCache::ThreeLeveledLRU); + + // write 300 rows, each ~100KB (~30MB) for (i64 key = 0; key < 300; ++key) { TString value(size_t(100 * 1024), char('a' + key % 26)); env.SendSync(new NFake::TEvExecute{ new TTxWriteRow(key, std::move(value)) }); @@ -123,8 +170,9 @@ Y_UNIT_TEST(PageCacheLimits) { Cerr << "...waiting until compacted" << Endl; env.WaitFor(); + TRetriedCounters retried; for (i64 key = 0; key < 100; ++key) { - env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key) }); + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }); } LogCounters(counters); UNIT_ASSERT_VALUES_EQUAL(counters->LoadInFlyBytes->Val(), 0); @@ -223,8 +271,9 @@ Y_UNIT_TEST(MemTableLimits) { Cerr << "...waiting until compacted" << Endl; env.WaitFor(); + TRetriedCounters retried; for (i64 key = 0; key < 10; ++key) { - env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key) }); + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }); } // write 10 rows, each ~50KB @@ -266,6 +315,374 @@ Y_UNIT_TEST(MemTableLimits) { UNIT_ASSERT_DOUBLES_EQUAL(counters->MemTableTotalBytes->Val(), 1.5*MB, MB / 3); } +Y_UNIT_TEST(ThreeLeveledLRU) { + TMyEnvBase env; + auto counters = MakeIntrusive(env->GetDynamicCounters()); + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + env.SendSync(new NFake::TEvExecute{ new TTxInitSchema() }); + + SwitchPolicy(env, NKikimrSharedCache::ThreeLeveledLRU); + env->GetAppData().FeatureFlags.SetEnableLocalDBBtreeIndex(true); + + env->GetMemObserver()->NotifyStat({100*MB, 108*MB, 108*MB}); + WaitEvent(env, NSharedCache::EvMem); + + // write 100 rows, each ~100KB (~10MB) + for (i64 key = 0; key < 100; ++key) { + TString value(size_t(100 * 1024), char('a' + key % 26)); + env.SendSync(new NFake::TEvExecute{ new TTxWriteRow(key, std::move(value)) }); + } + + Cerr << "...compacting" << Endl; + 
env.SendSync(new NFake::TEvCompact(TableId)); + Cerr << "...waiting until compacted" << Endl; + env.WaitFor(); + + TRetriedCounters retried; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 45, 5})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB / 3 * 2), static_cast(1_MB / 3)); // 2 full layers (fresh & staging) + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 100, 14, 2})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 44, 6})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 73, 10, 1})); + + RestartAndClearCache(env); + + // read some key twice + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1, 1})); + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1})); + + // simulate scan + retried = {}; + for (i64 key = 1; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, 
true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB / 3 * 2), static_cast(1_MB / 3)); // 2 full layers (fresh & staging) + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{99, 99, 13, 1})); + + // read the key again + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB / 3 * 2), static_cast(1_MB / 3)); // 2 full layers (fresh & staging) + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{200, 100, 14, 2})); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB / 3 * 2), static_cast(1_MB / 3)); // 2 full layers (fresh & staging) + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 100, 14})); +} + +Y_UNIT_TEST(S3FIFO) { + TMyEnvBase env; + auto counters = MakeIntrusive(env->GetDynamicCounters()); + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + env.SendSync(new NFake::TEvExecute{ new TTxInitSchema() }); + + SwitchPolicy(env, NKikimrSharedCache::S3FIFO); + env->GetAppData().FeatureFlags.SetEnableLocalDBBtreeIndex(true); + + env->GetMemObserver()->NotifyStat({100*MB, 108*MB, 108*MB}); + WaitEvent(env, NSharedCache::EvMem); + + // write 100 rows, each ~100KB (~10MB) + for (i64 key = 0; key < 100; ++key) { + TString value(size_t(100 * 1024), char('a' + key % 26)); + env.SendSync(new NFake::TEvExecute{ new TTxWriteRow(key, std::move(value)) }); + } + + Cerr << "...compacting" << Endl; + env.SendSync(new 
NFake::TEvCompact(TableId)); + Cerr << "...waiting until compacted" << Endl; + env.WaitFor(); + + TRetriedCounters retried; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 19, 2})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 100, 14, 2})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 19, 2})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 21, 3})); + + RestartAndClearCache(env); + + // read some key twice + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1, 1})); + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1})); + + // simulate scan + retried = {}; + for (i64 key = 1; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + 
UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{99, 99, 13, 1})); + + // read the key again + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{200, 100, 14, 2})); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 28, 4, 1})); +} + +Y_UNIT_TEST(ClockPro) { + TMyEnvBase env; + auto counters = MakeIntrusive(env->GetDynamicCounters()); + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + env.SendSync(new NFake::TEvExecute{ new TTxInitSchema() }); + + SwitchPolicy(env, NKikimrSharedCache::ClockPro); + env->GetAppData().FeatureFlags.SetEnableLocalDBBtreeIndex(true); + + env->GetMemObserver()->NotifyStat({100*MB, 108*MB, 108*MB}); + WaitEvent(env, NSharedCache::EvMem); + + // write 100 rows, each ~100KB (~10MB) + for (i64 key = 0; key < 100; ++key) { + TString value(size_t(100 * 1024), char('a' + key % 26)); + env.SendSync(new NFake::TEvExecute{ new TTxWriteRow(key, std::move(value)) }); + } + + Cerr << "...compacting" << Endl; + env.SendSync(new NFake::TEvCompact(TableId)); + Cerr << "...waiting until compacted" << Endl; + env.WaitFor(); + + TRetriedCounters retried; + for (i64 key = 99; key >= 0; --key) { + 
env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 20, 2})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 100, 14, 2})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 21, 2})); + + retried = {}; + for (i64 key = 99; key >= 0; --key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 20, 2})); + + RestartAndClearCache(env); + + // read some key twice + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1, 1})); + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1})); + + // simulate scan + retried = {}; + for (i64 key = 1; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{99, 99, 13, 1})); + + // read the key again + 
retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1, 1})); + + // simulate scan again + retried = {}; + for (i64 key = 1; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{99, 99, 13})); + + // read the key again again + retried = {}; + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(0, retried) }, true); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{1, 1, 1, 1})); + + RestartAndClearCache(env); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{200, 100, 14, 2})); + + retried = {}; + for (i64 key = 0; key < 100; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + LogCounters(counters); + UNIT_ASSERT_DOUBLES_EQUAL(counters->ActiveBytes->Val(), static_cast(8_MB), static_cast(1_MB / 3)); + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{100, 100, 14, 1})); +} + +Y_UNIT_TEST(ReplacementPolicySwitch) { + TMyEnvBase env; + auto counters = MakeIntrusive(env->GetDynamicCounters()); + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + env.SendSync(new NFake::TEvExecute{ new TTxInitSchema() }); + env->GetAppData().FeatureFlags.SetEnableLocalDBBtreeIndex(true); + + // write 100 rows, each ~100KB (~10MB) + for (i64 key = 0; key < 100; ++key) { + TString value(size_t(100 * 1024), char('a' + key % 26)); + env.SendSync(new NFake::TEvExecute{ new TTxWriteRow(key, std::move(value)) }); + } + + Cerr << 
"...compacting" << Endl; + env.SendSync(new NFake::TEvCompact(TableId)); + Cerr << "...waiting until compacted" << Endl; + env.WaitFor(); + + RestartAndClearCache(env); + + TRetriedCounters retried = {}; + for (i64 key = 0; key < 3; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{3, 3, 1, 1})); + + UNIT_ASSERT_GT(counters->ReplacementPolicySize(NKikimrSharedCache::ThreeLeveledLRU)->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counters->ReplacementPolicySize(NKikimrSharedCache::S3FIFO)->Val(), 0); + + SwitchPolicy(env, NKikimrSharedCache::S3FIFO); + + retried = {}; + for (i64 key = 0; key < 3; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{3})); + + retried = {}; + for (i64 key = 90; key < 93; ++key) { + env.SendSync(new NFake::TEvExecute{ new TTxReadRow(key, retried) }, true); + } + UNIT_ASSERT_VALUES_EQUAL(retried, (TVector{3, 3, 2, 1})); + + UNIT_ASSERT_GT(counters->ReplacementPolicySize(NKikimrSharedCache::S3FIFO)->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counters->ReplacementPolicySize(NKikimrSharedCache::ThreeLeveledLRU)->Val(), 0); +} + } // Y_UNIT_TEST_SUITE(TSharedPageCache) } // namespace NTabletFlatExecutor diff --git a/ydb/core/tablet_flat/ut/ut_stat.cpp b/ydb/core/tablet_flat/ut/ut_stat.cpp index a02151472585..ec265efe8daa 100644 --- a/ydb/core/tablet_flat/ut/ut_stat.cpp +++ b/ydb/core/tablet_flat/ut/ut_stat.cpp @@ -2,6 +2,8 @@ #include "flat_stat_table.h" #include "flat_stat_table_mixed_index.h" #include "flat_stat_table_btree_index.h" +#include +#include #include #include #include @@ -141,14 +143,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 3547100, 31242); + CheckMixedIndex(*subset, 24000, 3547100, 
49916); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 9582, 1425198, 31242); + CheckMixedIndex(*subset, 9582, 1425198, 49916); } Y_UNIT_TEST(Single_Groups) @@ -167,14 +169,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 18810); + CheckMixedIndex(*subset, 24000, 4054050, 29361); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2277890, 18810); + CheckMixedIndex(*subset, 13570, 2277890, 29361); } Y_UNIT_TEST(Mixed) @@ -192,14 +194,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckMixedIndex(*subset, 24000, 4054270, 19152); + CheckMixedIndex(*subset, 24000, 4054270, 29970); } Y_UNIT_TEST(Serial) { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 25428); + CheckMixedIndex(*subset, 24000, 2106479, 25458); } Y_UNIT_TEST(Serial_Groups) @@ -213,7 +215,7 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { { TMixerSeq mixer(4, Mass1.Saved.Size()); auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer, 0.3); - CheckMixedIndex(*subset, 24000, 4054290, 19168); + CheckMixedIndex(*subset, 24000, 4054290, 
30013); } } @@ -237,14 +239,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 3547100, 61162); + CheckMixedIndex(*subset, 24000, 3547100, 81694); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 9582, 1425198, 61162); + CheckMixedIndex(*subset, 9582, 1425198, 81694); } Y_UNIT_TEST(Single_Groups) @@ -263,14 +265,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 34837); + CheckMixedIndex(*subset, 24000, 4054050, 46562); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2277890, 34837); + CheckMixedIndex(*subset, 13570, 2277890, 46562); } Y_UNIT_TEST(Mixed) @@ -288,14 +290,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckMixedIndex(*subset, 24000, 4054270, 34579); + CheckMixedIndex(*subset, 24000, 4054270, 46543); } Y_UNIT_TEST(Serial) { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 49502); + CheckMixedIndex(*subset, 24000, 2106479, 49555); } 
Y_UNIT_TEST(Serial_Groups) @@ -309,7 +311,7 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { { TMixerSeq mixer(4, Mass1.Saved.Size()); auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer, 0.3); - CheckMixedIndex(*subset, 24000, 4054290, 34652); + CheckMixedIndex(*subset, 24000, 4054290, 46640); } Y_UNIT_TEST(Single_LowResolution) @@ -341,14 +343,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_Groups_History_LowResolution) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 48540, 5310, 531050); + CheckMixedIndex(*subset, 24000, 4054050, 64742, 5310, 531050); } Y_UNIT_TEST(Single_Groups_History_Slices_LowResolution) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2234982 /* ~2277890 */, 48540, 5310, 531050); + CheckMixedIndex(*subset, 13570, 2234982 /* ~2277890 */, 64742, 5310, 531050); } } @@ -372,14 +374,14 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckBTreeIndex(*subset, 24000, 3547100, 61162); + CheckBTreeIndex(*subset, 24000, 3547100, 81694); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckBTreeIndex(*subset, 9582, 1425282, 61162); + CheckBTreeIndex(*subset, 9582, 1425282, 81694); } Y_UNIT_TEST(Single_Groups) @@ -398,14 +400,14 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, 
PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckBTreeIndex(*subset, 24000, 4054050, 34837); + CheckBTreeIndex(*subset, 24000, 4054050, 46562); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckBTreeIndex(*subset, 13570, 2273213, 34837); + CheckBTreeIndex(*subset, 13570, 2273213, 46562); } Y_UNIT_TEST(Mixed) @@ -423,7 +425,7 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckBTreeIndex(*subset, 24000, 4054270, 34579); + CheckBTreeIndex(*subset, 24000, 4054270, 46543); } } @@ -455,14 +457,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { { const ui32 samples = 5; - Cerr << "Parts:" << Endl; + Cerr << subset.Flatten.size() << " parts:" << Endl; for (auto &part : subset.Flatten) { TTestEnv env; auto index = CreateIndexIter(part.Part.Get(), &env, {}); Cerr << " " << index->GetEndRowId() << " rows, " << IndexTools::CountMainPages(*part.Part) << " pages, " << (part->IndexPages.HasBTree() ? 
part->IndexPages.GetBTree({}).LevelCount : -1) << " levels: "; - for (ui32 sample : xrange(samples + 1)) { + for (ui32 sample : xrange(1u, samples + 1)) { TRowId rowId((index->GetEndRowId() - 1) * sample / samples); Y_ABORT_UNLESS(index->Seek(rowId) == EReady::Data); TSmallVec keyCells; @@ -492,7 +494,33 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { UNIT_ASSERT_LE(std::abs(percent), allowed); } - void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + void CalcDataBeforeIterate(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + NTest::TChecker wrap(subset, { new TTouchEnv }); + auto env = wrap.GetEnv(); + env->Faulty = false; + + bytes = 0; + rows = 0; + wrap.Seek({}, ESeek::Lower); + + while (wrap.GetReady() == EReady::Data) { + ui64 prevBytes = env->TouchedBytes; + + wrap.Next(); + + if (wrap.GetReady() == EReady::Data && key.GetCells()) { + auto cmp = CompareTypedCellVectors(key.GetCells().data(), wrap->GetKey().Cells().data(), subset.Scheme->Keys->Types.data(), Min(key.GetCells().size(), wrap->GetKey().Cells().size())); + if (cmp < 0) { + break; + } + } + + rows++; + bytes = prevBytes; + } + } + + void CalcDataBeforePrecharge(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { TTouchEnv env; env.Faulty = false; @@ -513,6 +541,23 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { rows = env.TouchedRows; } + void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + bool groups = false; + rows = 0; + for (const auto& part : subset.Flatten) { + TTestEnv env; + auto index = CreateIndexIter(part.Part.Get(), &env, {}); + rows += index->GetEndRowId(); + groups |= part->GroupsCount > 1 || part->HistoricGroupsCount > 0; + } + + if (groups || rows > 10000) { + CalcDataBeforePrecharge(subset, key, bytes, rows); + } else { + CalcDataBeforeIterate(subset, key, bytes, rows); + } + } + void CheckHistogram(const TSubset& subset, THistogram histogram, bool isBytes, 
ui64 total, bool verifyPercents) { Cerr << " " << (isBytes ? "DataSizeHistogram:" : "RowCountHistogram:") << Endl; @@ -550,11 +595,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 delta = total - prevValue, actualDelta = total - prevActualValue; Cerr << " " << FormatPercent(delta, total) << " (actual " << FormatPercent(actualDelta, total) << ")" << Endl; if (verifyPercents) VerifyPercent(delta, total, 20); - UNIT_ASSERT_GT(total, prevValue); + UNIT_ASSERT_GE(total, prevValue); } } - void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true) { + void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true, bool faulty = true) { if (mode == 0) { Dump(subset); } @@ -571,6 +616,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 dataSizeResolution = totalBytes / histogramBucketsCount; TTouchEnv env; + env.Faulty = faulty; // env.Faulty = false; // uncomment for debug TStats stats; auto buildStats = [&]() { @@ -581,7 +627,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } }; - const ui32 attempts = 35; + const ui32 attempts = 100; for (ui32 attempt : xrange(attempts)) { if (buildStats()) { break; @@ -593,6 +639,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { CheckHistogram(subset, stats.RowCountHistogram, false, totalRows, verifyPercents); CheckHistogram(subset, stats.DataSizeHistogram, true, totalBytes, verifyPercents); + + if (mode == BTreeIndex && verifyPercents && histogramBucketsCount != 1000) { + UNIT_ASSERT_VALUES_EQUAL(stats.RowCountHistogram.size(), histogramBucketsCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats.DataSizeHistogram.size(), histogramBucketsCount - 1); + } } Y_UNIT_TEST(Single) @@ -906,6 +957,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Single_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 1, TMixerOne{ }); + 
Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Single_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -938,6 +997,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Mixed_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerRnd(3)); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Mixed_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -970,6 +1037,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Serial_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerSeq(3, Mass3.Saved.Size())); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Serial_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -1033,6 +1108,46 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { Check(*subset, mode, 10, false); } } + + Y_UNIT_TEST(Many_Mixed) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? 
false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerRnd(partsCount)); + + Check(*subset, mode, 10, false, false); + } + } + + Y_UNIT_TEST(Many_Serial) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerSeq(partsCount, mass->Saved.Size())); + + Check(*subset, mode, 10, false, false); + } + } } } diff --git a/ydb/core/tablet_flat/ut/ya.make b/ydb/core/tablet_flat/ut/ya.make index 1103aaa188de..faa34804e6f2 100644 --- a/ydb/core/tablet_flat/ut/ya.make +++ b/ydb/core/tablet_flat/ut/ya.make @@ -27,6 +27,9 @@ SRCS( flat_test_db.h flat_test_db.cpp flat_test_db_helpers.h + shared_cache_s3fifo_ut.cpp + shared_cache_clock_pro_ut.cpp + shared_cache_switchable_ut.cpp shared_handle_ut.cpp ut_btree_index_nodes.cpp ut_btree_index_iter_charge.cpp diff --git a/ydb/core/tablet_flat/ya.make b/ydb/core/tablet_flat/ya.make index e5f02f16af21..bc026a084bf1 100644 --- a/ydb/core/tablet_flat/ya.make +++ b/ydb/core/tablet_flat/ya.make @@ -56,6 +56,8 @@ SRCS( flat_stat_part.h flat_stat_table.h flat_stat_table.cpp + flat_stat_table_btree_index.cpp + flat_stat_table_btree_index_histogram.cpp flat_stat_part_group_iter_create.cpp flat_store_hotdog.cpp flat_table.cpp @@ -109,6 +111,7 @@ PEERDIR( ydb/core/protos ydb/core/tablet ydb/core/tablet_flat/protos + ydb/core/util ydb/library/binary_json ydb/library/dynumber ydb/library/mkql_proto/protos diff --git a/ydb/core/testlib/actors/block_events.cpp b/ydb/core/testlib/actors/block_events.cpp new file mode 
100644 index 000000000000..801f1c5bf126 --- /dev/null +++ b/ydb/core/testlib/actors/block_events.cpp @@ -0,0 +1 @@ +#include "block_events.h" diff --git a/ydb/core/testlib/actors/block_events.h b/ydb/core/testlib/actors/block_events.h new file mode 100644 index 000000000000..fb1845bd50a8 --- /dev/null +++ b/ydb/core/testlib/actors/block_events.h @@ -0,0 +1,89 @@ +#include "test_runtime.h" + +#include +#include + +namespace NActors { + + /** + * Easy blocking for events under the test actor runtime + * + * Matching events are blocked just before they are processed and stashed + * into a deque. + */ + template + class TBlockEvents : public std::deque { + public: + TBlockEvents(TTestActorRuntime& runtime, std::function condition = {}) + : Runtime(runtime) + , Condition(std::move(condition)) + , Holder(Runtime.AddObserver( + [this](typename TEvType::TPtr& ev) { + this->Process(ev); + })) + {} + + /** + * Unblocks up to count events at the front of the deque, allowing them + * to be handled by the destination actor. + */ + TBlockEvents& Unblock(size_t count = -1) { + while (!this->empty() && count > 0) { + auto& ev = this->front(); + if (!Stopped) { + IEventHandle* ptr = ev.Get(); + UnblockedOnce.insert(ptr); + } + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + ui32 nodeIdx = nodeId - Runtime.GetFirstNodeId(); + Cerr << "... unblocking " << (ev->HasEvent() ? TypeName(*ev->GetBase()) : TypeName()) + << " from " << Runtime.FindActorName(ev->Sender) + << " to " << Runtime.FindActorName(ev->GetRecipientRewrite()) + << Endl; + Runtime.Send(ev.Release(), nodeIdx, /* viaActorSystem */ true); + this->pop_front(); + --count; + } + return *this; + } + + /** + * Stops blocking any new events. Events currently in the deque are + * not unblocked, but may be unblocked at a later time if needed. 
+ */ + TBlockEvents& Stop() { + UnblockedOnce.clear(); + Holder.Remove(); + Stopped = true; + return *this; + } + + private: + void Process(typename TEvType::TPtr& ev) { + IEventHandle* ptr = ev.Get(); + auto it = UnblockedOnce.find(ptr); + if (it != UnblockedOnce.end()) { + UnblockedOnce.erase(it); + return; + } + + if (Condition && !Condition(ev)) { + return; + } + + Cerr << "... blocking " << (ev->HasEvent() ? TypeName(*ev->GetBase()) : TypeName()) + << " from " << Runtime.FindActorName(ev->Sender) + << " to " << Runtime.FindActorName(ev->GetRecipientRewrite()) + << Endl; + this->emplace_back(std::move(ev)); + } + + private: + TTestActorRuntime& Runtime; + std::function Condition; + TTestActorRuntime::TEventObserverHolder Holder; + THashSet UnblockedOnce; + bool Stopped = false; + }; + +} // namespace NActors diff --git a/ydb/core/testlib/actors/test_runtime.cpp b/ydb/core/testlib/actors/test_runtime.cpp index 22201081f16f..645a9368b69e 100644 --- a/ydb/core/testlib/actors/test_runtime.cpp +++ b/ydb/core/testlib/actors/test_runtime.cpp @@ -23,10 +23,11 @@ namespace NActors { void TTestActorRuntime::TNodeData::Stop() { - TNodeDataBase::Stop(); if (Mon) { Mon->Stop(); + GetAppData()->Mon = nullptr; } + TNodeDataBase::Stop(); } TTestActorRuntime::TNodeData::~TNodeData() { diff --git a/ydb/core/testlib/actors/test_runtime.h b/ydb/core/testlib/actors/test_runtime.h index 3016bc441417..f3a403dcdf3d 100644 --- a/ydb/core/testlib/actors/test_runtime.h +++ b/ydb/core/testlib/actors/test_runtime.h @@ -68,22 +68,43 @@ namespace NActors { void SimulateSleep(TDuration duration); template - inline TResult WaitFuture(NThreading::TFuture f) { + inline TResult WaitFuture(NThreading::TFuture f, TDuration simTimeout = TDuration::Max()) { if (!f.HasValue() && !f.HasException()) { TDispatchOptions options; options.CustomFinalCondition = [&]() { return f.HasValue() || f.HasException(); }; - options.FinalEvents.emplace_back([&](IEventHandle&) { - return f.HasValue() || 
f.HasException(); - }); + // Quirk: non-empty FinalEvents enables full simulation + options.FinalEvents.emplace_back([](IEventHandle&) { return false; }); - this->DispatchEvents(options); + this->DispatchEvents(options, simTimeout); Y_ABORT_UNLESS(f.HasValue() || f.HasException()); } - return f.ExtractValue(); + if constexpr (!std::is_same_v) { + return f.ExtractValue(); + } else { + return f.GetValue(); + } + } + + template + inline void WaitFor(const TString& description, const TCondition& condition, TDuration simTimeout = TDuration::Max()) { + if (!condition()) { + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + // Quirk: non-empty FinalEvents enables full simulation + options.FinalEvents.emplace_back([](IEventHandle&) { return false; }); + + Cerr << "... waiting for " << description << Endl; + this->DispatchEvents(options, simTimeout); + + Y_ABORT_UNLESS(condition(), "Timeout while waiting for %s", description.c_str()); + Cerr << "... waiting for " << description << " (done)" << Endl; + } } TIntrusivePtr GetMemObserver(ui32 nodeIndex = 0) { diff --git a/ydb/core/testlib/actors/test_runtime_ut.cpp b/ydb/core/testlib/actors/test_runtime_ut.cpp index d649df72fc89..d6f47f77d310 100644 --- a/ydb/core/testlib/actors/test_runtime_ut.cpp +++ b/ydb/core/testlib/actors/test_runtime_ut.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include #include @@ -622,6 +624,268 @@ Y_UNIT_TEST_SUITE(TActorTest) { UNIT_ASSERT_VALUES_EQUAL(event->Get()->Index, 12u); } } -}; + + Y_UNIT_TEST(TestWaitFuture) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + TEvTrigger() = default; + }; + + class TTriggerActor : public TActorBootstrapped { + public: + TTriggerActor(NThreading::TPromise promise) + : Promise(std::move(promise)) + {} + + void Bootstrap() { + Schedule(TDuration::Seconds(1), new TEvTrigger); + Become(&TThis::StateWork); + } + + private: + 
STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr&) { + Promise.SetValue(); + PassAway(); + } + + private: + NThreading::TPromise Promise; + }; + + TTestActorRuntime runtime; + runtime.Initialize(MakeEgg()); + + NThreading::TPromise promise = NThreading::NewPromise(); + NThreading::TFuture future = promise.GetFuture(); + + auto actor = runtime.Register(new TTriggerActor(std::move(promise))); + runtime.EnableScheduleForActor(actor); + + runtime.WaitFuture(std::move(future)); + } + + Y_UNIT_TEST(TestWaitFor) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + TEvTrigger() = default; + }; + + class TTriggerActor : public TActorBootstrapped { + public: + TTriggerActor(int* ptr) + : Ptr(ptr) + {} + + void Bootstrap() { + Schedule(TDuration::Seconds(1), new TEvTrigger); + Become(&TThis::StateWork); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr&) { + *Ptr = 42; + PassAway(); + } + + private: + int* Ptr; + }; + + TTestActorRuntime runtime; + runtime.Initialize(MakeEgg()); + + int value = 0; + auto actor = runtime.Register(new TTriggerActor(&value)); + runtime.EnableScheduleForActor(actor); + + runtime.WaitFor("value = 42", [&]{ return value == 42; }); + UNIT_ASSERT_VALUES_EQUAL(value, 42); + } + + Y_UNIT_TEST(TestBlockEvents) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + int Value; + + TEvTrigger(int value) + : Value(value) + {} + }; + + class TTargetActor : public TActorBootstrapped { + public: + TTargetActor(std::vector* ptr) + : Ptr(ptr) + {} + + void Bootstrap() { + Become(&TThis::StateWork); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr& ev) { + 
Ptr->push_back(ev->Get()->Value); + } + + private: + std::vector* Ptr; + }; + + class TSourceActor : public TActorBootstrapped { + public: + TSourceActor(const TActorId& target) + : Target(target) + {} + + void Bootstrap() { + Become(&TThis::StateWork); + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvWakeup, Handle); + } + } + + void Handle(TEvents::TEvWakeup::TPtr&) { + Send(Target, new TEvTrigger(++Counter)); + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + TActorId Target; + int Counter = 0; + }; + + TTestActorRuntime runtime(2); + runtime.Initialize(MakeEgg()); + + std::vector values; + auto target = runtime.Register(new TTargetActor(&values), /* nodeIdx */ 1); + auto source = runtime.Register(new TSourceActor(target), /* nodeIdx */ 1); + runtime.EnableScheduleForActor(source); + + TBlockEvents block(runtime, [&](TEvTrigger::TPtr& ev){ return ev->GetRecipientRewrite() == target; }); + runtime.WaitFor("blocked 3 events", [&]{ return block.size() >= 3; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + + block.Unblock(2); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + + runtime.WaitFor("blocked 1 more event", [&]{ return block.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 1); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 2); + values.clear(); + + block.Stop(); + runtime.WaitFor("processed 2 more events", [&]{ return values.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 5); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 6); + values.clear(); + + block.Unblock(); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 0u); + 
UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + runtime.WaitFor("processed 3 more events", [&]{ return values.size() >= 3; }); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 3); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 4); + UNIT_ASSERT_VALUES_EQUAL(values.at(2), 7); + } + + Y_UNIT_TEST(TestWaitForFirstEvent) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + int Value; + + TEvTrigger(int value) + : Value(value) + {} + }; + + class TSourceActor : public TActorBootstrapped { + public: + TSourceActor(const TActorId& target) + : Target(target) + {} + + void Bootstrap() { + Become(&TThis::StateWork); + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvWakeup, Handle); + } + } + + void Handle(TEvents::TEvWakeup::TPtr&) { + Send(Target, new TEvTrigger(++Counter)); + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + TActorId Target; + int Counter = 0; + }; + + TTestActorRuntime runtime; + runtime.Initialize(MakeEgg()); + + TActorId target = runtime.AllocateEdgeActor(); + + TActorId actorId = runtime.Register(new TSourceActor(target)); + runtime.EnableScheduleForActor(actorId); + + { + TWaitForFirstEvent waiter(runtime); + waiter.Wait(); + } + { + TWaitForFirstEvent waiter(runtime, [](const TEvTrigger::TPtr& ev){ return ev->Get()->Value == 10; }); + waiter.Wait(); + } + } +} } diff --git a/ydb/core/testlib/actors/wait_events.cpp b/ydb/core/testlib/actors/wait_events.cpp new file mode 100644 index 000000000000..d085c37468ee --- /dev/null +++ b/ydb/core/testlib/actors/wait_events.cpp @@ -0,0 +1 @@ +#include "wait_events.h" diff --git a/ydb/core/testlib/actors/wait_events.h b/ydb/core/testlib/actors/wait_events.h new file mode 100644 index 000000000000..8982952ee5f9 --- /dev/null +++ b/ydb/core/testlib/actors/wait_events.h @@ -0,0 +1,48 @@ 
+#include "test_runtime.h" + +#include + +namespace NActors { + + /** + * Easy wait for a first event under the test actor runtime + * + */ + template + class TWaitForFirstEvent { + public: + TWaitForFirstEvent(TTestActorRuntime& runtime, std::function condition = {}) + : Runtime(runtime) + , Condition(std::move(condition)) + , Holder(Runtime.AddObserver( + [this](typename TEvType::TPtr& ev) { + if (EventSeen) + return; + if (Condition && !Condition(ev)) + return; + EventSeen = true; + })) + {} + + /** + * Wait for a first event + */ + void Wait() { + Runtime.WaitFor(TypeName(), [&]{ return EventSeen; }); + } + + /** + * Stops waiting and remove event observer + */ + void Stop() { + Holder.Remove(); + } + + private: + TTestActorRuntime& Runtime; + std::function Condition; + TTestActorRuntime::TEventObserverHolder Holder; + bool EventSeen = false; + }; + +} // namespace NActors diff --git a/ydb/core/testlib/actors/ya.make b/ydb/core/testlib/actors/ya.make index 25f814605794..1c51d6746a47 100644 --- a/ydb/core/testlib/actors/ya.make +++ b/ydb/core/testlib/actors/ya.make @@ -1,7 +1,12 @@ LIBRARY() SRCS( + block_events.cpp + block_events.h test_runtime.cpp + test_runtime.h + wait_events.cpp + wait_events.h ) PEERDIR( diff --git a/ydb/core/testlib/basics/feature_flags.h b/ydb/core/testlib/basics/feature_flags.h index 1270874e4a02..04395a54f3fd 100644 --- a/ydb/core/testlib/basics/feature_flags.h +++ b/ydb/core/testlib/basics/feature_flags.h @@ -9,13 +9,13 @@ class TTestFeatureFlagsHolder { public: TFeatureFlags FeatureFlags; - #define FEATURE_FLAG_SETTER(name) \ - TDerived& Set##name(std::optional value) { \ - if (value) { \ - FeatureFlags.Set##name(*value); \ - } \ - return *static_cast(this); \ - } +#define FEATURE_FLAG_SETTER(name) \ + TDerived& Set##name(std::optional value) { \ + if (value) { \ + FeatureFlags.Set##name(*value); \ + } \ + return *static_cast(this); \ + } FEATURE_FLAG_SETTER(AllowYdbRequestsWithoutDatabase) FEATURE_FLAG_SETTER(EnableSystemViews) @@ 
-61,8 +61,12 @@ class TTestFeatureFlagsHolder { FEATURE_FLAG_SETTER(EnableCMSRequestPriorities) FEATURE_FLAG_SETTER(EnableTableDatetime64) FEATURE_FLAG_SETTER(EnableResourcePools) + FEATURE_FLAG_SETTER(EnableChangefeedsOnIndexTables) + FEATURE_FLAG_SETTER(EnablePgSyntax) + FEATURE_FLAG_SETTER(EnableTieringInColumnShard) + FEATURE_FLAG_SETTER(EnableOlapCompression) - #undef FEATURE_FLAG_SETTER +#undef FEATURE_FLAG_SETTER }; -} // NKikimr +} // namespace NKikimr diff --git a/ydb/core/testlib/basics/runtime.cpp b/ydb/core/testlib/basics/runtime.cpp index 520181dc05ee..0c9921bf59b4 100644 --- a/ydb/core/testlib/basics/runtime.cpp +++ b/ydb/core/testlib/basics/runtime.cpp @@ -17,6 +17,7 @@ namespace NActors { void TTestBasicRuntime::Initialize(TEgg egg) { AddICStuff(); + AddAuditLogStuff(); TTestActorRuntime::Initialize(std::move(egg)); } @@ -76,4 +77,21 @@ namespace NActors { } } } + + void TTestBasicRuntime::AddAuditLogStuff() + { + if (AuditLogBackends) { + for (ui32 nodeIndex = 0; nodeIndex < GetNodeCount(); ++nodeIndex) { + AddLocalService( + NKikimr::NAudit::MakeAuditServiceID(), + TActorSetupCmd( + NKikimr::NAudit::CreateAuditWriter(std::move(AuditLogBackends)).Release(), + TMailboxType::HTSwap, + 0 + ), + nodeIndex + ); + } + } + } } diff --git a/ydb/core/testlib/basics/runtime.h b/ydb/core/testlib/basics/runtime.h index 863f12286fa1..33126d920f53 100644 --- a/ydb/core/testlib/basics/runtime.h +++ b/ydb/core/testlib/basics/runtime.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -12,9 +13,13 @@ namespace NActors { using TNodeLocationCallback = std::function; TNodeLocationCallback LocationCallback; + NKikimr::NAudit::TAuditLogBackends AuditLogBackends; + ~TTestBasicRuntime(); void Initialize(TEgg) override; + void AddICStuff(); + void AddAuditLogStuff(); }; } diff --git a/ydb/core/testlib/basics/services.cpp b/ydb/core/testlib/basics/services.cpp index 80d526f7815e..d3a6c70f0293 100644 --- a/ydb/core/testlib/basics/services.cpp +++ 
b/ydb/core/testlib/basics/services.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include @@ -157,15 +157,15 @@ namespace NPDisk { void SetupSharedPageCache(TTestActorRuntime& runtime, ui32 nodeIndex, NFake::TCaches caches) { - auto pageCollectionCacheConfig = MakeHolder(); - pageCollectionCacheConfig->CacheConfig = new TCacheCacheConfig(caches.Shared, nullptr, nullptr, nullptr); - pageCollectionCacheConfig->TotalAsyncQueueInFlyLimit = caches.AsyncQueue; - pageCollectionCacheConfig->TotalScanQueueInFlyLimit = caches.ScanQueue; - pageCollectionCacheConfig->Counters = MakeIntrusive(runtime.GetDynamicCounters(nodeIndex)); + auto sharedCacheConfig = MakeHolder(); + sharedCacheConfig->LimitBytes = caches.Shared; + sharedCacheConfig->TotalAsyncQueueInFlyLimit = caches.AsyncQueue; + sharedCacheConfig->TotalScanQueueInFlyLimit = caches.ScanQueue; + sharedCacheConfig->Counters = MakeIntrusive(runtime.GetDynamicCounters(nodeIndex)); runtime.AddLocalService(MakeSharedPageCacheId(0), TActorSetupCmd( - CreateSharedPageCache(std::move(pageCollectionCacheConfig), runtime.GetMemObserver(nodeIndex)), + CreateSharedPageCache(std::move(sharedCacheConfig), runtime.GetMemObserver(nodeIndex)), TMailboxType::ReadAsFilled, 0), nodeIndex); diff --git a/ydb/core/testlib/basics/ya.make b/ydb/core/testlib/basics/ya.make index 642640447975..67fef3796fbc 100644 --- a/ydb/core/testlib/basics/ya.make +++ b/ydb/core/testlib/basics/ya.make @@ -21,6 +21,7 @@ PEERDIR( ydb/core/mind ydb/core/node_whiteboard ydb/core/quoter + ydb/core/statistics/service ydb/core/tablet_flat ydb/core/testlib/actors ydb/core/tx/columnshard diff --git a/ydb/core/testlib/common_helper.cpp b/ydb/core/testlib/common_helper.cpp index d5267342d540..8e92ccd15c1b 100644 --- a/ydb/core/testlib/common_helper.cpp +++ b/ydb/core/testlib/common_helper.cpp @@ -22,6 +22,9 @@ const std::vector TLoggerInit::KqpServices = { const std::vector TLoggerInit::CSServices = { NKikimrServices::TX_COLUMNSHARD, + 
NKikimrServices::TX_COLUMNSHARD_BLOBS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_BS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_TIER, NKikimrServices::TX_COLUMNSHARD_SCAN, NKikimrServices::TX_CONVEYOR }; diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index c6f05ec8c86f..dd26da35fa74 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -183,7 +183,7 @@ std::shared_ptr THelper::TestArrowBatch(ui64 pathIdBegin, ui TString THelper::GetTestTableSchema() const { TStringBuilder sb; sb << R"(Columns{ Name: "timestamp" Type : "Timestamp" NotNull : true })"; - sb << R"(Columns{ Name: "resource_id" Type : "Utf8" })"; + sb << R"(Columns{ Name: "resource_id" Type : "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" } })"; sb << "Columns{ Name: \"uid\" Type : \"Utf8\" NotNull : true StorageId : \"" + OptionalStorageId + "\" }"; sb << R"(Columns{ Name: "level" Type : "Int32" })"; sb << "Columns{ Name: \"message\" Type : \"Utf8\" StorageId : \"" + OptionalStorageId + "\" }"; @@ -198,7 +198,7 @@ TString THelper::GetTestTableSchema() const { return sb; } -void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { +void THelper::CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableNames /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); CreateTestOlapStore(sender, Sprintf(R"( Name: "%s" @@ -209,19 +209,25 @@ void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TStr %s } } - )", storeName.c_str(), storeShardsCount, GetTestTableSchema().data())); + )", storeName.c_str(), storeShardsCount, tableSchema.data())); const TString shardingColumns = "[\"" + JoinSeq("\",\"", GetShardingColumns()) + "\"]"; - TBase::CreateTestOlapTable(sender, storeName, 
Sprintf(R"( - Name: "%s" - ColumnShardCount: %d - Sharding { - HashSharding { - Function: %s - Columns: %s - } - })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + for (const TString& tableName : tableNames) { + TBase::CreateTestOlapTable(sender, storeName, Sprintf(R"( + Name: "%s" + ColumnShardCount: %d + Sharding { + HashSharding { + Function: %s + Columns: %s + } + })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + } +} + +void THelper::CreateOlapTablesWithStore(TVector tableNames /*= {"olapTable"}*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { + CreateSchemaOlapTablesWithStore(GetTestTableSchema(), tableNames, storeName, storeShardsCount, tableShardsCount); } // Clickbench table diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index 7a9e3dad1bf3..95c8877b6ba6 100644 --- a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -27,11 +27,16 @@ class THelper: public THelperSchemaless { std::shared_ptr GetArrowSchema() const; YDB_FLAG_ACCESSOR(WithJsonDocument, false); YDB_ACCESSOR(TString, OptionalStorageId, "__MEMORY"); +protected: TString ShardingMethod = "HASH_FUNCTION_CONSISTENCY_64"; +private: bool WithSomeNulls_ = false; protected: - void CreateOlapTableWithStore(TString tableName = "olapTable", TString storeName = "olapStore", + void CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableName = {"olapTable"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + void CreateOlapTablesWithStore(TVector tableName = {"olapTable"}, TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + public: using TBase::TBase; @@ -43,9 +48,9 @@ class THelper: public THelperSchemaless { static constexpr const char * PROTO_SCHEMA = R"( Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } - Columns { 
Name: "resource_id" Type: "Utf8" } + Columns { Name: "resource_id" Type: "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "uid" Type: "Utf8" } - Columns { Name: "level" Type: "Int32" } + Columns { Name: "level" Type: "Int32" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "message" Type: "Utf8" } KeyColumnNames: "timestamp" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index cfb7952b7a73..fbc0ed46e7db 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +113,7 @@ #include #include #include +#include #include #include @@ -250,6 +252,7 @@ namespace Tests { appData.PersQueueMirrorReaderFactory = Settings->PersQueueMirrorReaderFactory.get(); appData.HiveConfig.MergeFrom(Settings->AppConfig->GetHiveConfig()); appData.GraphConfig.MergeFrom(Settings->AppConfig->GetGraphConfig()); + appData.SqsConfig.MergeFrom(Settings->AppConfig->GetSqsConfig()); appData.DynamicNameserviceConfig = new TDynamicNameserviceConfig; auto dnConfig = appData.DynamicNameserviceConfig; @@ -469,7 +472,7 @@ namespace Tests { app.AddDomain(domain.Release()); } - TVector TServer::StartPQTablets(ui32 pqTabletsN) { + TVector TServer::StartPQTablets(ui32 pqTabletsN, bool wait) { auto getChannelBind = [](const TString& storagePool) { TChannelBind bind; bind.SetStoragePoolName(storagePool); @@ -504,7 +507,7 @@ namespace Tests { UNIT_ASSERT_EQUAL_C(createTabletReply->Record.GetOwner(), tabletId, createTabletReply->Record.GetOwner() << " != " << tabletId); ui64 id = createTabletReply->Record.GetTabletID(); - while (true) { + while (wait) { auto tabletCreationResult = Runtime->GrabEdgeEventRethrow(handle); UNIT_ASSERT(tabletCreationResult); @@ -559,7 +562,12 @@ namespace Tests { NKikimrBlobStorage::TDefineHostConfig hostConfig; hostConfig.SetHostConfigId(nodeId); - 
TString path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; + TString path; + if (Settings->UseSectorMap) { + path ="SectorMap:test-client[:2000]"; + } else { + path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; + } hostConfig.AddDrive()->SetPath(path); Cerr << "test_client.cpp: SetPath # " << path << Endl; bsConfigureRequest->Record.MutableRequest()->AddCommand()->MutableDefineHostConfig()->CopyFrom(hostConfig); @@ -760,6 +768,11 @@ namespace Tests { const auto aid = Runtime->Register(actor, nodeIdx, appData.SystemPoolId, TMailboxType::Revolving, 0); Runtime->RegisterService(NCSIndex::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); } + { + auto* actor = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(NOlap::NGroupedMemoryManager::TConfig(), new ::NMonitoring::TDynamicCounters()); + const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); + Runtime->RegisterService(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); + } { auto* actor = NConveyor::TScanServiceOperator::CreateService(NConveyor::TConfig(), new ::NMonitoring::TDynamicCounters()); const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); @@ -831,7 +844,7 @@ namespace Tests { auto kqpProxySharedResources = std::make_shared(); IActor* kqpRmService = NKqp::CreateKqpResourceManagerActor( - Settings->AppConfig->GetTableServiceConfig().GetResourceManager(), nullptr, {}, kqpProxySharedResources); + Settings->AppConfig->GetTableServiceConfig().GetResourceManager(), nullptr, {}, kqpProxySharedResources, Runtime->GetNodeId(nodeIdx)); TActorId kqpRmServiceId = Runtime->Register(kqpRmService, nodeIdx); Runtime->RegisterService(NKqp::MakeKqpRmServiceID(Runtime->GetNodeId(nodeIdx)), kqpRmServiceId, nodeIdx); @@ -1133,7 +1146,7 @@ namespace Tests { "TestTenant", nullptr, // MakeIntrusive(), 
YqSharedResources, - NKikimr::NFolderService::CreateMockFolderServiceAdapterActor, + [](auto& config) { return NKikimr::NFolderService::CreateMockFolderServiceAdapterActor(config, "");}, /*IcPort = */0, {} ); @@ -2692,6 +2705,50 @@ namespace Tests { return Server->DynamicNodes(); } + void TTenants::CreateTenant(Ydb::Cms::CreateDatabaseRequest request, ui32 nodes, TDuration timeout) { + const TString path = request.path(); + const bool serverless = request.has_serverless_resources(); + + // Create new tenant + auto& runtime = *Server->GetRuntime(); + const auto result = NKikimr::NRpcService::DoLocalRpc>( + std::move(request), "", "", runtime.GetActorSystem(0), true + ).ExtractValueSync(); + + if (result.operation().status() != Ydb::StatusIds::SUCCESS) { + NYql::TIssues issues; + NYql::IssuesFromMessage(result.operation().issues(), issues); + ythrow yexception() << "Failed to create tenant " << path << ", " << result.operation().status() << ", reason:\n" << issues.ToString(); + } + + // Run new tenant + if (!serverless) { + Run(path, nodes); + } + + // Wait tenant is up + Ydb::Cms::GetDatabaseStatusResult getTenantResult; + const TActorId edgeActor = runtime.AllocateEdgeActor(); + const TInstant start = TInstant::Now(); + while (TInstant::Now() - start <= timeout) { + auto getTenantRequest = std::make_unique(); + getTenantRequest->Record.MutableRequest()->set_path(path); + runtime.SendToPipe(MakeConsoleID(), edgeActor, getTenantRequest.release(), 0, GetPipeConfigWithRetries()); + + auto response = runtime.GrabEdgeEvent(edgeActor, timeout); + if (!response) { + ythrow yexception() << "Waiting CMS get tenant response timeout. 
Last tenant description:\n" << getTenantResult.DebugString(); + } + response->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantResult); + if (getTenantResult.state() == Ydb::Cms::GetDatabaseStatusResult::RUNNING) { + return; + } + + Sleep(TDuration::MilliSeconds(100)); + } + ythrow yexception() << "Waiting tenant status RUNNING timeout. Spent time " << TInstant::Now() - start << " exceeds limit " << timeout << ". Last tenant description:\n" << getTenantResult.DebugString(); + } + TVector &TTenants::Nodes(const TString &name) { return Tenants[name]; } diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index 795491279c1d..68b878f4de04 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -156,6 +156,8 @@ namespace Tests { NYql::IYtGateway::TPtr YtGateway; bool InitializeFederatedQuerySetupFactory = false; TString ServerCertFilePath; + bool Verbose = true; + bool UseSectorMap = false; std::function CreateTicketParser = NKikimr::CreateTicketParser; std::shared_ptr GrpcServiceFactory; @@ -205,6 +207,8 @@ namespace Tests { TServerSettings& SetComputationFactory(NMiniKQL::TComputationNodeFactory computationFactory) { ComputationFactory = std::move(computationFactory); return *this; } TServerSettings& SetYtGateway(NYql::IYtGateway::TPtr ytGateway) { YtGateway = std::move(ytGateway); return *this; } TServerSettings& SetInitializeFederatedQuerySetupFactory(bool value) { InitializeFederatedQuerySetupFactory = value; return *this; } + TServerSettings& SetVerbose(bool value) { Verbose = value; return *this; } + TServerSettings& SetUseSectorMap(bool value) { UseSectorMap = value; return *this; } TServerSettings& SetPersQueueGetReadSessionsInfoWorkerFactory( std::shared_ptr factory ) { @@ -301,7 +305,7 @@ namespace Tests { } } void StartDummyTablets(); - TVector StartPQTablets(ui32 pqTabletsN); + TVector StartPQTablets(ui32 pqTabletsN, bool wait = true); TTestActorRuntime* GetRuntime() const; const 
TServerSettings& GetSettings() const; const NScheme::TTypeRegistry* GetTypeRegistry(); @@ -630,6 +634,8 @@ namespace Tests { ui32 Availabe() const; ui32 Capacity() const; + void CreateTenant(Ydb::Cms::CreateDatabaseRequest request, ui32 nodes = 1, TDuration timeout = TDuration::Seconds(30)); + private: TVector& Nodes(const TString &name); void StopNode(const TString /*name*/, ui32 nodeIdx); diff --git a/ydb/core/testlib/ya.make b/ydb/core/testlib/ya.make index 3bc9f140b1f0..5a63f36a639f 100644 --- a/ydb/core/testlib/ya.make +++ b/ydb/core/testlib/ya.make @@ -100,7 +100,9 @@ PEERDIR( ydb/services/datastreams ydb/services/discovery ydb/services/ext_index/service + ydb/services/ymq ydb/core/tx/conveyor/service + ydb/core/tx/limiter/grouped_memory/usage ydb/services/fq ydb/services/kesus ydb/services/persqueue_cluster_discovery diff --git a/ydb/core/tx/columnshard/background_controller.cpp b/ydb/core/tx/columnshard/background_controller.cpp index fa6a2b32eb85..7449e7d31ff4 100644 --- a/ydb/core/tx/columnshard/background_controller.cpp +++ b/ydb/core/tx/columnshard/background_controller.cpp @@ -20,7 +20,7 @@ void TBackgroundController::CheckDeadlines() { void TBackgroundController::CheckDeadlinesIndexation() { for (auto&& i : ActiveIndexationTasks) { if (TMonotonic::Now() - i.second > NOlap::TCompactionLimits::CompactionTimeout) { - AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "deadline_compaction")("task_id", i.first); + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "deadline_indexation")("task_id", i.first); Y_DEBUG_ABORT_UNLESS(false); } } diff --git a/ydb/core/tx/columnshard/background_controller.h b/ydb/core/tx/columnshard/background_controller.h index bb38f2744061..b57a29d5b072 100644 --- a/ydb/core/tx/columnshard/background_controller.h +++ b/ydb/core/tx/columnshard/background_controller.h @@ -1,6 +1,7 @@ #pragma once #include "engines/changes/abstract/compaction_info.h" #include "engines/portions/meta.h" +#include namespace NKikimr::NOlap { class 
TColumnEngineChanges; @@ -15,11 +16,16 @@ class TBackgroundController { using TCurrentCompaction = THashMap; TCurrentCompaction ActiveCompactionInfo; + std::shared_ptr Counters; bool ActiveCleanupPortions = false; bool ActiveCleanupTables = false; bool ActiveCleanupInsertTable = false; YDB_READONLY(TMonotonic, LastIndexationInstant, TMonotonic::Zero()); public: + TBackgroundController(std::shared_ptr counters) + : Counters(std::move(counters)) { + } + THashSet GetConflictTTLPortions() const; THashSet GetConflictCompactionPortions() const; @@ -29,6 +35,7 @@ class TBackgroundController { bool StartCompaction(const NOlap::TPlanCompactionInfo& info); void FinishCompaction(const NOlap::TPlanCompactionInfo& info) { Y_ABORT_UNLESS(ActiveCompactionInfo.erase(info.GetPathId())); + Counters->OnCompactionFinish(info.GetPathId()); } const TCurrentCompaction& GetActiveCompaction() const { return ActiveCompactionInfo; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h index d14d7b2dee19..ee1cf91bb419 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -44,6 +45,60 @@ class TTabletByBlob { }; +class TBlobsByGenStep { +private: + struct TGenStepFromLogoBlobIdComparator { + bool operator()(const TLogoBlobID& l, const TLogoBlobID& r) const { + TGenStep gsl(l); + TGenStep gsr(r); + if (gsl == gsr) { + return l < r; + } else { + return gsl < gsr; + } + } + }; + std::set Blobs; +public: + [[nodiscard]] bool Add(const TLogoBlobID& blobId) { + return Blobs.emplace(blobId).second; + } + [[nodiscard]] bool Remove(const TLogoBlobID& blobId) { + return Blobs.erase(blobId); + } + bool IsEmpty() const { + return Blobs.empty(); + } + size_t GetSize() const { + return Blobs.size(); + } + + TGenStep GetMinGenStepVerified() const { + AFL_VERIFY(Blobs.size()); + 
return TGenStep(*Blobs.begin()); + } + + template + requires std::invocable + bool ExtractTo(const TGenStep& lessOrEqualThan, const ui32 countLimit, const TActor& actor) { + ui32 idx = 0; + for (auto it = Blobs.begin(); it != Blobs.end(); ++it) { + TGenStep gs(*it); + if (lessOrEqualThan < gs) { + Blobs.erase(Blobs.begin(), it); + return true; + } + if (++idx > countLimit) { + Blobs.erase(Blobs.begin(), it); + return false; + } + actor(gs, *it); + } + Blobs.clear(); + return true; + } +}; + class TTabletsByBlob { private: THashMap> Data; @@ -518,11 +573,11 @@ class TBlobsCategories { void AddSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { AFL_VERIFY(Sharing.Add(tabletId, id)); } - void RemoveSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { - Y_UNUSED(Sharing.Remove(tabletId, id)); + [[nodiscard]] bool RemoveSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { + return Sharing.Remove(tabletId, id); } - void RemoveBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { - Y_UNUSED(Borrowed.Remove(tabletId, id)); + [[nodiscard]] bool RemoveBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { + return Borrowed.Remove(tabletId, id); } TBlobsCategories(const TTabletId selfTabletId) : SelfTabletId(selfTabletId) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h index 19e2da1b39b6..3c7279e7b3f1 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h @@ -33,7 +33,7 @@ class IBlobsGCAction: public ICommonBlobsAction { virtual bool DoIsEmpty() const = 0; public: void AddSharedBlobToNextIteration(const TUnifiedBlobId& blobId, const TTabletId ownerTabletId) { - BlobsToRemove.RemoveSharing(ownerTabletId, blobId); + AFL_VERIFY(BlobsToRemove.RemoveBorrowed(ownerTabletId, blobId)); } void OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs); diff --git 
a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp index 8b80dd9be277..0902a464871a 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp @@ -51,6 +51,7 @@ void IStoragesManager::OnTieringModified(const std::shared_ptrCounters.OnPutResult(blobId.BlobSize()); Y_ABORT_UNLESS(status == NKikimrProto::OK, "The caller must handle unsuccessful status"); Y_ABORT_UNLESS(BatchInfo); Y_ABORT_UNLESS(blobId.Cookie() < BatchInfo->InFlight.size()); @@ -129,6 +128,8 @@ TBlobManager::TBlobManager(TIntrusivePtr tabletInfo, ui32 ge , CurrentGen(gen) , CurrentStep(0) { + BlobsManagerCounters.CurrentGen->Set(CurrentGen); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); } void TBlobManager::RegisterControls(NKikimr::TControlBoard& /*icb*/) { @@ -151,43 +152,19 @@ bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) { return false; } - for (auto it = BlobsToDelete.GetIterator(); it.IsValid(); ++it) { - BlobsManagerCounters.OnDeleteBlobMarker(it.GetBlobId().BlobSize()); - } - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); // Build the list of steps that cannot be garbage collected before Keep flag is set on the blobs - THashSet genStepsWithBlobsToKeep; - std::map> blobsToKeepLocal; + TBlobsByGenStep blobsToKeepLocal; for (const auto& unifiedBlobId : blobsToKeep) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("add_blob_to_keep", unifiedBlobId.ToStringNew()); TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId(); - TGenStep genStep(blobId); - Y_ABORT_UNLESS(LastCollectedGenStep < genStep); - - AFL_VERIFY(blobsToKeepLocal[genStep].emplace(blobId).second)("blob_to_keep_double", unifiedBlobId.ToStringNew()); - BlobsManagerCounters.OnKeepMarker(blobId.BlobSize()); - const ui64 groupId = dsGroupSelector.GetGroup(blobId); 
- // Keep + DontKeep (probably in different gen:steps) - // GC could go through it to a greater LastCollectedGenStep - if (BlobsToDelete.Contains(SelfTabletId, TUnifiedBlobId(groupId, blobId))) { - continue; - } + Y_ABORT_UNLESS(LastCollectedGenStep < TGenStep(blobId)); - genStepsWithBlobsToKeep.insert(genStep); + AFL_VERIFY(blobsToKeepLocal.Add(blobId))("blob_to_keep_double", unifiedBlobId.ToStringNew()); } std::swap(blobsToKeepLocal, BlobsToKeep); - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - - AllocatedGenSteps.clear(); - for (const auto& gs : genStepsWithBlobsToKeep) { - AllocatedGenSteps.push_back(new TAllocatedGenStep(gs)); - } - AllocatedGenSteps.push_back(new TAllocatedGenStep({ CurrentGen, 0 })); - - Sort(AllocatedGenSteps.begin(), AllocatedGenSteps.end(), [](const TAllocatedGenStepConstPtr& a, const TAllocatedGenStepConstPtr& b) { - return a->GenStep < b->GenStep; - }); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); return true; } @@ -201,7 +178,7 @@ void TBlobManager::PopGCBarriers(const TGenStep gs) { std::deque TBlobManager::FindNewGCBarriers() { TGenStep newCollectGenStep = LastCollectedGenStep; std::deque result; - if (AllocatedGenSteps.empty()) { + if (AllocatedGenSteps.empty() && LastCollectedGenStep < TGenStep(CurrentGen, CurrentStep)) { result.emplace_back(TGenStep(CurrentGen, CurrentStep)); } for (auto& allocated : AllocatedGenSteps) { @@ -212,9 +189,6 @@ std::deque TBlobManager::FindNewGCBarriers() { result.emplace_back(allocated->GenStep); newCollectGenStep = allocated->GenStep; } - if (result.empty() || LastCollectedGenStep < result.front()) { - result.emplace_front(LastCollectedGenStep); - } return result; } @@ -226,6 +200,22 @@ class TBlobManager::TGCContext { YDB_ACCESSOR_DEF(std::deque, KeepsToErase); YDB_READONLY_DEF(std::shared_ptr, SharedBlobsManager); public: + ui64 GetKeepBytes() const { + ui64 size = 0; + for (auto&& i : KeepsToErase) { + size += i.BlobSize(); + } + return size; + } + + ui64 GetDeleteBytes() const { + ui64 
size = 0; + for (TTabletsByBlob::TIterator it(ExtractedToRemoveFromDB); it.IsValid(); ++it) { + size += it.GetBlobId().BlobSize(); + } + return size; + } + TGCContext(const std::shared_ptr& sharedBlobsManager) : SharedBlobsManager(sharedBlobsManager) { @@ -237,7 +227,6 @@ class TBlobManager::TGCContext { // TODO: we need only actual channel history here for (ui32 channelIdx = 2; channelIdx < tabletInfo->Channels.size(); ++channelIdx) { const auto& channelHistory = tabletInfo->ChannelInfo(channelIdx)->History; - for (auto it = channelHistory.begin(); it != channelHistory.end(); ++it) { PerGroupGCListsInFlight[TBlobAddress(it->GroupID, channelIdx)]; } @@ -281,78 +270,62 @@ void TBlobManager::DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext) { } } -bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.size()); - for (; BlobsToKeep.size() && (!controlCapacity || !gcContext.IsFull()); BlobsToKeep.erase(BlobsToKeep.begin())) { - auto gsBlobs = BlobsToKeep.begin(); - TGenStep genStep = gsBlobs->first; +bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.GetSize()); + + const auto pred = [&](const TGenStep& genStep, const TLogoBlobID& logoBlobId) { AFL_VERIFY(LastCollectedGenStep < genStep)("last", LastCollectedGenStep.ToString())("gen", genStep.ToString()); - if (dest < genStep) { - return true; - } - for (auto&& keepBlobIt : gsBlobs->second) { - const ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt.Channel(), keepBlobIt.Generation()); - TBlobAddress bAddress(blobGroup, keepBlobIt.Channel()); - const TUnifiedBlobId keepUnified(blobGroup, keepBlobIt); - 
gcContext.MutableKeepsToErase().emplace_back(keepUnified); - if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { - if (keepBlobIt.Generation() == CurrentGen) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); - continue; - } - if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); - continue; - } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(keepBlobIt); - } else { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(keepBlobIt); + const ui32 blobGroup = TabletInfo->GroupFor(logoBlobId.Channel(), logoBlobId.Generation()); + TBlobAddress bAddress(blobGroup, logoBlobId.Channel()); + const TUnifiedBlobId keepUnified(blobGroup, logoBlobId); + gcContext.MutableKeepsToErase().emplace_back(keepUnified); + if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { + if (logoBlobId.Generation() == CurrentGen) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); + return; } + if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); + return; + } + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); + gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(logoBlobId); + } else { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); + 
gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(logoBlobId); } - } - return BlobsToKeep.empty(); + }; + + return BlobsToKeep.ExtractTo(dest, gcContext.GetFreeSpace(), pred); } std::shared_ptr TBlobManager::BuildGCTask(const TString& storageId, const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, const std::shared_ptr& counters) noexcept { AFL_VERIFY(!CollectGenStepInFlight); - if (BlobsToKeep.empty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { - ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep); + if (BlobsToKeep.IsEmpty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { + BlobsManagerCounters.GCCounters.SkipCollectionEmpty->Add(1); + ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep)("reason", "empty"); return nullptr; } - if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod(TDuration::Seconds(GC_INTERVAL_SECONDS))) { + if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod()) { + ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep)("reason", "too_often"); + BlobsManagerCounters.GCCounters.SkipCollectionThrottling->Add(1); return nullptr; } PreviousGCTime = AppData()->TimeProvider->Now(); TGCContext gcContext(sharedBlobsInfo); - if (FirstGC) { - gcContext.InitializeFirst(TabletInfo); - FirstGC = false; - } - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("action_id", TGUID::CreateTimebased().AsGuidString()); const std::deque newCollectGenSteps = FindNewGCBarriers(); - AFL_VERIFY(newCollectGenSteps.size()); - AFL_VERIFY(newCollectGenSteps.front() == LastCollectedGenStep); if 
(GCBarrierPreparation != LastCollectedGenStep) { - if (!GCBarrierPreparation.Generation()) { - for (auto&& newCollectGenStep : newCollectGenSteps) { - if (!DrainKeepTo(newCollectGenStep, gcContext)) { - break; - } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); - } - AFL_VERIFY(LastCollectedGenStep <= CollectGenStepInFlight)("last", LastCollectedGenStep)("collect", CollectGenStepInFlight); - } else { - AFL_VERIFY(GCBarrierPreparation.Generation() != CurrentGen); + if (GCBarrierPreparation.Generation()) { + AFL_VERIFY(GCBarrierPreparation.Generation() < CurrentGen); AFL_VERIFY(LastCollectedGenStep <= GCBarrierPreparation); - CollectGenStepInFlight = GCBarrierPreparation; - AFL_VERIFY(DrainKeepTo(*CollectGenStepInFlight, gcContext, false)); + if (DrainKeepTo(GCBarrierPreparation, gcContext)) { + CollectGenStepInFlight = GCBarrierPreparation; + } } } else { DrainDeleteTo(LastCollectedGenStep, gcContext); @@ -361,36 +334,45 @@ std::shared_ptr TBlobManager::BuildGCTas if (!DrainKeepTo(newCollectGenStep, gcContext)) { break; } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + if (newCollectGenStep.Generation() == CurrentGen) { + CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + } } - if (!CollectGenStepInFlight) { - CollectGenStepInFlight = LastCollectedGenStep; + if (CollectGenStepInFlight) { + PopGCBarriers(*CollectGenStepInFlight); + if (FirstGC) { + gcContext.InitializeFirst(TabletInfo); + FirstGC = false; + } + if (!BlobsToKeep.IsEmpty()) { + AFL_VERIFY(*CollectGenStepInFlight < BlobsToKeep.GetMinGenStepVerified())("gs", *CollectGenStepInFlight)("first", BlobsToKeep.GetMinGenStepVerified()); + } + AFL_VERIFY(LastCollectedGenStep < *CollectGenStepInFlight); } - PopGCBarriers(*CollectGenStepInFlight); - AFL_VERIFY(LastCollectedGenStep <= *CollectGenStepInFlight); - 
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", *CollectGenStepInFlight)("current_gen", CurrentGen); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", CollectGenStepInFlight)("current_gen", CurrentGen); - const bool isFull = gcContext.IsFull(); + if (gcContext.IsFull()) { + PreviousGCTime = TInstant::Zero(); + } + BlobsManagerCounters.GCCounters.OnGCTask(gcContext.GetKeepsToErase().size(), gcContext.GetKeepBytes(), + gcContext.GetExtractedToRemoveFromDB().GetSize(), gcContext.GetDeleteBytes(), gcContext.IsFull(), !!CollectGenStepInFlight); auto removeCategories = sharedBlobsInfo->BuildRemoveCategories(std::move(gcContext.MutableExtractedToRemoveFromDB())); - - auto result = std::make_shared(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), *CollectGenStepInFlight, - std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); + auto result = std::make_shared(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), + CollectGenStepInFlight, std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); if (result->IsEmpty()) { + BlobsManagerCounters.GCCounters.OnEmptyGCTask(); CollectGenStepInFlight = {}; return nullptr; } - if (isFull) { - PreviousGCTime = TInstant::Zero(); - } - return result; } TBlobBatch TBlobManager::StartBlobBatch() { - ++CurrentStep; + AFL_VERIFY(++CurrentStep < Max() - 10); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); AFL_VERIFY(TabletInfo->Channels.size() > 2); const auto& channel = TabletInfo->Channels[(CurrentStep % (TabletInfo->Channels.size() - 2)) + 2]; ++CountersUpdate.BatchesStarted; @@ -418,11 +400,9 @@ void TBlobManager::DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) { AFL_VERIFY(genStep > edgeGenStep)("gen_step", genStep)("edge_gen_step", edgeGenStep)("blob_id", blobId.ToStringNew()); 
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", logoBlobId.ToString()); - BlobsManagerCounters.OnKeepMarker(logoBlobId.BlobSize()); - AFL_VERIFY(BlobsToKeep[genStep].emplace(logoBlobId).second); + AFL_VERIFY(BlobsToKeep.Add(logoBlobId)); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); } - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - blobBatch.BatchInfo->GenStepRef.Reset(); } @@ -458,33 +438,42 @@ void TBlobManager::DeleteBlobOnComplete(const TTabletId tabletId, const TUnified if (!IsBlobInUsage(blobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); AFL_VERIFY(BlobsToDelete.Add(tabletId, blobId)); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); } else { - BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize()); LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); - BlobsToDeleteDelayed.Add(tabletId, blobId); + AFL_VERIFY(BlobsToDeleteDelayed.Add(tabletId, blobId)); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } -void TBlobManager::OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - db.SaveLastGcBarrier(genStep); +void TBlobManager::OnGCFinishedOnExecute(const std::optional& genStep, IBlobManagerDb& db) { + if (genStep) { + db.SaveLastGcBarrier(*genStep); + } } -void TBlobManager::OnGCFinishedOnComplete(const TGenStep& genStep) { - LastCollectedGenStep = genStep; - AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); - CollectGenStepInFlight.reset(); +void TBlobManager::OnGCFinishedOnComplete(const std::optional& genStep) { + if (genStep) { + LastCollectedGenStep = *genStep; + AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); + 
CollectGenStepInFlight.reset(); + } else { + AFL_VERIFY(!CollectGenStepInFlight); + } } -void TBlobManager::OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - AFL_VERIFY(LastCollectedGenStep <= genStep)("last", LastCollectedGenStep)("prepared", genStep); - db.SaveGCBarrierPreparation(genStep); +void TBlobManager::OnGCStartOnExecute(const std::optional& genStep, IBlobManagerDb& db) { + if (genStep) { + AFL_VERIFY(LastCollectedGenStep < *genStep)("last", LastCollectedGenStep)("prepared", genStep); + db.SaveGCBarrierPreparation(*genStep); + } } -void TBlobManager::OnGCStartOnComplete(const TGenStep& genStep) { - AFL_VERIFY(GCBarrierPreparation <= genStep)("last", GCBarrierPreparation)("prepared", genStep); - GCBarrierPreparation = genStep; +void TBlobManager::OnGCStartOnComplete(const std::optional& genStep) { + if (genStep) { + AFL_VERIFY(GCBarrierPreparation <= *genStep)("last", GCBarrierPreparation)("prepared", genStep); + GCBarrierPreparation = *genStep; + } } void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { @@ -492,8 +481,8 @@ void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { // Check if the blob is marked for delayed deletion if (BlobsToDeleteDelayed.ExtractBlobTo(blobId, BlobsToDelete)) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("blob_id", blobId)("event", "blob_delayed_deleted"); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.GetLogoBlobId().BlobSize()); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h index c7a507553c24..52e0f573eb60 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h @@ -133,9 +133,6 @@ struct TBlobManagerCounters { // The implementation of BlobManager that 
hides all GC-related details class TBlobManager : public IBlobManager, public TCommonBlobsTracker { -private: - static constexpr ui64 GC_INTERVAL_SECONDS = 30; - private: using TBlobAddress = NBlobOperations::NBlobStorage::TBlobAddress; class TGCContext; @@ -145,7 +142,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { ui32 CurrentStep; std::optional CollectGenStepInFlight; // Lists of blobs that need Keep flag to be set - std::map> BlobsToKeep; + TBlobsByGenStep BlobsToKeep; // Lists of blobs that need DoNotKeep flag to be set TTabletsByBlob BlobsToDelete; @@ -173,7 +170,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { virtual void DoSaveBlobBatchOnExecute(const TBlobBatch& blobBatch, IBlobManagerDb& db) override; virtual void DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) override; void DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext); - [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity = true); + [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext); public: TBlobManager(TIntrusivePtr tabletInfo, const ui32 gen, const TTabletId selfTabletId); @@ -215,11 +212,11 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, const std::shared_ptr& counters) noexcept; - void OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCFinishedOnComplete(const TGenStep& genStep); + void OnGCFinishedOnExecute(const std::optional& genStep, IBlobManagerDb& db); + void OnGCFinishedOnComplete(const std::optional& genStep); - void OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCStartOnComplete(const TGenStep& genStep); + void OnGCStartOnExecute(const std::optional& genStep, IBlobManagerDb& db); + void OnGCStartOnComplete(const std::optional& genStep); TBlobManagerCounters GetCountersUpdate() { 
TBlobManagerCounters res = CountersUpdate; @@ -239,7 +236,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false); TGenStep EdgeGenStep() const { - return CollectGenStepInFlight ? *CollectGenStepInFlight : LastCollectedGenStep; + return CollectGenStepInFlight ? *CollectGenStepInFlight : std::max(GCBarrierPreparation, LastCollectedGenStep); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp index ca22c12bd968..a72c6fb413de 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp @@ -31,7 +31,7 @@ bool TGCTask::DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& /*self*/, return true; } -TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, +TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional& collectGenStepInFlight, std::deque&& keepsToErase, const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, const ui64 tabletId, const ui64 currentGen) : TBase(storageId, std::move(blobsToRemove), counters) @@ -65,8 +65,8 @@ std::unique_ptr TGCTask::BuildRequest(const T ("count", it->second.RequestsCount); auto result = std::make_unique( TabletId, CurrentGen, PerGenerationCounter.Val(), - address.GetChannelId(), true, - CollectGenStepInFlight.Generation(), CollectGenStepInFlight.Step(), + address.GetChannelId(), !!CollectGenStepInFlight, + CollectGenStepInFlight ? CollectGenStepInFlight->Generation() : 0, CollectGenStepInFlight ? 
CollectGenStepInFlight->Step() : 0, new TVector(it->second.KeepList.begin(), it->second.KeepList.end()), new TVector(it->second.DontKeepList.begin(), it->second.DontKeepList.end()), TInstant::Max(), true); diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.h b/ydb/core/tx/columnshard/blobs_action/bs/gc.h index a8f334780417..5471fc04c0dd 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.h @@ -21,7 +21,7 @@ class TGCTask: public IBlobsGCAction { using TGCListsByGroup = THashMap; private: TGCListsByGroup ListsByGroupId; - const TGenStep CollectGenStepInFlight; + const std::optional CollectGenStepInFlight; const ui64 TabletId; const ui64 CurrentGen; std::deque KeepsToErase; @@ -35,11 +35,11 @@ class TGCTask: public IBlobsGCAction { virtual bool DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) override; virtual bool DoIsEmpty() const override { - return false; + return !CollectGenStepInFlight && KeepsToErase.empty(); } public: - TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, + TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional& collectGenStepInFlight, std::deque&& keepsToErase, const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, const ui64 tabletId, const ui64 currentGen); const TGCListsByGroup& GetListsByGroupId() const { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp index 021abc972a0d..06ebc64eccf3 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp @@ -15,13 +15,10 @@ void TWriteAction::DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self, co ui64 blobsWritten = BlobBatch.GetBlobCount(); ui64 bytesWritten = BlobBatch.GetTotalSize(); if 
(blobsWroteSuccessfully) { - self.IncCounter(NColumnShard::COUNTER_UPSERT_BLOBS_WRITTEN, blobsWritten); - self.IncCounter(NColumnShard::COUNTER_UPSERT_BYTES_WRITTEN, bytesWritten); - // self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_UPSERTED, insertedBytes); - self.IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + self.Counters.GetTabletCounters()->OnWriteSuccess(blobsWritten, bytesWritten); Manager->SaveBlobBatchOnComplete(std::move(BlobBatch)); } else { - self.IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + self.Counters.GetTabletCounters()->OnWriteFailure(); } } diff --git a/ydb/core/tx/columnshard/blobs_action/common/const.h b/ydb/core/tx/columnshard/blobs_action/common/const.h index 8901620b2dd0..d599b97050ce 100644 --- a/ydb/core/tx/columnshard/blobs_action/common/const.h +++ b/ydb/core/tx/columnshard/blobs_action/common/const.h @@ -7,6 +7,7 @@ class TGlobal { public: static const inline TString DefaultStorageId = "__DEFAULT"; static const inline TString MemoryStorageId = "__MEMORY"; + static const inline TString LocalMetadataStorageId = "__LOCAL_METADATA"; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/local/storage.cpp b/ydb/core/tx/columnshard/blobs_action/local/storage.cpp new file mode 100644 index 000000000000..141ffe3e7e0c --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/storage.cpp @@ -0,0 +1,62 @@ +#include "storage.h" + +namespace NKikimr::NOlap::NBlobOperations::NLocal { + +TOperator::TOperator(const TString& storageId, const std::shared_ptr& storageSharedBlobsManager) + : TBase(storageId, storageSharedBlobsManager) +{ +} + +namespace { +class TBlobInUseTracker: public IBlobInUseTracker { +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& /*blobId*/) 
const override { + AFL_VERIFY(false); + return false; + } + +public: +}; +} + +std::shared_ptr TOperator::GetBlobsTracker() const { + static std::shared_ptr result = std::make_shared(); + return result; +} + +namespace { +class TBlobsDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; +protected: + virtual void DoDeclareRemove(const TTabletId /*tabletId*/, const TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + } + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& /*dbBlobs*/, const bool /*blobsWroteSuccessfully*/) override { + } + virtual void DoOnCompleteTxAfterRemoving(const bool /*blobsWroteSuccessfully*/) override { + + } + +public: + TBlobsDeclareRemovingAction(const TString& storageId, const TTabletId selfTabletId, const std::shared_ptr& counters) + : TBase(storageId, selfTabletId, counters) { + } +}; +} + +std::shared_ptr TOperator::DoStartDeclareRemovingAction( + const std::shared_ptr& counters) { + static std::shared_ptr result = std::make_shared(GetStorageId(), GetSelfTabletId(), counters); + return result; +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/storage.h b/ydb/core/tx/columnshard/blobs_action/local/storage.h new file mode 100644 index 000000000000..beb5c4286cab --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/storage.h @@ -0,0 +1,53 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NBlobOperations::NLocal { + +class TOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + NSplitter::TSplitSettings SplitSettings = Default(); + +protected: + virtual const NSplitter::TSplitSettings& DoGetBlobSplitSettings() const override { + return SplitSettings; + } + virtual std::shared_ptr DoStartDeclareRemovingAction( + const std::shared_ptr& /*counters*/) override; + virtual std::shared_ptr DoStartWritingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + 
virtual std::shared_ptr DoStartReadingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + virtual std::shared_ptr DoCreateGCAction(const std::shared_ptr& /*counters*/) const override { + return nullptr; + } + virtual void DoStartGCAction(const std::shared_ptr& /*action*/) const override { + AFL_VERIFY(false)("problem", "unimplemented method"); + }; + virtual bool DoLoad(IBlobManagerDb& /*dbBlobs*/) override { + return true; + }; + virtual void DoOnTieringModified(const std::shared_ptr& /*tiers*/) override { + return; + }; + +public: + TOperator(const TString& storageId, const std::shared_ptr& storageSharedBlobsManager); + + virtual TTabletsByBlob GetBlobsToDelete() const override { + return Default(); + } + + virtual std::shared_ptr GetBlobsTracker() const override; + + virtual bool HasToDelete(const TUnifiedBlobId& /*blobId*/, const TTabletId /*tabletId*/) const override { + return false; + } + +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/ya.make b/ydb/core/tx/columnshard/blobs_action/local/ya.make new file mode 100644 index 000000000000..ed4199e82243 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp index 255845c9fb92..bd2a8acec539 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp @@ -1,18 +1,22 @@ #include "manager.h" -#include + #include +#include +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif -#include #include +#include namespace NKikimr::NOlap { std::shared_ptr TStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return 
std::make_shared(storageId, Shard.SelfId(), Shard.Info(), - Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard.SelfId(), Shard.Info(), Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared(storageId, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS { @@ -20,7 +24,7 @@ std::shared_ptr TStoragesManager::DoBuild TGuard g(mutexLocal); Singleton()->SetSecretKey("fakeSecret"); } - return std::make_shared(storageId, Shard.SelfId(), + return std::make_shared(storageId, Shard.SelfId(), std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), Shard.Executor()->Generation()); #else @@ -30,7 +34,8 @@ std::shared_ptr TStoragesManager::DoBuild return nullptr; } else { #ifndef KIKIMR_DISABLE_S3_OPS - return std::make_shared(storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); #else return nullptr; #endif @@ -43,9 +48,7 @@ bool TStoragesManager::DoLoadIdempotency(NTable::TDatabase& database) { TStoragesManager::TStoragesManager(NColumnShard::TColumnShard& shard) : Shard(shard) - , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) -{ - + , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) { } -} \ No newline at end of file +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make index b79b6720608b..ad3f37f24574 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make @@ -7,6 +7,7 @@ 
SRCS( PEERDIR( ydb/core/tx/columnshard/data_sharing/manager ydb/core/tx/columnshard/blobs_action/bs + ydb/core/tx/columnshard/blobs_action/local ) IF (OS_WINDOWS) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp index bda89c9c9daf..15a05e7108a7 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp @@ -9,6 +9,8 @@ bool TTxInsertTableCleanup::Execute(TTransactionContext& txc, const TActorContex NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); NIceDb::TNiceDb db(txc.DB); + Self->TryAbortWrites(db, dbTable, std::move(WriteIdsToAbort)); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); auto allAborted = Self->InsertTable->GetAborted(); auto storage = Self->StoragesManager->GetInsertOperator(); diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h index 6996333a0bd3..96d8f09e5e6d 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h @@ -5,11 +5,13 @@ namespace NKikimr::NColumnShard { class TTxInsertTableCleanup: public TTransactionBase { private: + THashSet WriteIdsToAbort; std::shared_ptr BlobsAction; public: - TTxInsertTableCleanup(TColumnShard* self) - : TBase(self) { - Y_ABORT_UNLESS(self->InsertTable->GetAborted().size()); + TTxInsertTableCleanup(TColumnShard* self, THashSet&& writeIdsToAbort) + : TBase(self) + , WriteIdsToAbort(std::move(writeIdsToAbort)) { + Y_ABORT_UNLESS(WriteIdsToAbort.size() || self->InsertTable->GetAborted().size()); } ~TTxInsertTableCleanup() { diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h index 937174875fb2..437a16874bce 
100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h @@ -36,8 +36,12 @@ class TTxRemoveSharedBlobs: public TTransactionBase { for (auto it = categories.GetDirect().GetIterator(); it.IsValid(); ++it) { RemoveAction->DeclareRemove(it.GetTabletId(), it.GetBlobId()); } + for (auto it = categories.GetBorrowed().GetIterator(); it.IsValid(); ++it) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_BLOBS)("problem", "borrowed_to_remove")("blob_id", it.GetBlobId())("tablet_id", it.GetTabletId()); + } AFL_VERIFY(categories.GetBorrowed().IsEmpty()); - AFL_VERIFY(categories.GetSharing().GetSize() == SharingBlobIds.GetSize()); + AFL_VERIFY(categories.GetSharing().GetSize() == SharingBlobIds.GetSize())("sharing_category", categories.GetSharing().GetSize())( + "sharing", SharingBlobIds.GetSize()); } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp index 853bcb7c5463..96a5cf794190 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -1,13 +1,17 @@ #include "tx_write.h" +#include +#include + namespace NKikimr::NColumnShard { -bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId) { +bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId) { NKikimrTxColumnShard::TLogicalMetadata meta; meta.SetNumRows(batch->GetRowsCount()); meta.SetRawBytes(batch->GetRawBytes()); meta.SetDirtyWriteTimeSeconds(batch.GetStartInstant().Seconds()); - meta.SetSpecialKeysRawData(batch->GetSpecialKeysSafe().SerializeToString()); + meta.SetSpecialKeysRawData(batch->GetSpecialKeysFullSafe()); + 
meta.SetSpecialKeysPayloadData(batch->GetSpecialKeysPayloadSafe()); const auto& blobRange = batch.GetRange(); Y_ABORT_UNLESS(blobRange.GetBlobId().IsValid()); @@ -16,12 +20,14 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - const auto& writeMeta = batch.GetAggregation().GetWriteData()->GetWriteMeta(); + const auto& writeMeta = batch.GetAggregation().GetWriteMeta(); meta.SetModificationType(TEnumOperator::SerializeToProto(writeMeta.GetModificationType())); - auto schemeVersion = batch.GetAggregation().GetWriteData()->GetData()->GetSchemaVersion(); + *meta.MutableSchemaSubset() = batch.GetAggregation().GetSchemaSubset().SerializeToProto(); + auto schemeVersion = batch.GetAggregation().GetSchemaVersion(); auto tableSchema = Self->TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchemaVerified(schemeVersion); - NOlap::TInsertedData insertData((ui64)writeId, writeMeta.GetTableId(), writeMeta.GetDedupId(), blobRange, meta, tableSchema->GetVersion(), batch->GetData()); + auto userData = std::make_shared(writeMeta.GetTableId(), blobRange, meta, tableSchema->GetVersion(), batch->GetData()); + NOlap::TInsertedData insertData(writeId, userData); bool ok = Self->InsertTable->Insert(dbTable, std::move(insertData)); if (ok) { Self->UpdateInsertTableCounters(); @@ -32,42 +38,37 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { TMemoryProfileGuard mpg("TTxWrite::Execute"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); ACFL_DEBUG("event", 
"start_execute"); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& aggr : buffer.GetAggregations()) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); Y_ABORT_UNLESS(Self->TablesManager.IsReadyForWrite(writeMeta.GetTableId())); txc.DB.NoMoreReadsForTx(); TWriteOperation::TPtr operation; if (writeMeta.HasLongTxId()) { + NIceDb::TNiceDb db(txc.DB); + const TInsertWriteId insertWriteId = + Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); + aggr->AddInsertWriteId(insertWriteId); if (writeMeta.IsGuaranteeWriter()) { AFL_VERIFY(aggr->GetSplittedBlobs().size() == 1)("count", aggr->GetSplittedBlobs().size()); } else { AFL_VERIFY(aggr->GetSplittedBlobs().size() <= 1)("count", aggr->GetSplittedBlobs().size()); } + if (aggr->GetSplittedBlobs().size() == 1) { + AFL_VERIFY(InsertOneBlob(txc, aggr->GetSplittedBlobs().front(), insertWriteId))("write_id", writeMeta.GetWriteId())( + "insert_write_id", insertWriteId); + } } else { - operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - } - - auto writeId = TWriteId(writeMeta.GetWriteId()); - if (!operation) { - NIceDb::TNiceDb db(txc.DB); - writeId = Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); - aggr->AddWriteId(writeId); - } - - for (auto&& i : aggr->GetSplittedBlobs()) { - if (operation) { - writeId = Self->BuildNextWriteId(txc); - aggr->AddWriteId(writeId); - } - - if (!InsertOneBlob(txc, i, writeId)) { - LOG_S_DEBUG(TxPrefix() << "duplicate writeId " << (ui64)writeId << TxSuffix()); - Self->IncCounter(COUNTER_WRITE_DUPLICATE); + for 
(auto&& i : aggr->GetSplittedBlobs()) { + const TInsertWriteId insertWriteId = Self->InsertTable->BuildNextWriteId(txc); + aggr->AddInsertWriteId(insertWriteId); + AFL_VERIFY(InsertOneBlob(txc, i, insertWriteId))("write_id", writeMeta.GetWriteId())("insert_write_id", insertWriteId)( + "size", aggr->GetSplittedBlobs().size()); } } } @@ -82,34 +83,43 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { } Results.clear(); for (auto&& aggr : buffer.GetAggregations()) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); if (!writeMeta.HasLongTxId()) { - auto operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + auto operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - operation->OnWriteFinish(txc, aggr->GetWriteIds()); - if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { + operation->OnWriteFinish(txc, aggr->GetInsertWriteIds(), operation->GetBehaviour() == EOperationBehaviour::NoTxWrite); + Self->OperationsManager->LinkInsertWriteIdToOperationWriteId(aggr->GetInsertWriteIds(), operation->GetWriteId()); + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID()); + Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); + Self->OperationsManager->AddTemporaryTxLink(operation->GetLockId()); + Self->OperationsManager->CommitTransactionOnExecute(*Self, operation->GetLockId(), txc, Self->GetLastTxSnapshot()); + } else if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { NKikimrTxColumnShard::TCommitWriteTxBody proto; proto.SetLockId(operation->GetLockId()); TString txBody; Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); auto op = 
Self->GetProgressTxController().StartProposeOnExecute( - TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), txBody, - txc); + TTxController::TTxInfo( + NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), + txBody, txc); AFL_VERIFY(!op->IsFail()); ResultOperators.emplace_back(op); } else { + auto& info = Self->OperationsManager->GetLockVerified(operation->GetLockId()); NKikimrDataEvents::TLock lock; lock.SetLockId(operation->GetLockId()); lock.SetDataShard(Self->TabletID()); - lock.SetGeneration(1); - lock.SetCounter(1); + lock.SetGeneration(info.GetGeneration()); + lock.SetCounter(info.GetInternalGenerationCounter()); + lock.SetPathId(writeMeta.GetTableId()); auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), operation->GetLockId(), lock); Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); } } else { - Y_ABORT_UNLESS(aggr->GetWriteIds().size() == 1); - auto ev = std::make_unique(Self->TabletID(), writeMeta, (ui64)aggr->GetWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + Y_ABORT_UNLESS(aggr->GetInsertWriteIds().size() == 1); + auto ev = std::make_unique( + Self->TabletID(), writeMeta, (ui64)aggr->GetInsertWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); Results.emplace_back(std::move(ev), writeMeta.GetSource(), 0); } } @@ -118,7 +128,8 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { void TTxWrite::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxWrite::Complete"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", 
Self->TabletID())("tx_state", "complete"); const auto now = TMonotonic::Now(); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& i : buffer.GetAddActions()) { @@ -136,11 +147,22 @@ void TTxWrite::Complete(const TActorContext& ctx) { i.DoSendReply(ctx); } for (ui32 i = 0; i < buffer.GetAggregations().size(); ++i) { - const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteData()->GetWriteMeta(); - Self->CSCounters.OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); - Self->CSCounters.OnSuccessWriteResponse(); + const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteMeta(); + if (!writeMeta.HasLongTxId()) { + auto op = Self->GetOperationsManager().GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + if (op->GetBehaviour() == EOperationBehaviour::WriteWithLock || op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto evWrite = std::make_shared(writeMeta.GetTableId(), + buffer.GetAggregations()[i]->GetRecordBatch(), Self->GetIndexOptional()->GetVersionedIndex().GetPrimaryKey()); + Self->GetOperationsManager().AddEventForLock(*Self, op->GetLockId(), evWrite); + } + if (op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + Self->OperationsManager->CommitTransactionOnComplete(*Self, op->GetLockId(), Self->GetLastTxSnapshot()); + } + } + Self->Counters.GetCSCounters().OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); + Self->Counters.GetCSCounters().OnSuccessWriteResponse(); } - + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h index 98de301e5e16..84ffbe7a9005 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -43,7 +43,7 @@ class TTxWrite : public 
NTabletFlatExecutor::TTransactionBase { std::vector> ResultOperators; - bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId); + bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId); TStringBuilder TxPrefix() const { return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp index a04c6fb2abef..57a1eee50146 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp @@ -43,7 +43,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); } - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); return true; } diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index fb4e4a9f53b4..f3a6b9e99db9 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -1,14 +1,17 @@ #include "columnshard_impl.h" + +#include "bg_tasks/manager/manager.h" #include "blobs_reader/actor.h" +#include "counters/aggregation/table_stats.h" +#include "engines/column_engine_logs.h" +#include "engines/writer/buffer/actor.h" #include "hooks/abstract/abstract.h" #include "resource_subscriber/actor.h" -#include "engines/writer/buffer/actor.h" -#include "engines/column_engine_logs.h" -#include "bg_tasks/manager/manager.h" +#include "transactions/locks/read_finished.h" -#include -#include #include +#include +#include namespace NKikimr { @@ -16,7 +19,7 @@ IActor* CreateColumnShard(const TActorId& tablet, TTabletStorageInfo* info) { return new NColumnShard::TColumnShard(info, tablet); } -} +} // 
namespace NKikimr namespace NKikimr::NColumnShard { @@ -26,6 +29,9 @@ void TColumnShard::CleanupActors(const TActorContext& ctx) { } ctx.Send(ResourceSubscribeActor, new TEvents::TEvPoisonPill); ctx.Send(BufferizationWriteActorId, new TEvents::TEvPoisonPill); + for (auto&& i : ActorsToStop) { + ctx.Send(i, new TEvents::TEvPoisonPill); + } StoragesManager->Stop(); DataLocksManager->Stop(); @@ -43,7 +49,8 @@ void TColumnShard::BecomeBroken(const TActorContext& ctx) { void TColumnShard::SwitchToWork(const TActorContext& ctx) { { - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SwitchToWork"); for (auto&& i : TablesManager.GetTables()) { @@ -54,24 +61,30 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { SignalTabletActive(ctx); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SignalTabletActive"); TryRegisterMediatorTimeCast(); - EnqueueProgressTx(ctx); + EnqueueProgressTx(ctx, std::nullopt); } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); EnqueueBackgroundActivities(); BackgroundSessionsManager->Start(); + ctx.Send(SelfId(), new NActors::TEvents::TEvWakeup()); ctx.Send(SelfId(), new TEvPrivate::TEvPeriodicWakeup()); + ctx.Send(SelfId(), new TEvPrivate::TEvPingSnapshotsUsage()); NYDBTest::TControllers::GetColumnShardController()->OnSwitchToWork(TabletID()); + AFL_VERIFY(!!StartInstant); + Counters.GetCSCounters().Initialization.OnSwitchToWork(TMonotonic::Now() - *StartInstant, TMonotonic::Now() - CreateInstant); } void TColumnShard::OnActivateExecutor(const TActorContext& ctx) 
{ - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + StartInstant = TMonotonic::Now(); + Counters.GetCSCounters().Initialization.OnActivateExecutor(TMonotonic::Now() - CreateInstant); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "OnActivateExecutor"); - Executor()->RegisterExternalTabletCounters(TabletCountersPtr.release()); + Executor()->RegisterExternalTabletCounters(TabletCountersHolder.release()); const auto selfActorId = SelfId(); StoragesManager->Initialize(Executor()->Generation()); - Tiers = std::make_shared(TabletID(), SelfId(), - [selfActorId](const TActorContext& ctx) { + Tiers = std::make_shared(TabletID(), SelfId(), [selfActorId](const TActorContext& ctx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_new_event"); ctx.Send(selfActorId, new TEvPrivate::TEvTieringModified); }); @@ -79,7 +92,8 @@ void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { if (!NMetadata::NProvider::TServiceOperator::IsEnabled()) { Tiers->TakeConfigs(NYDBTest::TControllers::GetColumnShardController()->GetFallbackTiersSnapshot(), nullptr); } - BackgroundSessionsManager = std::make_shared(std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); + BackgroundSessionsManager = std::make_shared( + std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "initialize_tiring_finished"); auto& icb = *AppData(ctx)->Icb; @@ -141,14 +155,14 @@ void TColumnShard::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const LOG_S_DEBUG("Server pipe reset at tablet " << TabletID()); } -void TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext &ctx) { +void 
TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); - IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); - IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); } -void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext &ctx) { +void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); ui64 readCookie = ev->Get()->RequestCookie; LOG_S_DEBUG("Finished read cookie: " << readCookie << " at tablet " << TabletID()); @@ -156,17 +170,28 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon if (HasIndex()) { index = &GetIndexAs().GetVersionedIndex(); } - InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie, index); + + auto readMetaBase = InFlightReadsTracker.ExtractInFlightRequest(ev->Get()->RequestCookie, index, TInstant::Now()); + readMetaBase->OnReadFinished(*this); ui64 txId = ev->Get()->TxId; if (ScanTxInFlight.contains(txId)) { TDuration duration = TAppData::TimeProvider->Now() - ScanTxInFlight[txId]; - IncCounter(COUNTER_SCAN_LATENCY, duration); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCAN_LATENCY, duration); ScanTxInFlight.erase(txId); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } } +void TColumnShard::Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& /*ev*/, const TActorContext& ctx) { + if (auto writeTx = + InFlightReadsTracker.Ping(this, NYDBTest::TControllers::GetColumnShardController()->GetPingCheckPeriod(), TInstant::Now())) { + Execute(writeTx.release(), ctx); + } + ctx.Schedule(0.3 * GetMaxReadStaleness(), new 
TEvPrivate::TEvPingSnapshotsUsage()); +} + void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) { if (ev->Get()->Manual) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); @@ -176,10 +201,20 @@ void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorC SendWaitPlanStep(GetOutdatedStep()); SendPeriodicStats(); + EnqueueBackgroundActivities(); ctx.Schedule(PeriodicWakeupActivationPeriod, new TEvPrivate::TEvPeriodicWakeup()); } } +void TColumnShard::Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Tag == 0) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); + const TMonotonic now = TMonotonic::Now(); + GetProgressTxController().PingTimeouts(now); + ctx.Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup(0)); + } +} + void TColumnShard::Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, const TActorContext&) { const auto* msg = ev->Get(); Y_ABORT_UNLESS(msg->TabletId == TabletID()); @@ -213,14 +248,13 @@ void TColumnShard::UpdateInsertTableCounters() { auto& prepared = InsertTable->GetCountersPrepared(); auto& committed = InsertTable->GetCountersCommitted(); - SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); - SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); - SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); - SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); - LOG_S_INFO("InsertTable. 
Prepared: " << prepared.Bytes << " in " << prepared.Rows - << " records, committed: " << committed.Bytes << " in " << committed.Rows - << " records at tablet " << TabletID()); + LOG_S_TRACE("InsertTable. Prepared: " << prepared.Bytes << " in " << prepared.Rows << " records, committed: " << committed.Bytes << " in " + << committed.Rows << " records at tablet " << TabletID()); } void TColumnShard::UpdateIndexCounters() { @@ -229,70 +263,63 @@ void TColumnShard::UpdateIndexCounters() { } auto& stats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - SetCounter(COUNTER_INDEX_TABLES, stats.Tables); - SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); - SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); - SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); - SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); - SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); - SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); - SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); - SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); - SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); - SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); - SetCounter(COUNTER_COMPACTED_RAW_BYTES, stats.GetCompactedStats().RawBytes); - SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); - SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, stats.GetSplitCompactedStats().Blobs); - SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); - SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); - SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); - SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); - SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); - 
SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); - SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); - SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); - SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); - SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); - SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); - SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); - SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); - - LOG_S_DEBUG("Index: tables " << stats.Tables - << " inserted " << stats.GetInsertedStats().DebugString() - << " compacted " << stats.GetCompactedStats().DebugString() - << " s-compacted " << stats.GetSplitCompactedStats().DebugString() - << " inactive " << stats.GetInactiveStats().DebugString() - << " evicted " << stats.GetEvictedStats().DebugString() - << " column records " << stats.ColumnRecords - << " at tablet " << TabletID()); + const std::shared_ptr& counters = Counters.GetTabletCounters(); + counters->SetCounter(COUNTER_INDEX_TABLES, stats.Tables); + counters->SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); + counters->SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); + counters->SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); + counters->SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); + counters->SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); + counters->SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); + counters->SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); + counters->SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); + counters->SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); + counters->SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); + counters->SetCounter(COUNTER_COMPACTED_RAW_BYTES, 
stats.GetCompactedStats().RawBytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, stats.GetSplitCompactedStats().Blobs); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); + counters->SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); + counters->SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); + counters->SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); + counters->SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); + counters->SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); + counters->SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); + counters->SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); + counters->SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); + counters->SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); + counters->SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); + + LOG_S_DEBUG("Index: tables " << stats.Tables << " inserted " << stats.GetInsertedStats().DebugString() << " compacted " + << stats.GetCompactedStats().DebugString() << " s-compacted " << stats.GetSplitCompactedStats().DebugString() + << " inactive " << stats.GetInactiveStats().DebugString() << " evicted " + << stats.GetEvictedStats().DebugString() << " column records " << stats.ColumnRecords << " at tablet " + << TabletID()); } ui64 TColumnShard::MemoryUsage() const { - ui64 memory = - ProgressTxController->GetMemoryUsage() + - ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + - LongTxWrites.size() * (sizeof(TWriteId) + sizeof(TLongTxWriteInfo)) + - 
LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + - (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + - TabletCounters->Simple()[COUNTER_PREPARED_RECORDS].Get() * sizeof(NOlap::TInsertedData) + - TabletCounters->Simple()[COUNTER_COMMITTED_RECORDS].Get() * sizeof(NOlap::TInsertedData); + ui64 memory = ProgressTxController->GetMemoryUsage() + ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + + LongTxWrites.size() * (sizeof(TInsertWriteId) + sizeof(TLongTxWriteInfo)) + + LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + + (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_RECORDS) * sizeof(NOlap::TInsertedData) + + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_RECORDS) * sizeof(NOlap::TInsertedData); memory += TablesManager.GetMemoryUsage(); return memory; } void TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& usage) { - auto * metrics = Executor()->GetResourceMetrics(); + auto* metrics = Executor()->GetResourceMetrics(); if (!metrics) { return; } ui64 storageBytes = - TabletCounters->Simple()[COUNTER_PREPARED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INSERTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_SPLIT_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INACTIVE_BYTES].Get(); + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_INSERTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMPACTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_SPLIT_COMPACTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_INACTIVE_BYTES); ui64 memory = MemoryUsage(); @@ -308,35 +335,6 @@ void 
TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& metrics->TryUpdate(ctx); } -void TColumnShard::ConfigureStats(const NOlap::TColumnEngineStats& indexStats, - ::NKikimrTableStats::TTableStats* tabletStats) { - NOlap::TSnapshot lastIndexUpdate = TablesManager.GetPrimaryIndexSafe().LastUpdate(); - auto activeIndexStats = indexStats.Active(); // data stats excluding inactive and evicted - - if (activeIndexStats.Rows < 0 || activeIndexStats.Bytes < 0) { - LOG_S_WARN("Negative stats counter. Rows: " << activeIndexStats.Rows << " Bytes: " << activeIndexStats.Bytes - << TabletID()); - - activeIndexStats.Rows = (activeIndexStats.Rows < 0) ? 0 : activeIndexStats.Rows; - activeIndexStats.Bytes = (activeIndexStats.Bytes < 0) ? 0 : activeIndexStats.Bytes; - } - - tabletStats->SetRowCount(activeIndexStats.Rows); - tabletStats->SetDataSize(activeIndexStats.Bytes + TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get()); - - // TODO: we need row/dataSize counters for evicted data (managed by tablet but stored outside) - // tabletStats->SetIndexSize(); // TODO: calc size of internal tables - - tabletStats->SetLastAccessTime(LastAccessTime.MilliSeconds()); - tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep()); -} - -void TColumnShard::FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const { - tableStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get()); - tableStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get()); - tableStats->SetInFlightTxCount(Executor()->GetStats().TxInFly); -} - void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev) { ev->Record.SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready ev->Record.SetGeneration(Executor()->Generation()); @@ -346,30 +344,24 @@ void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptrGetResourceMetrics()) { resourceMetrics->Fill(*ev->Record.MutableTabletMetrics()); } - 
auto* tabletStats = ev->Record.MutableTableStats(); - FillTxTableStats(tabletStats); + if (TablesManager.HasPrimaryIndex()) { - const auto& indexStats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - ConfigureStats(indexStats, tabletStats); + TTableStatsBuilder statsBuilder(Counters, Executor(), TablesManager.MutablePrimaryIndex()); + statsBuilder.FillTotalTableStats(*ev->Record.MutableTableStats()); } } -void TColumnShard::FillColumnTableStats(const TActorContext& ctx, - std::unique_ptr& ev) { - if (!TablesManager.HasPrimaryIndex()) { - return; - } - const auto& tablesIndexStats = TablesManager.MutablePrimaryIndex().GetStats(); - LOG_S_DEBUG("There are stats for " << tablesIndexStats.size() << " tables"); - for (const auto& [tableLocalID, columnStats] : tablesIndexStats) { - if (!columnStats) { - LOG_S_ERROR("SendPeriodicStats: empty stats"); - continue; - } +void TColumnShard::FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev) { + auto tables = TablesManager.GetTables(); + std::optional tableStatsBuilder = + TablesManager.HasPrimaryIndex() ? 
std::make_optional(Counters, Executor(), TablesManager.MutablePrimaryIndex()) + : std::nullopt; + LOG_S_DEBUG("There are stats for " << tables.size() << " tables"); + for (const auto& [pathId, _] : tables) { auto* periodicTableStats = ev->Record.AddTables(); periodicTableStats->SetDatashardId(TabletID()); - periodicTableStats->SetTableLocalId(tableLocalID); + periodicTableStats->SetTableLocalId(pathId); periodicTableStats->SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready periodicTableStats->SetGeneration(Executor()->Generation()); @@ -381,11 +373,11 @@ void TColumnShard::FillColumnTableStats(const TActorContext& ctx, resourceMetrics->Fill(*periodicTableStats->MutableTabletMetrics()); } - auto* tableStats = periodicTableStats->MutableTableStats(); - FillTxTableStats(tableStats); - ConfigureStats(*columnStats, tableStats); + if (tableStatsBuilder) { + tableStatsBuilder->FillTableStats(pathId, *(periodicTableStats->MutableTableStats())); + } - LOG_S_TRACE("Add stats for table, tableLocalID=" << tableLocalID); + LOG_S_TRACE("Add stats for table, tableLocalID=" << pathId); } } diff --git a/ydb/core/tx/columnshard/columnshard.h b/ydb/core/tx/columnshard/columnshard.h index 90ae1fd8c6b7..186d665153af 100644 --- a/ydb/core/tx/columnshard/columnshard.h +++ b/ydb/core/tx/columnshard/columnshard.h @@ -98,6 +98,7 @@ struct TEvColumnShard { struct TEvInternalScan: public TEventLocal { private: YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::optional, LockId); YDB_ACCESSOR(bool, Reverse, false); YDB_ACCESSOR(ui32, ItemsLimit, 0); YDB_READONLY_DEF(std::vector, ColumnIds); @@ -116,8 +117,9 @@ struct TEvColumnShard { ColumnNames.emplace_back(columnName); } - TEvInternalScan(const ui64 pathId) + TEvInternalScan(const ui64 pathId, const std::optional lockId) : PathId(pathId) + , LockId(lockId) { } diff --git a/ydb/core/tx/columnshard/columnshard__init.cpp b/ydb/core/tx/columnshard/columnshard__init.cpp index aadee209f6dd..09cf1f4ef71f 100644 --- 
a/ydb/core/tx/columnshard/columnshard__init.cpp +++ b/ydb/core/tx/columnshard/columnshard__init.cpp @@ -19,6 +19,9 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; class TTxInit : public TTransactionBase { +private: + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxInit(TColumnShard* self) : TBase(self) @@ -38,7 +41,6 @@ void TTxInit::SetDefaults() { Self->CurrentSchemeShardId = 0; Self->LastSchemaSeqNo = { }; Self->ProcessingParams.reset(); - Self->LastWriteId = TWriteId{0}; Self->LastPlannedStep = 0; Self->LastPlannedTxId = 0; Self->LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -70,7 +72,6 @@ bool TTxInit::Precharge(TTransactionContext& txc) { ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoGeneration, Self->LastSchemaSeqNo.Generation); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoRound, Self->LastSchemaSeqNo.Round); ready = ready && Schema::GetSpecialProtoValue(db, Schema::EValueIds::ProcessingParams, Self->ProcessingParams); - ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastWriteId, Self->LastWriteId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedStep, Self->LastPlannedStep); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedTxId, Self->LastPlannedTxId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); @@ -104,7 +105,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) ACFL_DEBUG("step", "TInsertTable::Load_Start"); TMemoryProfileGuard g("TTxInit/InsertTable"); auto localInsertTable = std::make_unique(); - if (!localInsertTable->Load(dbTable, TAppData::TimeProvider->Now())) { + if (!localInsertTable->Load(db, dbTable, TAppData::TimeProvider->Now())) { ACFL_ERROR("step", "TInsertTable::Load_Fails"); return false; } @@ -165,9 +166,9 @@ bool 
TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->TablesManager = std::move(tManagerLocal); - Self->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); - Self->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); - Self->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); ACFL_DEBUG("step", "TTablesManager::Load_Finish"); } @@ -179,7 +180,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } while (!rowset.EndOfSet()) { - const TWriteId writeId = TWriteId{ rowset.GetValue() }; + const TInsertWriteId writeId = (TInsertWriteId)rowset.GetValue(); const ui32 writePartId = rowset.GetValue(); NKikimrLongTxService::TLongTxId proto; Y_ABORT_UNLESS(proto.ParseFromString(rowset.GetValue())); @@ -222,6 +223,14 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->SharingSessionsManager = local; } + { + TMemoryProfileGuard g("TTxInit/TInFlightReadsTracker"); + TInFlightReadsTracker local(Self->StoragesManager, Self->Counters.GetRequestsTracingCounters()); + if (!local.LoadFromDatabase(txc.DB)) { + return false; + } + Self->InFlightReadsTracker = std::move(local); + } Self->UpdateInsertTableCounters(); Self->UpdateIndexCounters(); @@ -250,7 +259,7 @@ bool TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { } void TTxInit::Complete(const TActorContext& ctx) { - Self->ProgressTxController->StartOperators(); + Self->Counters.GetCSCounters().Initialization.OnTxInitFinished(TMonotonic::Now() - StartInstant); Self->ProgressTxController->OnTabletInit(); 
Self->SwitchToWork(ctx); NYDBTest::TControllers::GetColumnShardController()->OnTabletInitCompleted(*Self); @@ -258,6 +267,8 @@ void TTxInit::Complete(const TActorContext& ctx) { class TTxUpdateSchema : public TTransactionBase { std::vector NormalizerTasks; + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxUpdateSchema(TColumnShard* self) : TBase(self) @@ -296,6 +307,7 @@ bool TTxUpdateSchema::Execute(TTransactionContext& txc, const TActorContext&) { void TTxUpdateSchema::Complete(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("step", "TTxUpdateSchema.Complete"); + Self->Counters.GetCSCounters().Initialization.OnTxUpdateSchemaFinished(TMonotonic::Now() - StartInstant); if (NormalizerTasks.empty()) { AFL_VERIFY(Self->NormalizerController.IsNormalizationFinished())("details", Self->NormalizerController.DebugString()); Self->Execute(new TTxInit(Self), ctx); @@ -361,6 +373,9 @@ void TTxApplyNormalizer::Complete(const TActorContext& ctx) { /// Create local database on tablet start if none class TTxInitSchema : public TTransactionBase { +private: + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxInitSchema(TColumnShard* self) : TBase(self) @@ -423,6 +438,7 @@ bool TTxInitSchema::Execute(TTransactionContext& txc, const TActorContext&) { } void TTxInitSchema::Complete(const TActorContext& ctx) { + Self->Counters.GetCSCounters().Initialization.OnTxInitSchemaFinished(TMonotonic::Now() - StartInstant); LOG_S_DEBUG("TxInitSchema.Complete at tablet " << Self->TabletID();); Self->Execute(new TTxUpdateSchema(Self), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp index cc498125c326..883844cc0357 100644 --- a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp +++ b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp @@ -14,7 +14,7 @@ class TTxNotifyTxCompletion : public TTransactionBase { 
LOG_S_DEBUG("TTxNotifyTxCompletion.Execute at tablet " << Self->TabletID()); const ui64 txId = Ev->Get()->Record.GetTxId(); - auto txOperator = Self->ProgressTxController->GetTxOperator(txId); + auto txOperator = Self->ProgressTxController->GetTxOperatorOptional(txId); if (txOperator) { txOperator->RegisterSubscriber(Ev->Sender); return true; diff --git a/ydb/core/tx/columnshard/columnshard__plan_step.cpp b/ydb/core/tx/columnshard/columnshard__plan_step.cpp index 80dda8112e0b..33420df5905c 100644 --- a/ydb/core/tx/columnshard/columnshard__plan_step.cpp +++ b/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -102,10 +102,10 @@ bool TTxPlanStep::Execute(TTransactionContext& txc, const TActorContext& ctx) { Result = std::make_unique(Self->TabletID(), step); - Self->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); if (plannedCount > 0 || Self->ProgressTxController->HaveOutdatedTxs()) { - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index 0b035d591dd5..73a4a0200d97 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -2,37 +2,62 @@ #include "columnshard_schema.h" #include + #include namespace NKikimr::NColumnShard { -class TColumnShard::TTxProgressTx : public TTransactionBase { +class TColumnShard::TTxProgressTx: public TTransactionBase { +private: + bool AbortedThroughRemoveExpired = false; + TTxController::ITransactionOperator::TPtr TxOperator; + const ui32 TabletTxNo; + std::optional LastCompletedTx; + std::optional PlannedQueueItem; + std::optional StartExecution; + const TMonotonic ConstructionInstant = TMonotonic::Now(); + public: TTxProgressTx(TColumnShard* self) : TTransactionBase(self) - , TabletTxNo(++Self->TabletTxCounter) - {} + , 
TabletTxNo(++Self->TabletTxCounter) { + } - TTxType GetTxType() const override { return TXTYPE_PROGRESS; } + TTxType GetTxType() const override { + return TXTYPE_PROGRESS; + } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); Y_ABORT_UNLESS(Self->ProgressTxInFlight); - Self->TabletCounters->Simple()[COUNTER_TX_COMPLETE_LAG].Set(Self->GetTxCompleteLag().MilliSeconds()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TX_COMPLETE_LAG, Self->GetTxCompleteLag().MilliSeconds()); - size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); + const size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); if (removedCount > 0) { // We cannot continue with this transaction, start a new transaction + AbortedThroughRemoveExpired = true; Self->Execute(new TTxProgressTx(Self), ctx); return true; } // Process a single transaction at the front of the queue - auto plannedItem = Self->ProgressTxController->StartPlannedTx(); + const auto plannedItem = Self->ProgressTxController->GetFirstPlannedTx(); if (!!plannedItem) { PlannedQueueItem.emplace(plannedItem->PlanStep, plannedItem->TxId); ui64 step = plannedItem->PlanStep; ui64 txId = plannedItem->TxId; + TxOperator = Self->ProgressTxController->GetTxOperatorVerified(txId); + if (auto txPrepare = TxOperator->BuildTxPrepareForProgress(Self)) { + AbortedThroughRemoveExpired = true; + Self->ProgressTxInFlight = txId; + Self->Execute(txPrepare.release(), ctx); + return true; + } else { + Self->ProgressTxController->PopFirstPlannedTx(); + } + StartExecution = TMonotonic::Now(); + LastCompletedTx = NOlap::TSnapshot(step, txId); if (LastCompletedTx > 
Self->LastCompletedTx) { NIceDb::TNiceDb db(txc.DB); @@ -40,43 +65,51 @@ class TColumnShard::TTxProgressTx : public TTransactionBase { Schema::SaveSpecialValue(db, Schema::EValueIds::LastCompletedTxId, LastCompletedTx->GetTxId()); } - TxOperator = Self->ProgressTxController->GetVerifiedTxOperator(txId); - AFL_VERIFY(TxOperator->ExecuteOnProgress(*Self, NOlap::TSnapshot(step, txId), txc)); - Self->ProgressTxController->FinishPlannedTx(txId, txc); + AFL_VERIFY(TxOperator->ProgressOnExecute(*Self, NOlap::TSnapshot(step, txId), txc)); + Self->ProgressTxController->ProgressOnExecute(txId, txc); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLANNED_TX_COMPLETED); + } + Self->ProgressTxInFlight = std::nullopt; + if (!!Self->ProgressTxController->GetPlannedTx()) { + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } void Complete(const TActorContext& ctx) override { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); + if (AbortedThroughRemoveExpired) { + return; + } + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); if (TxOperator) { - TxOperator->CompleteOnProgress(*Self, ctx); + TxOperator->ProgressOnComplete(*Self, ctx); Self->RescheduleWaitingReads(); } if (PlannedQueueItem) { - Self->GetProgressTxController().CompleteRunningTx(*PlannedQueueItem); + AFL_VERIFY(TxOperator); + Self->GetProgressTxController().GetCounters().OnTxProgressLag( + TxOperator->GetOpType(), TMonotonic::Now() - TMonotonic::MilliSeconds(PlannedQueueItem->Step)); + Self->GetProgressTxController().ProgressOnComplete(*PlannedQueueItem); } if (LastCompletedTx) { Self->LastCompletedTx = std::max(*LastCompletedTx, Self->LastCompletedTx); } - Self->ProgressTxInFlight = false; - if (!!Self->ProgressTxController->GetPlannedTx()) { - Self->EnqueueProgressTx(ctx); + 
if (StartExecution) { + Self->GetProgressTxController().GetCounters().OnTxExecuteDuration(TxOperator->GetOpType(), TMonotonic::Now() - *StartExecution); + Self->GetProgressTxController().GetCounters().OnTxLiveDuration(TxOperator->GetOpType(), TMonotonic::Now() - ConstructionInstant); } Self->SetupIndexation(); } - -private: - TTxController::ITransactionOperator::TPtr TxOperator; - const ui32 TabletTxNo; - std::optional LastCompletedTx; - std::optional PlannedQueueItem; }; -void TColumnShard::EnqueueProgressTx(const TActorContext& ctx) { +void TColumnShard::EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "EnqueueProgressTx")("tablet_id", TabletID()); - if (!ProgressTxInFlight) { - ProgressTxInFlight = true; + if (continueTxId) { + AFL_VERIFY(!ProgressTxInFlight || ProgressTxInFlight == continueTxId)("current", ProgressTxInFlight)("expected", continueTxId); + } + if (!ProgressTxInFlight || ProgressTxInFlight == continueTxId) { + ProgressTxInFlight = continueTxId.value_or(0); Execute(new TTxProgressTx(this), ctx); } } @@ -101,4 +134,4 @@ void TColumnShard::Handle(TEvColumnShard::TEvCheckPlannedTransaction::TPtr& ev, // For now do not return result for not finished tx. 
It would be sent in TTxProgressTx::Complete() } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp index d42ad4fc8d4f..d4ded82be3d8 100644 --- a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp +++ b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp @@ -25,13 +25,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseIncCounter(COUNTER_PREPARE_REQUEST); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_REQUEST); auto& record = Proto(Ev->Get()); const auto txKind = record.GetTxKind(); const ui64 txId = record.GetTxId(); const auto& txBody = record.GetTxBody(); - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); if (txKind == NKikimrTxColumnShard::TX_KIND_TTL) { auto proposeResult = ProposeTtlDeprecated(txBody); @@ -51,7 +52,7 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseCurrentSchemeShardId = record.GetSchemeShardId(); Schema::SaveSpecialValue(db, Schema::EValueIds::CurrentSchemeShardId, Self->CurrentSchemeShardId); } else { - Y_ABORT_UNLESS(Self->CurrentSchemeShardId == record.GetSchemeShardId()); + AFL_VERIFY(Self->CurrentSchemeShardId == record.GetSchemeShardId()); } } std::optional msgSeqNo; @@ -79,28 +80,34 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseTabletID())("request_tx", TxInfo->DebugString())( - "this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())( + "request_tx", TxInfo->DebugString())("this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + + 
Self->TryRegisterMediatorTimeCast(); if (TxOperator->IsFail()) { TxOperator->SendReply(*Self, ctx); + return; + } + auto internalOp = Self->GetProgressTxController().GetTxOperatorOptional(txId); + if (!internalOp) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "removed tx operator"); + return; + } + NActors::TLogContextGuard lGuardTx = + NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString())("int_this", (ui64)internalOp.get()); + if (!internalOp->CheckTxInfoForReply(*TxInfo)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); + return; + } + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); + if (internalOp->IsAsync()) { + Self->GetProgressTxController().StartProposeOnComplete(*internalOp, ctx); } else { - auto internalOp = Self->GetProgressTxController().GetVerifiedTxOperator(TxOperator->GetTxId()); - NActors::TLogContextGuard lGuardTx = NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString()); - if (!TxOperator->CheckTxInfoForReply(*TxInfo)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); - return; - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); - } - if (TxOperator->IsAsync()) { - Self->GetProgressTxController().StartProposeOnComplete(txId, ctx); - } else { - Self->GetProgressTxController().FinishProposeOnComplete(txId, ctx); - } + Self->GetProgressTxController().FinishProposeOnComplete(*internalOp, ctx); } - Self->TryRegisterMediatorTimeCast(); } TTxType GetTxType() const override { @@ -138,11 +145,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseTablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()->GetSchema(); - auto ttlColumn = schema->GetFieldByName(columnName); - if (!ttlColumn) { - return TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + 
columnName + "'"); + auto schemaSnapshot = Self->TablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema(); + auto schema = schemaSnapshot->GetSchema(); + auto index = schemaSnapshot->GetColumnIdOptional(columnName); + if (!index) { + return TTxController::TProposeResult( + NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + columnName + "'"); } + auto ttlColumn = schemaSnapshot->GetFieldByColumnIdVerified(*index); const TInstant now = TlsActivationContext ? AppData()->TimeProvider->Now() : TInstant::Now(); for (ui64 pathId : ttlBody.GetPathIds()) { diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index a749e3c4f571..dd60f823139e 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -5,6 +5,7 @@ #include "engines/reader/transaction/tx_internal_scan.h" #include +#include namespace NKikimr::NColumnShard { @@ -29,9 +30,9 @@ void TColumnShard::Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext return; } - LastAccessTime = TAppData::TimeProvider->Now(); - ScanTxInFlight.insert({txId, LastAccessTime}); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetColumnTablesCounters()->GetPathIdCounter(record.GetLocalPathId())->OnReadEvent(); + ScanTxInFlight.insert({txId, TAppData::TimeProvider->Now()}); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); Execute(new NOlap::NReader::TTxScan(this, ev), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__statistics.cpp b/ydb/core/tx/columnshard/columnshard__statistics.cpp index 779af24b4e1e..84b1a89982b8 100644 --- a/ydb/core/tx/columnshard/columnshard__statistics.cpp +++ b/ydb/core/tx/columnshard/columnshard__statistics.cpp @@ -1,16 +1,99 @@ #include "columnshard.h" #include "columnshard_impl.h" +#include "ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h" #include +#include + 
+#include + namespace NKikimr::NColumnShard { +void TColumnShard::Handle(NStat::TEvStatistics::TEvAnalyzeTable::TPtr& ev, const TActorContext&) { + auto& requestRecord = ev->Get()->Record; + // TODO Start a potentially long analysis process. + // ... + + + + // Return the response when the analysis is completed + auto response = std::make_unique(); + auto& responseRecord = response->Record; + responseRecord.SetOperationId(requestRecord.GetOperationId()); + responseRecord.MutablePathId()->CopyFrom(requestRecord.GetTable().GetPathId()); + responseRecord.SetShardTabletId(TabletID()); + Send(ev->Sender, response.release(), 0, ev->Cookie); +} + void TColumnShard::Handle(NStat::TEvStatistics::TEvStatisticsRequest::TPtr& ev, const TActorContext&) { + const auto& record = ev->Get()->Record; + auto response = std::make_unique(); - auto& record = response->Record; - record.SetShardTabletId(TabletID()); + auto& respRecord = response->Record; + respRecord.SetShardTabletId(TabletID()); + + if (record.TypesSize() > 0 && (record.TypesSize() > 1 || record.GetTypes(0) != NKikimrStat::TYPE_COUNT_MIN_SKETCH)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "Unsupported statistic type in statistics request"); + + respRecord.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_ERROR); + + Send(ev->Sender, response.release(), 0, ev->Cookie); + return; + } + + AFL_VERIFY(HasIndex()); + auto index = GetIndexAs(); + auto spg = index.GetGranuleOptional(record.GetTable().GetPathId().GetLocalId()); + AFL_VERIFY(spg); + + std::set columnTagsRequested; + for (ui32 tag : record.GetTable().GetColumnTags()) { + columnTagsRequested.insert(tag); + } + if (columnTagsRequested.empty()) { + auto schema = index.GetVersionedIndex().GetLastSchema(); + auto allColumnIds = schema->GetIndexInfo().GetColumnIds(false); + columnTagsRequested = std::set(allColumnIds.begin(), allColumnIds.end()); + } + + std::map> sketchesByColumns; + for (auto id : columnTagsRequested) { + sketchesByColumns.emplace(id, 
TCountMinSketch::Create()); + } + + for (const auto& [_, portionInfo] : spg->GetPortions()) { + if (portionInfo->IsVisible(GetMaxReadVersion())) { + std::shared_ptr portionSchema = portionInfo->GetSchema(index.GetVersionedIndex()); + for (ui32 columnId : columnTagsRequested) { + auto indexMeta = portionSchema->GetIndexInfo().GetIndexMetaCountMinSketch({columnId}); + + if (!indexMeta) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "Missing countMinSketch index for columnId " + ToString(columnId)); + continue; + } + AFL_VERIFY(indexMeta->GetColumnIds().size() == 1); + + const std::vector data = portionInfo->GetIndexInplaceDataVerified(indexMeta->GetIndexId()); + + for (const auto& sketchAsString : data) { + auto sketch = std::unique_ptr(TCountMinSketch::FromString(sketchAsString.data(), sketchAsString.size())); + *sketchesByColumns[columnId] += *sketch; + } + } + } + } + + respRecord.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS); + + for (ui32 columnTag : columnTagsRequested) { + auto* column = respRecord.AddColumns(); + column->SetTag(columnTag); - record.SetStatus(NKikimrStat::TEvStatisticsResponse::SUCCESS); + auto* statistic = column->AddStatistics(); + statistic->SetType(NStat::COUNT_MIN_SKETCH); + statistic->SetData(TString(sketchesByColumns[columnTag]->AsStringBuf())); + } Send(ev->Sender, response.release(), 0, ev->Cookie); } diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index fb70e400254e..8410fe52ad16 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -1,9 +1,16 @@ #include "columnshard_impl.h" -#include "blobs_action/transaction/tx_write.h" + #include "blobs_action/transaction/tx_draft.h" +#include "blobs_action/transaction/tx_write.h" +#include "common/limits.h" #include "counters/columnshard.h" +#include "engines/column_engine_logs.h" #include "operations/batch_builder/builder.h" +#include 
"operations/manager.h" #include "operations/write_data.h" +#include "transactions/operators/ev_write/primary.h" +#include "transactions/operators/ev_write/secondary.h" +#include "transactions/operators/ev_write/sync.h" #include #include @@ -12,41 +19,36 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx) { - IncCounter(COUNTER_WRITE_FAIL); +void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, + std::unique_ptr&& event, const TActorContext& ctx) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { case EOverloadStatus::Disk: - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.OnWriteOverloadDisk(); break; case EOverloadStatus::InsertTable: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadInsertTable(writeData.GetSize()); + Counters.OnWriteOverloadInsertTable(writeSize); break; case EOverloadStatus::OverloadMetadata: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadMetadata(writeData.GetSize()); + Counters.OnWriteOverloadMetadata(writeSize); break; case EOverloadStatus::ShardTxInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardTx(writeData.GetSize()); + Counters.OnWriteOverloadShardTx(writeSize); break; case EOverloadStatus::ShardWritesInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardWrites(writeData.GetSize()); + Counters.OnWriteOverloadShardWrites(writeSize); break; case EOverloadStatus::ShardWritesSizeInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardWritesSize(writeData.GetSize()); + Counters.OnWriteOverloadShardWritesSize(writeSize); break; case EOverloadStatus::None: Y_ABORT("invalid function usage"); } - LOG_S_INFO("Write (overload) 
" << writeData.GetSize() << " bytes into pathId " << writeData.GetWriteMeta().GetTableId() - << " overload reason: [" << overloadReason << "]" - << " at tablet " << TabletID()); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())( + "reason", overloadReason); - ctx.Send(writeData.GetWriteMeta().GetSource(), event.release(), 0, cookie); + ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie); } TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) const { @@ -58,7 +60,7 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) return EOverloadStatus::InsertTable; } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); if (TablesManager.GetPrimaryIndex() && TablesManager.GetPrimaryIndex()->IsOverloadedByMetadata(NOlap::IColumnEngine::GetMetadataLimit())) { return EOverloadStatus::OverloadMetadata; } @@ -67,22 +69,26 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) ui64 writesLimit = Settings.OverloadWritesInFlight; ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight; if (txLimit && Executor()->GetStats().TxInFly > txLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)("limit", txLimit); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)( + "limit", txLimit); return EOverloadStatus::ShardTxInFly; } - if (writesLimit && WritesMonitor.GetWritesInFlight() > writesLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")("sum", WritesMonitor.GetWritesInFlight())("limit", writesLimit); + if (writesLimit && Counters.GetWritesMonitor()->GetWritesInFlight() > writesLimit) { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesInFlight())("limit", writesLimit); return EOverloadStatus::ShardWritesInFly; } - if (writesSizeLimit && WritesMonitor.GetWritesSizeInFlight() > writesSizeLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")("sum", WritesMonitor.GetWritesSizeInFlight())("limit", writesSizeLimit); + if (writesSizeLimit && Counters.GetWritesMonitor()->GetWritesSizeInFlight() > writesSizeLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesSizeInFlight())("limit", writesSizeLimit); return EOverloadStatus::ShardWritesSizeInFly; } return EOverloadStatus::None; } void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); auto& putResult = ev->Get()->GetPutResult(); OnYellowChannels(putResult); @@ -90,25 +96,25 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto baseAggregations = wBuffer.GetAggregations(); wBuffer.InitReplyReceived(TMonotonic::Now()); - auto wg = WritesMonitor.FinishWrite(wBuffer.GetSumSize(), wBuffer.GetAggregations().size()); + Counters.GetWritesMonitor()->OnFinishWrite(wBuffer.GetSumSize(), wBuffer.GetAggregations().size()); for (auto&& aggr : baseAggregations) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); if (!TablesManager.IsReadyForWrite(writeMeta.GetTableId())) { ACFL_ERROR("event", 
"absent_pathId")("path_id", writeMeta.GetTableId())("has_index", TablesManager.HasPrimaryIndex()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::NoTable); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); continue; } if (putResult.GetPutStatus() != NKikimrProto::OK) { - CSCounters.OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetCSCounters().OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto errCode = NKikimrTxColumnShard::EResultStatus::STORAGE_ERROR; if (putResult.GetPutStatus() == NKikimrProto::TIMEOUT || putResult.GetPutStatus() == NKikimrProto::DEADLINE) { @@ -123,26 +129,26 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto result = std::make_unique(TabletID(), writeMeta, errCode); ctx.Send(writeMeta.GetSource(), result.release()); } else { - auto operation = OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); + auto operation = OperationsManager->GetOperation((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, - ev->Get()->GetErrorMessage() ? ev->Get()->GetErrorMessage() : "put data fails"); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), + ev->Get()->GetWriteResultStatus(), ev->Get()->GetErrorMessage() ? 
ev->Get()->GetErrorMessage() : "put data fails"); ctx.Send(writeMeta.GetSource(), result.release(), 0, operation->GetCookie()); } - CSCounters.OnFailedWriteResponse(EWriteFailReason::PutBlob); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::PutBlob); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); } else { const TMonotonic now = TMonotonic::Now(); - CSCounters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant()); - CSCounters.OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); - CSCounters.OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); - CSCounters.OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); - CSCounters.OnWriteMiddle4PutBlobsSuccess(now - writeMeta.GetWriteMiddle4StartInstant()); - CSCounters.OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); - CSCounters.OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); + Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows()); + Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); + Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); + Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); + Counters.GetCSCounters().OnWriteMiddle4PutBlobsSuccess(now - writeMeta.GetWriteMiddle4StartInstant()); + Counters.GetCSCounters().OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); + Counters.GetCSCounters().OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); LOG_S_DEBUG("Write (record) into pathId " << writeMeta.GetTableId() - << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") << " at tablet " << TabletID()); - + << (writeMeta.GetWriteId() ? 
(" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") + << " at tablet " << TabletID()); } } Execute(new TTxWrite(this, ev), ctx); @@ -153,8 +159,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteDraft::TPtr& ev, const TActorConte } void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContext& ctx) { - CSCounters.OnStartWriteRequest(); - LastAccessTime = TAppData::TimeProvider->Now(); + Counters.GetCSCounters().OnStartWriteRequest(); const auto& record = Proto(ev->Get()); const ui64 tableId = record.GetTableId(); @@ -163,6 +168,8 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex const TString dedupId = record.GetDedupId(); const auto source = ev->Sender; + Counters.GetColumnTablesCounters()->GetPathIdCounter(tableId)->OnWriteEvent(); + std::optional granuleShardingVersion; if (record.HasGranuleShardingVersion()) { granuleShardingVersion = record.GetGranuleShardingVersion(); @@ -177,181 +184,336 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex writeMeta.SetLongTxId(NLongTxService::TLongTxId::FromProto(record.GetLongTxId())); writeMeta.SetWritePartId(record.GetWritePartId()); - const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex) { - IncCounter(signalIndex); + const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex, const EWriteFailReason reason) { + Counters.GetTabletCounters()->IncCounter(signalIndex); ctx.Send(source, std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR)); + Counters.GetCSCounters().OnFailedWriteResponse(reason); return; }; if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_writing")("reason", "disabled"); - CSCounters.OnFailedWriteResponse(EWriteFailReason::Disabled); - return returnFail(COUNTER_WRITE_FAIL); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::Disabled); } if 
(!TablesManager.IsReadyForWrite(tableId)) { - LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex()? "": " no index") - << " at tablet " << TabletID()); + LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex() ? "" : " no index") + << " at tablet " << TabletID()); + + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::NoTable); + } + + { + auto& portionsIndex = + TablesManager.GetPrimaryIndexAsVerified().GetGranuleVerified(writeMeta.GetTableId()).GetPortionsIndex(); + { + const ui64 minMemoryRead = portionsIndex.GetMinRawMemoryRead(); + if (NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_raw_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadRawMemory); + } + } - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); - return returnFail(COUNTER_WRITE_FAIL); + { + const ui64 minMemoryRead = portionsIndex.GetMinBlobMemoryRead(); + if (NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_blob_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadBlobMemory); + } + } } const auto& snapshotSchema = TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetLastSchema(); auto arrowData = std::make_shared(snapshotSchema); if (!arrowData->ParseFromProto(record)) { - LOG_S_ERROR("Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() - << " at tablet " << TabletID()); - 
CSCounters.OnFailedWriteResponse(EWriteFailReason::IncorrectSchema); - return returnFail(COUNTER_WRITE_FAIL); + LOG_S_ERROR( + "Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() << " at tablet " << TabletID()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::IncorrectSchema); } NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(), StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING)); auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - std::unique_ptr result = std::make_unique(TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); - CSCounters.OnFailedWriteResponse(EWriteFailReason::Overload); + std::unique_ptr result = std::make_unique( + TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); + OverloadWriteFail(overloadStatus, writeData.GetWriteMeta(), writeData.GetSize(), cookie, std::move(result), ctx); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::Overload); } else { if (ui64 writeId = (ui64)HasLongTxWrite(writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId())) { - LOG_S_DEBUG("Write (duplicate) into pathId " << writeMeta.GetTableId() - << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() - << " at tablet " << TabletID()); + LOG_S_DEBUG("Write (duplicate) into pathId " << writeMeta.GetTableId() << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() + << " at tablet " << TabletID()); - IncCounter(COUNTER_WRITE_DUPLICATE); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_DUPLICATE); - auto result = std::make_unique( - TabletID(), writeMeta, writeId, NKikimrTxColumnShard::EResultStatus::SUCCESS); + auto result = + std::make_unique(TabletID(), writeMeta, writeId, 
NKikimrTxColumnShard::EResultStatus::SUCCESS); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); return; } - WritesMonitor.RegisterWrite(writeData.GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(writeData.GetSize()); LOG_S_DEBUG("Write (blob) " << writeData.GetSize() << " bytes into pathId " << writeMeta.GetTableId() - << (writeMeta.GetWriteId()? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") - << WritesMonitor.DebugString() - << " at tablet " << TabletID()); + << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") + << Counters.GetWritesMonitor()->DebugString() << " at tablet " << TabletID()); writeData.MutableWriteMeta().SetWriteMiddle1StartInstant(TMonotonic::Now()); - std::shared_ptr task = std::make_shared(TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), - snapshotSchema, GetLastTxSnapshot()); + std::shared_ptr task = std::make_shared( + TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), snapshotSchema, GetLastTxSnapshot(), Counters.GetCSCounters().WritingCounters); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); } } class TCommitOperation { +private: + const ui64 TabletId; + public: using TPtr = std::shared_ptr; - bool Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.GetLocks().GetLocks().size() != 1) { - return false; + bool NeedSyncLocks() const { + return SendingShards.size() && ReceivingShards.size(); + } + + bool IsPrimary() const { + AFL_VERIFY(NeedSyncLocks()); + return TabletId == *ReceivingShards.begin(); + } + + TCommitOperation(const ui64 tabletId) + : TabletId(tabletId) { + } + + TConclusionStatus Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { + AFL_VERIFY(evWrite.Record.GetLocks().GetLocks().size() >= 1); + auto& locks = 
evWrite.Record.GetLocks(); + auto& lock = evWrite.Record.GetLocks().GetLocks()[0]; + SendingShards = std::set(locks.GetSendingShards().begin(), locks.GetSendingShards().end()); + if ((ui32)locks.GetSendingShards().size() != SendingShards.size()) { + return TConclusionStatus::Fail("duplications in SendingShards proto field"); + } + ReceivingShards = std::set(locks.GetReceivingShards().begin(), locks.GetReceivingShards().end()); + if ((ui32)locks.GetReceivingShards().size() != ReceivingShards.size()) { + return TConclusionStatus::Fail("duplications in ReceivingShards proto field"); } - LockId = evWrite.Record.GetLocks().GetLocks()[0].GetLockId(); TxId = evWrite.Record.GetTxId(); - KqpLocks = evWrite.Record.GetLocks(); - return !!LockId && !!TxId && KqpLocks.GetOp() == NKikimrDataEvents::TKqpLocks::Commit; + LockId = lock.GetLockId(); + Generation = lock.GetGeneration(); + InternalGenerationCounter = lock.GetCounter(); + if (!GetLockId()) { + return TConclusionStatus::Fail("not initialized lock info in commit message"); + } + if (!TxId) { + return TConclusionStatus::Fail("not initialized TxId for commit event"); + } + if (evWrite.Record.GetLocks().GetOp() != NKikimrDataEvents::TKqpLocks::Commit) { + return TConclusionStatus::Fail("incorrect message type"); + } + if (!ReceivingShards.size() || !SendingShards.size()) { + ReceivingShards.clear(); + SendingShards.clear(); + } else { + if (!ReceivingShards.contains(TabletId) && !SendingShards.contains(TabletId)) { + return TConclusionStatus::Fail("shard is incorrect for sending/receiving lists"); + } + } + return TConclusionStatus::Success(); + } + + std::unique_ptr CreateTxOperator( + const NKikimrTxColumnShard::ETransactionKind kind) const { + AFL_VERIFY(ReceivingShards.size()); + if (IsPrimary()) { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards); + } else { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, *ReceivingShards.begin(), 
ReceivingShards.contains(TabletId)); + } } private: - NKikimrDataEvents::TKqpLocks KqpLocks; YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Generation, 0); + YDB_READONLY(ui64, InternalGenerationCounter, 0); YDB_READONLY(ui64, TxId, 0); + YDB_READONLY_DEF(std::set, SendingShards); + YDB_READONLY_DEF(std::set, ReceivingShards); }; -class TProposeWriteTransaction : public NTabletFlatExecutor::TTransactionBase { + +class TProposeWriteTransaction: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + public: TProposeWriteTransaction(TColumnShard* self, TCommitOperation::TPtr op, const TActorId source, const ui64 cookie) : TBase(self) , WriteCommit(op) , Source(source) - , Cookie(cookie) - {} + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + NKikimrTxColumnShard::TCommitWriteTxBody proto; + NKikimrTxColumnShard::ETransactionKind kind; + if (WriteCommit->NeedSyncLocks()) { + if (WriteCommit->IsPrimary()) { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY; + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY; + } + proto = WriteCommit->CreateTxOperator(kind)->SerializeToProto(); + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE; + } + proto.SetLockId(WriteCommit->GetLockId()); + TxOperator = Self->GetProgressTxController().StartProposeOnExecute( + TTxController::TTxInfo(kind, WriteCommit->GetTxId(), Source, Cookie, {}), proto.SerializeAsString(), txc); + return true; + } - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_PROPOSE; } + virtual void Complete(const TActorContext& ctx) override { + Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); + } + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } private: TCommitOperation::TPtr 
WriteCommit; TActorId Source; ui64 Cookie; + std::shared_ptr TxOperator; }; -bool TProposeWriteTransaction::Execute(TTransactionContext& txc, const TActorContext&) { - NKikimrTxColumnShard::TCommitWriteTxBody proto; - proto.SetLockId(WriteCommit->GetLockId()); - TString txBody; - Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); - Y_UNUSED(Self->GetProgressTxController().StartProposeOnExecute( - TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, WriteCommit->GetTxId(), Source, Cookie, {}), txBody, txc)); - return true; -} +class TAbortWriteTransaction: public NTabletFlatExecutor::TTransactionBase { +private: + using TBase = NTabletFlatExecutor::TTransactionBase; -void TProposeWriteTransaction::Complete(const TActorContext& ctx) { - Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); -} +public: + TAbortWriteTransaction(TColumnShard* self, const ui64 txId, const TActorId source, const ui64 cookie) + : TBase(self) + , TxId(txId) + , Source(source) + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + Self->GetOperationsManager().AbortTransactionOnExecute(*Self, TxId, txc); + return true; + } + + virtual void Complete(const TActorContext& ctx) override { + Self->GetOperationsManager().AbortTransactionOnComplete(*Self, TxId); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), TxId); + ctx.Send(Source, result.release(), 0, Cookie); + } + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } + +private: + ui64 TxId; + TActorId Source; + ui64 Cookie; +}; void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", 
TabletID())("event", "TEvWrite"); const auto& record = ev->Get()->Record; const auto source = ev->Sender; const auto cookie = ev->Cookie; - const auto behaviour = TOperationsManager::GetBehaviour(*ev->Get()); - - if (behaviour == EOperationBehaviour::Undefined) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid write event"); + const auto behaviourConclusion = TOperationsManager::GetBehaviour(*ev->Get()); + // AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); + if (behaviourConclusion.IsFail()) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + "invalid write event: " + behaviourConclusion.GetErrorMessage()); ctx.Send(source, result.release(), 0, cookie); return; } + auto behaviour = *behaviourConclusion; + + if (behaviour == EOperationBehaviour::AbortWriteLock) { + Execute(new TAbortWriteTransaction(this, record.GetLocks().GetLocks()[0].GetLockId(), source, cookie), ctx); + return; + } if (behaviour == EOperationBehaviour::CommitWriteLock) { - auto commitOperation = std::make_shared(); - if (!commitOperation->Parse(*ev->Get())) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid commit event"); + auto commitOperation = std::make_shared(TabletID()); + const auto sendError = [&](const TString& message, const NKikimrDataEvents::TEvWriteResult::EStatus status) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, status, message); ctx.Send(source, result.release(), 0, cookie); + }; + auto conclusionParse = commitOperation->Parse(*ev->Get()); + if 
(conclusionParse.IsFail()) { + sendError(conclusionParse.GetErrorMessage(), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); + } else { + if (commitOperation->NeedSyncLocks()) { + auto* lockInfo = OperationsManager->GetLockOptional(commitOperation->GetLockId()); + if (!lockInfo) { + sendError("haven't lock for commit: " + ::ToString(commitOperation->GetLockId()), + NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED); + } else { + if (lockInfo->GetGeneration() != commitOperation->GetGeneration()) { + sendError("tablet lock have another generation: " + ::ToString(lockInfo->GetGeneration()) + + " != " + ::ToString(commitOperation->GetGeneration()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else if (lockInfo->GetInternalGenerationCounter() != commitOperation->GetInternalGenerationCounter()) { + sendError( + "tablet lock have another internal generation counter: " + ::ToString(lockInfo->GetInternalGenerationCounter()) + + " != " + ::ToString(commitOperation->GetInternalGenerationCounter()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } + } + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } } - Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); return; } - const ui64 lockId = (behaviour == EOperationBehaviour::InTxWrite) ? 
record.GetTxId() : record.GetLockTxId(); - if (record.GetOperations().size() != 1) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); ctx.Send(source, result.release(), 0, cookie); return; } const auto& operation = record.GetOperations()[0]; - const std::optional mType = TEnumOperator::DeserializeFromProto(operation.GetType()); + const std::optional mType = + TEnumOperator::DeserializeFromProto(operation.GetType()); if (!mType) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "operation " + NKikimrDataEvents::TEvWrite::TOperation::EOperationType_Name(operation.GetType()) + " is not supported"); ctx.Send(source, result.release(), 0, cookie); return; } if (!operation.GetTableId().HasSchemaVersion()) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); ctx.Send(source, result.release(), 0, cookie); return; } auto schema = 
TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchema(operation.GetTableId().GetSchemaVersion()); if (!schema) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); ctx.Send(source, result.release(), 0, cookie); return; } @@ -359,38 +521,50 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor const auto tableId = operation.GetTableId().GetTableId(); if (!TablesManager.IsReadyForWrite(tableId)) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); ctx.Send(source, result.release(), 0, cookie); return; } auto arrowData = std::make_shared(schema); if (!arrowData->Parse(operation, NEvWrite::TPayloadReader(*ev->Get()))) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); ctx.Send(source, result.release(), 0, cookie); } auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - NEvWrite::TWriteData 
writeData(NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData, nullptr, nullptr); - std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); + std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); + OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData->GetSize(), cookie, std::move(result), ctx); return; } - auto wg = WritesMonitor.RegisterWrite(arrowData->GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(arrowData->GetSize()); std::optional granuleShardingVersionId; if (record.HasGranuleShardingVersionId()) { granuleShardingVersionId = record.GetGranuleShardingVersionId(); } + ui64 lockId = 0; + if (behaviour == EOperationBehaviour::NoTxWrite) { + lockId = BuildEphemeralTxId(); + } else if (behaviour == EOperationBehaviour::InTxWrite) { + lockId = record.GetTxId(); + } else { + lockId = record.GetLockTxId(); + } + + OperationsManager->RegisterLock(lockId, Generation()); auto writeOperation = OperationsManager->RegisterOperation(lockId, cookie, granuleShardingVersionId, *mType); Y_ABORT_UNLESS(writeOperation); writeOperation->SetBehaviour(behaviour); writeOperation->Start(*this, tableId, arrowData, source, schema, ctx); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index 69d54a68d1bd..27497b4e8fc6 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -41,7 +41,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorConte if (IsAnyChannelYellowStop()) { ACFL_ERROR("event", "TEvWriteIndex failed")("reason", 
"channel yellow stop"); - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.GetTabletCounters()->IncCounter(COUNTER_OUT_OF_SPACE); ev->Get()->SetPutStatus(NKikimrProto::TRYLATER); NOlap::TChangesFinishContext context("out of disk space"); ev->Get()->IndexChanges->Abort(*this, context); diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp deleted file mode 100644 index d9f0dcc5e79b..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "columnshard_common.h" -#include - -namespace NKikimr::NColumnShard { - -namespace { - -using EOperation = NArrow::EOperation; -using EAggregate = NArrow::EAggregate; -using TAssign = NSsa::TAssign; -using TAggregateAssign = NSsa::TAggregateAssign; - -} - -using EOperation = NArrow::EOperation; -using TPredicate = NOlap::TPredicate; - -} diff --git a/ydb/core/tx/columnshard/columnshard_common.h b/ydb/core/tx/columnshard/columnshard_common.h deleted file mode 100644 index 455f39a512cc..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.h +++ /dev/null @@ -1,94 +0,0 @@ -#pragma once -#include "engines/reader/common/description.h" -#include "engines/predicate/predicate.h" - -#include - -namespace NKikimr::NOlap { - struct TIndexInfo; -} - -namespace NKikimr::NColumnShard { - -using TReadDescription = NOlap::NReader::TReadDescription; -using IColumnResolver = NOlap::IColumnResolver; -using NOlap::TWriteId; - -class TBatchCache { -public: - using TUnifiedBlobId = NOlap::TUnifiedBlobId; - using TInsertedBatch = std::pair>; - - static constexpr ui32 MAX_COMMITTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui32 MAX_INSERTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui64 MAX_TOTAL_SIZE = 2 * TLimits::MIN_BYTES_TO_INSERT; - - TBatchCache() - : Inserted(MAX_INSERTED_COUNT) - , Committed(MAX_COMMITTED_COUNT) - {} - - void Insert(TWriteId writeId, const TUnifiedBlobId& blobId, const 
std::shared_ptr& batch) { - if (Bytes() + blobId.BlobSize() > MAX_TOTAL_SIZE) { - return; - } - InsertedBytes += blobId.BlobSize(); - Inserted.Insert(writeId, {blobId, batch}); - } - - void Commit(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - auto& blobId = it->first; - InsertedBytes -= blobId.BlobSize(); - CommittedBytes += blobId.BlobSize(); - - Committed.Insert(blobId, it->second); - Inserted.Erase(it); - } - } - - void EraseInserted(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - InsertedBytes -= (*it).first.BlobSize(); - Inserted.Erase(it); - } - } - - void EraseCommitted(const TUnifiedBlobId& blobId) { - auto it = Committed.FindWithoutPromote(blobId); - if (it != Committed.End()) { - CommittedBytes -= blobId.BlobSize(); - Committed.Erase(it); - } - } - - TInsertedBatch GetInserted(TWriteId writeId) const { - auto it = Inserted.Find(writeId); - if (it != Inserted.End()) { - return *it; - } - return {}; - } - - std::shared_ptr Get(const TUnifiedBlobId& blobId) const { - auto it = Committed.Find(blobId); - if (it != Committed.End()) { - return *it; - } - return {}; - } - - ui64 Bytes() const { - return InsertedBytes + CommittedBytes; - } - -private: - mutable TLRUCache Inserted; - mutable TLRUCache> Committed; - ui64 InsertedBytes{0}; - ui64 CommittedBytes{0}; -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 79deb83616a9..9f1c7a10859d 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -28,6 +28,7 @@ #include "engines/changes/ttl.h" #include "resource_subscriber/counters.h" +#include "transactions/operators/ev_write/sync.h" #include "bg_tasks/adapter/adapter.h" #include "bg_tasks/manager/manager.h" @@ -41,6 +42,7 @@ #include #include #include +#include namespace NKikimr::NColumnShard { @@ -64,33 +66,26 @@ 
NTabletPipe::TClientConfig GetPipeClientConfig() { TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) : TActor(&TThis::StateInit) - , TTabletExecutedFlat(info, tablet, nullptr) + , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) + , TabletCountersHolder(new TProtobufTabletCounters()) + , Counters(*TabletCountersHolder) , ProgressTxController(std::make_unique(*this)) , StoragesManager(std::make_shared(*this)) , DataLocksManager(std::make_shared()) - , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod)) - , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval)) - , InFlightReadsTracker(StoragesManager) + , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod()) + , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval()) + , InFlightReadsTracker(StoragesManager, Counters.GetRequestsTracingCounters()) , TablesManager(StoragesManager, info->TabletID) , Subscribers(std::make_shared(*this)) , PipeClientCache(NTabletPipe::CreateBoundedClientCache(new NTabletPipe::TBoundedClientCacheConfig(), GetPipeClientConfig())) , InsertTable(std::make_unique()) - , SubscribeCounters(std::make_shared()) - , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , ScanCounters("Scan") - , WritesMonitor(*this) - , NormalizerController(StoragesManager, SubscribeCounters) - , SysLocks(this) -{ - TabletCountersPtr.reset(new TProtobufTabletCounters< - ESimpleCounters_descriptor, - ECumulativeCounters_descriptor, - 
EPercentileCounters_descriptor, - ETxTypes_descriptor - >()); - TabletCounters = TabletCountersPtr.get(); + , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , BackgroundController(Counters.GetBackgroundControllerCounters()) + , NormalizerController(StoragesManager, Counters.GetSubscribeCounters()) + , SysLocks(this) { } void TColumnShard::OnDetach(const TActorContext& ctx) { @@ -191,86 +186,109 @@ ui64 TColumnShard::GetOutdatedStep() const { return step; } -ui64 TColumnShard::GetMinReadStep() const { - const TDuration maxReadStaleness = NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(TDuration::Minutes(5)); - ui64 delayMillisec = maxReadStaleness.MilliSeconds(); +NOlap::TSnapshot TColumnShard::GetMinReadSnapshot() const { + ui64 delayMillisec = GetMaxReadStaleness().MilliSeconds(); ui64 passedStep = GetOutdatedStep(); ui64 minReadStep = (passedStep > delayMillisec ? 
passedStep - delayMillisec : 0); - return minReadStep; + + if (auto ssClean = InFlightReadsTracker.GetSnapshotToClean()) { + if (ssClean->GetPlanStep() < minReadStep) { + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(ssClean->GetPlanStep())); + return *ssClean; + } + } + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(minReadStep)); + return NOlap::TSnapshot::MaxForPlanStep(minReadStep); } -TWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { +TInsertWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } - return (TWriteId)0; + return (TInsertWriteId)0; } -TWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { +TInsertWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } else { it = LongTxWritesByUniqueId.emplace(longTxId.UniqueId, TPartsForLTXShard()).first; } - TWriteId writeId = BuildNextWriteId(db); - auto& lw = LongTxWrites[writeId]; - lw.WriteId = (ui64)writeId; + TInsertWriteId insertWriteId = InsertTable->BuildNextWriteId(db); + auto& lw = LongTxWrites[insertWriteId]; + lw.InsertWriteId = insertWriteId; lw.WritePartId = partId; 
lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersionId; it->second[partId] = &lw; - Schema::SaveLongTxWrite(db, writeId, partId, longTxId, granuleShardingVersionId); - return writeId; -} - -TWriteId TColumnShard::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - return BuildNextWriteId(db); -} - -TWriteId TColumnShard::BuildNextWriteId(NIceDb::TNiceDb& db) { - TWriteId writeId = ++LastWriteId; - Schema::SaveSpecialValue(db, Schema::EValueIds::LastWriteId, (ui64)writeId); - return writeId; + Schema::SaveLongTxWrite(db, insertWriteId, partId, longTxId, granuleShardingVersionId); + return insertWriteId; } -void TColumnShard::AddLongTxWrite(TWriteId writeId, ui64 txId) { - auto& lw = LongTxWrites.at(writeId); - lw.PreparedTxId = txId; +void TColumnShard::AddLongTxWrite(const TInsertWriteId writeId, ui64 txId) { + auto it = LongTxWrites.find(writeId); + AFL_VERIFY(it != LongTxWrites.end()); + it->second.PreparedTxId = txId; } -void TColumnShard::LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { +void TColumnShard::LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { auto& lw = LongTxWrites[writeId]; lw.WritePartId = writePartId; - lw.WriteId = (ui64)writeId; + lw.InsertWriteId = writeId; lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersion; LongTxWritesByUniqueId[longTxId.UniqueId][writePartId] = &lw; } -bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId) { - auto* lw = LongTxWrites.FindPtr(writeId); - AFL_VERIFY(lw)("write_id", (ui64)writeId)("tx_id", txId); - const ui64 prepared = lw->PreparedTxId; - AFL_VERIFY(!prepared || txId == prepared)("tx", txId)("prepared", prepared); - Schema::EraseLongTxWrite(db, writeId); - auto& 
ltxParts = LongTxWritesByUniqueId[lw->LongTxId.UniqueId]; - ltxParts.erase(lw->WritePartId); - if (ltxParts.empty()) { - AFL_VERIFY(LongTxWritesByUniqueId.erase(lw->LongTxId.UniqueId)); - } - LongTxWrites.erase(writeId); - return true; +bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId) { + if (auto* lw = LongTxWrites.FindPtr(writeId)) { + ui64 prepared = lw->PreparedTxId; + if (!prepared || txId == prepared) { + Schema::EraseLongTxWrite(db, writeId); + auto& ltxParts = LongTxWritesByUniqueId[lw->LongTxId.UniqueId]; + ltxParts.erase(lw->WritePartId); + if (ltxParts.empty()) { + LongTxWritesByUniqueId.erase(lw->LongTxId.UniqueId); + } + LongTxWrites.erase(writeId); + return true; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_prepared_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return false; + } + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_removed_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return true; + } +} + +void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort) { + std::vector failedAborts; + for (auto& writeId : writesToAbort) { + if (!RemoveLongTxWrite(db, writeId, 0)) { + failedAborts.push_back(writeId); + } + } + if (failedAborts.size()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "failed_aborts")("count", failedAborts.size())("writes_count", writesToAbort.size()); + } + for (auto& writeId : failedAborts) { + InsertTable->MarkAsNotAbortable(writeId); + writesToAbort.erase(writeId); + } + if (!writesToAbort.empty()) { + InsertTable->Abort(dbTable, writesToAbort); + } } void TColumnShard::UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc) { @@ -401,9 +419,9 @@ void TColumnShard::RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& tabl TablesManager.AddTableVersion(pathId, version, tableVerProto, 
db, Tiers); - SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); - SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); - SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); } void TColumnShard::RunAlterTable(const NKikimrTxColumnShard::TAlterTable& alterProto, const NOlap::TSnapshot& version, @@ -451,11 +469,6 @@ void TColumnShard::RunDropTable(const NKikimrTxColumnShard::TDropTable& dropProt LOG_S_DEBUG("DropTable for pathId: " << pathId << " at tablet " << TabletID()); TablesManager.DropTable(pathId, version, db); - - // TODO: Allow to read old snapshots after DROP - TBlobGroupSelector dsGroupSelector(Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - InsertTable->DropPath(dbTable, pathId); } void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, const NOlap::TSnapshot& version, @@ -487,8 +500,7 @@ void TColumnShard::EnqueueBackgroundActivities(const bool periodic) { ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic); StoragesManager->GetOperatorVerified(NOlap::IStoragesManager::DefaultStorageId); StoragesManager->GetSharedBlobsManager()->GetStorageManagerVerified(NOlap::IStoragesManager::DefaultStorageId); - CSCounters.OnStartBackground(); - SendPeriodicStats(); + Counters.GetCSCounters().OnStartBackground(); if (!TablesManager.HasPrimaryIndex()) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("problem", "Background activities cannot be started: no index at tablet"); @@ -515,7 +527,7 @@ class TChangesTask: public NConveyor::ITask { TString ClassId; NOlap::TSnapshot LastCompletedTx; protected: - virtual bool DoExecute() override { + virtual 
TConclusionStatus DoExecute(const std::shared_ptr& /*taskPtr*/) override { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); { NOlap::TConstructionContext context(*TxEvent->IndexInfo, Counters, LastCompletedTx); @@ -525,7 +537,7 @@ class TChangesTask: public NConveyor::ITask { } } TActorContext::AsActorContext().Send(ParentActorId, std::move(TxEvent)); - return true; + return TConclusionStatus::Success(); } public: virtual TString GetTaskClassIdentifier() const override { @@ -609,13 +621,16 @@ class TTTLChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCou using TBase::TBase; }; -void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { - CSCounters.IndexationInput(bytesToIndex); +void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { + Counters.GetCSCounters().IndexationInput(bytesToIndex); - std::vector data; + std::vector data; data.reserve(dataToIndex.size()); for (auto& ptr : dataToIndex) { data.push_back(*ptr); + if (!TablesManager.HasTable(data.back().GetPathId())) { + data.back().SetRemove(); + } } Y_ABORT_UNLESS(data.size()); @@ -632,7 +647,8 @@ void TColumnShard::StartIndexTask(std::vector&& dat NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), IndexationCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetIndexationCounters(), GetLastCompletedTx()), + 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); } void TColumnShard::SetupIndexation() { @@ -652,8 +668,8 @@ void TColumnShard::SetupIndexation() { if (InsertTable->GetPathPriorities().size() && InsertTable->GetPathPriorities().rbegin()->first.GetCategory() 
== NOlap::TPathInfoIndexPriority::EIndexationPriority::PreventOverload) { force = true; } - const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(TSettings::GuaranteeIndexationStartBytesLimit); - const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(TSettings::GuaranteeIndexationInterval); + const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(); + const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(); if (!force && InsertTable->GetCountersCommitted().Bytes < bytesLimit && TMonotonic::Now() < BackgroundController.GetLastIndexationInstant() + durationLimit) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_indexation")("reason", "not_enough_data_and_too_frequency") @@ -662,10 +678,10 @@ void TColumnShard::SetupIndexation() { } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_indexation_tasks")("insert_overload_size", InsertTable->GetCountersCommitted().Bytes); - CSCounters.OnSetupIndexation(); + Counters.GetCSCounters().OnSetupIndexation(); ui64 bytesToIndex = 0; ui64 txBytesWrite = 0; - std::vector dataToIndex; + std::vector dataToIndex; dataToIndex.reserve(TLimits::MIN_SMALL_BLOBS_TO_INSERT); for (auto it = InsertTable->GetPathPriorities().rbegin(); it != InsertTable->GetPathPriorities().rend(); ++it) { for (auto* pathInfo : it->second) { @@ -693,7 +709,7 @@ void TColumnShard::SetupCompaction() { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_compaction")("reason", "disabled"); return; } - CSCounters.OnSetupCompaction(); + Counters.GetCSCounters().OnSetupCompaction(); BackgroundController.CheckDeadlines(); while (BackgroundController.GetCompactionsCount() < TSettings::MAX_ACTIVE_COMPACTIONS) { @@ -712,7 +728,7 @@ void TColumnShard::SetupCompaction() { 
NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); } LOG_S_DEBUG("ActiveCompactions: " << BackgroundController.GetCompactionsCount() << " at tablet " << TabletID()); @@ -723,7 +739,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ttl")("reason", "disabled"); return false; } - CSCounters.OnSetupTtl(); + Counters.GetCSCounters().OnSetupTtl(); THashMap eviction = pathTtls; for (auto&& i : eviction) { ACFL_DEBUG("background", "ttl")("path", i.first)("info", i.second.GetDebugString()); @@ -747,7 +763,8 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { if (needWrites) { NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), + 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); } else { ev->SetPutStatus(NKikimrProto::OK); ActorContext().Send(SelfId(), std::move(ev)); @@ -757,7 +774,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { } void TColumnShard::SetupCleanupPortions() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (!AppDataVerified().ColumnShardConfig.GetCleanupEnabled() || 
!NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Cleanup)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_cleanup")("reason", "disabled"); return; @@ -767,9 +784,8 @@ void TColumnShard::SetupCleanupPortions() { return; } - NOlap::TSnapshot cleanupSnapshot{GetMinReadStep(), 0}; - - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupPortions(cleanupSnapshot, TablesManager.GetPathsToDrop(), DataLocksManager); + auto changes = + TablesManager.MutablePrimaryIndex().StartCleanupPortions(GetMinReadSnapshot(), TablesManager.GetPathsToDrop(), DataLocksManager); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -786,13 +802,21 @@ void TColumnShard::SetupCleanupPortions() { } void TColumnShard::SetupCleanupTables() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (BackgroundController.IsCleanupTablesActive()) { ACFL_DEBUG("background", "cleanup")("skip_reason", "in_progress"); return; } - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(TablesManager.MutablePathsToDrop()); + THashSet pathIdsEmptyInInsertTable; + for (auto&& i : TablesManager.GetPathsToDrop()) { + if (InsertTable->HasPathIdData(i)) { + continue; + } + pathIdsEmptyInInsertTable.emplace(i); + } + + auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(pathIdsEmptyInInsertTable); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -827,21 +851,22 @@ void TColumnShard::Handle(TEvPrivate::TEvGarbageCollectionFinished::TPtr& ev, co } void TColumnShard::SetupCleanupInsertTable() { + auto writeIdsToCleanup = InsertTable->OldWritesToAbort(AppData()->TimeProvider->Now()); + if (BackgroundController.IsCleanupInsertTableActive()) { ACFL_DEBUG("background", "cleanup_insert_table")("skip_reason", "in_progress"); return; } - if (!InsertTable->GetAborted().size()) { + if 
(!InsertTable->GetAborted().size() && !writeIdsToCleanup.size()) { return; } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "cleanup_started")("aborted", InsertTable->GetAborted().size()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "cleanup_started")("aborted", InsertTable->GetAborted().size())("to_cleanup", writeIdsToCleanup.size()); BackgroundController.StartCleanupInsertTable(); - Execute(new TTxInsertTableCleanup(this), TActorContext::AsActorContext()); + Execute(new TTxInsertTableCleanup(this, std::move(writeIdsToCleanup)), TActorContext::AsActorContext()); } void TColumnShard::Die(const TActorContext& ctx) { - // TODO CleanupActors(ctx); NTabletPipe::CloseAndForgetClient(SelfId(), StatsReportPipe); UnregisterMediatorTimeCast(); @@ -863,6 +888,39 @@ void TColumnShard::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TAct } } +void TColumnShard::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx) { + const ui64 txId = ev->Get()->Record.GetTxId(); + if (!GetProgressTxController().GetTxOperatorOptional(txId)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ignored")("proto", ev->Get()->Record.DebugString()); + Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, txId, TabletID(), ev->Get()->Record.GetTabletProducer(), TabletID(), 0), + ev->Get()->Record.GetTabletProducer(), true), + IEventHandle::FlagTrackDelivery, txId); + return; + } + auto op = GetProgressTxController().GetTxOperatorVerifiedAs(txId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + NKikimrTx::TReadSetData data; + AFL_VERIFY(data.ParseFromArray(ev->Get()->Record.GetReadSet().data(), ev->Get()->Record.GetReadSet().size())); + auto tx = op->CreateReceiveBrokenFlagTx( + *this, ev->Get()->Record.GetTabletProducer(), data.GetDecision() != NKikimrTx::TReadSetData::DECISION_COMMIT); + 
Execute(tx.release(), ctx); +} + +void TColumnShard::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx) { + auto opPtr = GetProgressTxController().GetTxOperatorOptional(ev->Get()->Record.GetTxId()); + if (!opPtr) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "missed_read_set_ack")("proto", ev->Get()->Record.DebugString())( + "tx_id", ev->Get()->Record.GetTxId()); + return; + } + auto op = TValidator::CheckNotNull(dynamic_pointer_cast(opPtr)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ack")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + auto tx = op->CreateReceiveResultAckTx(*this, ev->Get()->Record.GetTabletConsumer()); + Execute(tx.release(), ctx); +} + void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator::TPtr& ev, const TActorContext& ctx) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvProposeFromInitiator"); auto reqSession = std::make_shared(); @@ -1050,9 +1108,13 @@ void TColumnShard::Handle(TAutoPtrIsSharingInProgress()) { ctx.Send(NActors::ActorIdFromProto(ev->Get()->Record.GetSourceActorId()), - new NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobsFinished((NOlap::TTabletId)TabletID(), - NKikimrColumnShardBlobOperationsProto::TEvDeleteSharedBlobsFinished::DestinationCurrenlyLocked)); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "sharing_in_progress"); + new NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobsFinished( + (NOlap::TTabletId)TabletID(), NKikimrColumnShardBlobOperationsProto::TEvDeleteSharedBlobsFinished::DestinationCurrenlyLocked)); + for (auto&& i : ev->Get()->Record.GetBlobIds()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS)("event", "sharing_in_progress")("blob_id", i)( + "from_tablet", ev->Get()->Record.GetSourceTabletId()); + } + return; } @@ -1111,4 +1173,8 @@ const NKikimr::NColumnShard::NTiers::TManager* TColumnShard::GetTierManagerPoint return 
Tiers->GetManagerOptional(tierId); } +TDuration TColumnShard::GetMaxReadStaleness() { + return NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(); +} + } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index ea6ca9b3164c..8e303896ce75 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -3,7 +3,6 @@ #include "background_controller.h" #include "counters.h" #include "columnshard.h" -#include "columnshard_common.h" #include "columnshard_ttl.h" #include "columnshard_private_events.h" #include "tables_manager.h" @@ -13,9 +12,11 @@ #include "transactions/tx_controller.h" #include "inflight_request_tracker.h" #include "counters/columnshard.h" +#include "counters/counters_manager.h" #include "resource_subscriber/counters.h" #include "resource_subscriber/task.h" #include "normalizer/abstract/abstract.h" +#include "operations/manager.h" #include "export/events/events.h" @@ -202,6 +203,8 @@ class TColumnShard void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCheckPlannedTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCancelTransactionProposal::TPtr& ev, const TActorContext& ctx); @@ -216,6 +219,9 @@ class TColumnShard void Handle(TEvPrivate::TEvScanStats::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvReadFinished::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const 
TActorContext& ctx); + void Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx); void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev); void Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx); @@ -224,6 +230,7 @@ class TColumnShard void Handle(TEvPrivate::TEvTieringModified::TPtr& ev, const TActorContext&); void Handle(TEvPrivate::TEvNormalizerResult::TPtr& ev, const TActorContext&); + void Handle(NStat::TEvStatistics::TEvAnalyzeTable::TPtr& ev, const TActorContext& ctx); void Handle(NStat::TEvStatistics::TEvStatisticsRequest::TPtr& ev, const TActorContext& ctx); void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TActorContext&); @@ -275,17 +282,16 @@ class TColumnShard putStatus.OnYellowChannels(Executor()); } - void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { - TabletCounters->Simple()[counter].Set(num); - } - - void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); - } - void ActivateTiering(const ui64 pathId, const TString& useTiering); void OnTieringModified(const std::optional pathId = {}); + public: + ui64 BuildEphemeralTxId() { + static TAtomicCounter Counter = 0; + static constexpr ui64 shift = (ui64)1 << 47; + return shift | Counter.Inc(); + } + enum class EOverloadStatus { ShardTxInFly /* "shard_tx" */, ShardWritesInFly /* "shard_writes" */, @@ -296,25 +302,17 @@ class TColumnShard None /* "none" */ }; - void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MicroSeconds()); - } - - void IncCounter(NDataShard::ESimpleCounters counter, ui64 num = 1) const { - TabletCounters->Simple()[counter].Add(num); - } - // For syslocks 
void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); + Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) const { - TabletCounters->Percentile()[counter].IncrementFor(num); + Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MilliSeconds()); + Counters.GetTabletCounters()->IncCounter(counter, latency); } inline TRowVersion LastCompleteTxVersion() const { @@ -328,7 +326,7 @@ class TColumnShard } private: - void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); + void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); EOverloadStatus CheckOverloaded(const ui64 tableId) const; protected: @@ -356,6 +354,9 @@ class TColumnShard switch (ev->GetTypeRewrite()) { hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); + HFunc(TEvTxProcessing::TEvReadSet, Handle); + HFunc(TEvTxProcessing::TEvReadSetAck, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvServerConnected, Handle); @@ -375,11 +376,15 @@ class TColumnShard HFunc(TEvPrivate::TEvScanStats, Handle); HFunc(TEvPrivate::TEvReadFinished, Handle); HFunc(TEvPrivate::TEvPeriodicWakeup, Handle); + HFunc(NActors::TEvents::TEvWakeup, Handle); + HFunc(TEvPrivate::TEvPingSnapshotsUsage, Handle); + HFunc(NEvents::TDataEvents::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteDraft, Handle); HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); HFunc(TEvPrivate::TEvTieringModified, 
Handle); + HFunc(NStat::TEvStatistics::TEvAnalyzeTable, Handle); HFunc(NStat::TEvStatistics::TEvStatisticsRequest, Handle); HFunc(NActors::TEvents::TEvUndelivered, Handle); @@ -408,6 +413,9 @@ class TColumnShard } private: + std::unique_ptr TabletCountersHolder; + TCountersManager Counters; + std::unique_ptr ProgressTxController; std::unique_ptr OperationsManager; std::shared_ptr SharingSessionsManager; @@ -418,68 +426,20 @@ class TColumnShard using TSchemaPreset = TSchemaPreset; using TTableInfo = TTableInfo; + const TMonotonic CreateInstant = TMonotonic::Now(); + std::optional StartInstant; + struct TLongTxWriteInfo { - ui64 WriteId; + TInsertWriteId InsertWriteId; ui32 WritePartId; NLongTxService::TLongTxId LongTxId; ui64 PreparedTxId = 0; std::optional GranuleShardingVersionId; }; - class TWritesMonitor { - private: - TColumnShard& Owner; - YDB_READONLY(ui64, WritesInFlight, 0); - YDB_READONLY(ui64, WritesSizeInFlight, 0); - - public: - class TGuard: public TNonCopyable { - friend class TWritesMonitor; - private: - TWritesMonitor& Owner; - - explicit TGuard(TWritesMonitor& owner) - : Owner(owner) - {} - - public: - ~TGuard() { - Owner.UpdateCounters(); - } - }; - - TWritesMonitor(TColumnShard& owner) - : Owner(owner) - {} - - TGuard RegisterWrite(const ui64 dataSize) { - ++WritesInFlight; - WritesSizeInFlight += dataSize; - return TGuard(*this); - } - - TGuard FinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { - Y_ABORT_UNLESS(WritesInFlight > 0); - Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); - WritesInFlight -= writesCount; - WritesSizeInFlight -= dataSize; - return TGuard(*this); - } - - TString DebugString() const { - return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight << "}"; - } - - private: - void UpdateCounters() { - Owner.SetCounter(COUNTER_WRITES_IN_FLY, WritesInFlight); - } - }; - ui64 CurrentSchemeShardId = 0; TMessageSeqNo LastSchemaSeqNo; std::optional ProcessingParams; - 
TWriteId LastWriteId = TWriteId{0}; ui64 LastPlannedStep = 0; ui64 LastPlannedTxId = 0; NOlap::TSnapshot LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -495,35 +455,26 @@ class TColumnShard const TDuration PeriodicWakeupActivationPeriod; TDuration FailActivationDelay = TDuration::Seconds(1); const TDuration StatsReportInterval; - TInstant LastAccessTime; TInstant LastStatsReport; TActorId ResourceSubscribeActor; TActorId BufferizationWriteActorId; TActorId StatsReportPipe; + std::vector ActorsToStop; TInFlightReadsTracker InFlightReadsTracker; TTablesManager TablesManager; std::shared_ptr Subscribers; std::shared_ptr Tiers; - std::unique_ptr TabletCountersPtr; - TTabletCountersBase* TabletCounters; std::unique_ptr PipeClientCache; std::unique_ptr InsertTable; - std::shared_ptr SubscribeCounters; NOlap::NResourceBroker::NSubscribe::TTaskContext InsertTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext CompactTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext TTLTaskSubscription; - const TScanCounters ScanCounters; - const TIndexationCounters CompactionCounters = TIndexationCounters("GeneralCompaction"); - const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation"); - const TIndexationCounters EvictionCounters = TIndexationCounters("Eviction"); - - const TCSCounters CSCounters; - TWritesMonitor WritesMonitor; - bool ProgressTxInFlight = false; + + std::optional ProgressTxInFlight; THashMap ScanTxInFlight; - THashMap LongTxWrites; + THashMap LongTxWrites; using TPartsForLTXShard = THashMap; THashMap LongTxWritesByUniqueId; TMultiMap WaitingScans; @@ -532,31 +483,29 @@ class TColumnShard TLimits Limits; NOlap::TNormalizationController NormalizerController; NDataShard::TSysLocks SysLocks; + static TDuration GetMaxReadStaleness(); void TryRegisterMediatorTimeCast(); void UnregisterMediatorTimeCast(); + void TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort); bool WaitPlanStep(ui64 
step); void SendWaitPlanStep(ui64 step); void RescheduleWaitingReads(); NOlap::TSnapshot GetMaxReadVersion() const; - ui64 GetMinReadStep() const; + NOlap::TSnapshot GetMinReadSnapshot() const; ui64 GetOutdatedStep() const; TDuration GetTxCompleteLag() const { ui64 mediatorTime = MediatorTimeCastEntry ? MediatorTimeCastEntry->Get(TabletID()) : 0; return ProgressTxController->GetTxCompleteLag(mediatorTime); } - TWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; - TWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); - void AddLongTxWrite(TWriteId writeId, ui64 txId); - void LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); - bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId); + TInsertWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; + TInsertWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); + void AddLongTxWrite(const TInsertWriteId writeId, ui64 txId); + void LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); + bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId); - TWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); - TWriteId BuildNextWriteId(NIceDb::TNiceDb& db); - - void EnqueueProgressTx(const TActorContext& ctx); void EnqueueBackgroundActivities(const bool periodic = false); virtual void Enqueue(STFUNC_SIG) override; @@ -570,7 +519,7 @@ class TColumnShard void RunDropTable(const NKikimrTxColumnShard::TDropTable& body, const NOlap::TSnapshot& version, 
NTabletFlatExecutor::TTransactionContext& txc); void RunAlterStore(const NKikimrTxColumnShard::TAlterStore& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); + void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); void SetupIndexation(); void SetupCompaction(); bool SetupTtl(const THashMap& pathTtls = {}); @@ -587,12 +536,14 @@ class TColumnShard void SendPeriodicStats(); void FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev); void FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev); - void ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats* tabletStats); - void FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const; public: ui64 TabletTxCounter = 0; + bool HasLongTxWrites(const TInsertWriteId insertWriteId) const { + return LongTxWrites.contains(insertWriteId); + } + void EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId); NOlap::TSnapshot GetLastTxSnapshot() const { return NOlap::TSnapshot(LastPlannedStep, LastPlannedTxId); } @@ -620,6 +571,11 @@ class TColumnShard return *ProgressTxController; } + TOperationsManager& GetOperationsManager() const { + AFL_VERIFY(OperationsManager); + return *OperationsManager; + } + bool HasIndex() const { return !!TablesManager.GetPrimaryIndex(); } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index f46c8e8b7191..cb0e8cd97150 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -11,6 +11,10 @@ #include #include +namespace NKikimr::NOlap::NReader { +class IApplyAction; +} + namespace NKikimr::NColumnShard { struct TEvPrivate { @@ -41,11 +45,28 @@ struct TEvPrivate { EvExportCursorSaved, EvExportSaveCursor, + EvTaskProcessedResult, + 
EvPingSnapshotsUsage, + EvEnd }; static_assert(EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + class TEvTaskProcessedResult: public NActors::TEventLocal { + private: + TConclusion> Result; + + public: + TConclusion> ExtractResult() { + return std::move(Result); + } + + TEvTaskProcessedResult(const TConclusion>& result) + : Result(result) { + } + }; + struct TEvTieringModified: public TEventLocal { }; @@ -122,8 +143,9 @@ struct TEvPrivate { struct TEvReadFinished : public TEventLocal { explicit TEvReadFinished(ui64 requestCookie, ui64 txId = 0) - : RequestCookie(requestCookie), TxId(txId) - {} + : RequestCookie(requestCookie) + , TxId(txId) { + } ui64 RequestCookie; ui64 TxId; @@ -137,17 +159,39 @@ struct TEvPrivate { bool Manual; }; - class TEvWriteBlobsResult : public TEventLocal { + struct TEvPingSnapshotsUsage: public TEventLocal { + TEvPingSnapshotsUsage() = default; + }; + + class TEvWriteBlobsResult: public TEventLocal { + public: + enum EErrorClass { + Internal, + Request + }; private: NColumnShard::TBlobPutResult::TPtr PutResult; NOlap::TWritingBuffer WritesBuffer; YDB_READONLY_DEF(TString, ErrorMessage); + YDB_ACCESSOR(EErrorClass, ErrorClass, EErrorClass::Internal); + public: + + NKikimrDataEvents::TEvWriteResult::EStatus GetWriteResultStatus() const { + switch (ErrorClass) { + case EErrorClass::Internal: + return NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR; + case EErrorClass::Request: + return NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST; + } + } - static std::unique_ptr Error(const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error) { - std::unique_ptr result = std::make_unique(std::make_shared(status), - std::move(writesBuffer)); + static std::unique_ptr Error( + const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error, const EErrorClass errorClass) { + std::unique_ptr result = + 
std::make_unique(std::make_shared(status), std::move(writesBuffer)); result->ErrorMessage = error; + result->ErrorClass = errorClass; return result; } diff --git a/ydb/core/tx/columnshard/columnshard_schema.cpp b/ydb/core/tx/columnshard/columnshard_schema.cpp index 3d9dc8e7a9b0..c9a60029a892 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/columnshard_schema.cpp @@ -4,51 +4,24 @@ namespace NKikimr::NColumnShard { bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsGroupSelector, NOlap::TInsertTableAccessor& insertTable, const TInstant& /*loadTime*/) { - auto rowset = db.Table().GreaterOrEqual(0, 0, 0, 0, "").Select(); + auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - EInsertTableIds recType = (EInsertTableIds)rowset.GetValue(); - ui64 planStep = rowset.GetValue(); - ui64 writeTxId = rowset.GetValueOrDefault(); - ui64 pathId = rowset.GetValue(); - TString dedupId = rowset.GetValue(); - TString strBlobId = rowset.GetValue(); - TString metaStr = rowset.GetValue(); - ui64 schemaVersion = rowset.HaveValue() ? 
rowset.GetValue() : 0; + NOlap::TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); - TString error; - NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(strBlobId, dsGroupSelector, error); - Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); - - NKikimrTxColumnShard::TLogicalMetadata meta; - if (metaStr) { - Y_ABORT_UNLESS(meta.ParseFromString(metaStr)); - } - - std::optional rangeOffset; - if (rowset.HaveValue()) { - rangeOffset = rowset.GetValue(); - } - std::optional rangeSize; - if (rowset.HaveValue()) { - rangeSize = rowset.GetValue(); - } - - AFL_VERIFY(!!rangeOffset == !!rangeSize); - TInsertedData data(planStep, writeTxId, pathId, dedupId, NOlap::TBlobRange(blobId, rangeOffset.value_or(0), rangeSize.value_or(blobId.BlobSize())), meta, schemaVersion, {}); - - switch (recType) { - case EInsertTableIds::Inserted: - insertTable.AddInserted(std::move(data), true); + switch (constructor.GetRecType()) { + case Schema::EInsertTableIds::Inserted: + insertTable.AddInserted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; - case EInsertTableIds::Committed: - insertTable.AddCommitted(std::move(data), true); + case Schema::EInsertTableIds::Committed: + insertTable.AddCommitted(constructor.BuildCommitted(dsGroupSelector), true); break; - case EInsertTableIds::Aborted: - insertTable.AddAborted(std::move(data), true); + case Schema::EInsertTableIds::Aborted: + insertTable.AddAborted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; } if (!rowset.Next()) { @@ -59,6 +32,7 @@ bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsG } void Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody) { + AFL_VERIFY(txInfo.TxKind != NKikimrTxColumnShard::TX_KIND_NONE); db.Table().Key(txInfo.TxId).Update( NIceDb::TUpdate(txInfo.TxKind), NIceDb::TUpdate(txBody), @@ -70,11 +44,14 @@ void 
Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TS } void Schema::UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo) { - db.Table().Key(txInfo.GetTxId()).Update( - NIceDb::TUpdate(txInfo.Source), - NIceDb::TUpdate(txInfo.Cookie), - NIceDb::TUpdate(txInfo.SerializeSeqNoAsString()) - ); + db.Table() + .Key(txInfo.GetTxId()) + .Update(NIceDb::TUpdate(txInfo.Source), NIceDb::TUpdate(txInfo.Cookie), + NIceDb::TUpdate(txInfo.SerializeSeqNoAsString())); } +void Schema::UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody) { + db.Table().Key(txId).Update(NIceDb::TUpdate(txBody)); } + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index 50c67db625b1..8b104b9dcd58 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -17,7 +17,7 @@ class TColumnChunkLoadContext; namespace NKikimr::NColumnShard { -using NOlap::TWriteId; +using NOlap::TInsertWriteId; using NOlap::IBlobGroupSelector; struct TFullTxInfo; @@ -31,6 +31,7 @@ struct Schema : NIceDb::Schema { using TSettings = SchemaSettings; using TInsertedData = NOlap::TInsertedData; + using TCommittedData = NOlap::TCommittedData; using TColumnRecord = NOlap::TColumnRecord; enum EIndexTables : ui32 { @@ -108,7 +109,11 @@ struct Schema : NIceDb::Schema { TableVersionInfo = 11, SmallBlobs = 12, OneToOneEvictedBlobs = 13, - BlobsToDeleteWT = 14 + BlobsToDeleteWT = 14, + InFlightSnapshots = 15, + TxDependencies = 16, + TxStates = 17, + TxEvents = 18 }; // Tablet tables @@ -250,6 +255,40 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct InFlightSnapshots: Table<(ui32)ECommonTables::InFlightSnapshots> { + struct PlanStep: Column<1, NScheme::NTypeIds::Uint64> {}; + struct TxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct 
TxDependencies: Table<(ui32)ECommonTables::TxDependencies> { + struct CommitTxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct BrokenTxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxStates: Table<(ui32)ECommonTables::TxStates> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct Broken: Column<2, NScheme::NTypeIds::Bool> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxEvents: Table<(ui32)ECommonTables::TxEvents> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct GenerationId: Column<2, NScheme::NTypeIds::Uint64> {}; + struct GenerationInternalId: Column<3, NScheme::NTypeIds::Uint64> {}; + struct Data: Column<4, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + // Index tables // InsertTable - common for all indices @@ -372,9 +411,10 @@ struct Schema : NIceDb::Schema { struct Size: Column<7, NScheme::NTypeIds::Uint32> {}; struct RecordsCount: Column<8, NScheme::NTypeIds::Uint32> {}; struct RawBytes: Column<9, NScheme::NTypeIds::Uint64> {}; + struct BlobData: Column<10, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct SharedBlobIds: NIceDb::Schema::Table { @@ -544,7 +584,11 @@ struct Schema : NIceDb::Schema { BackgroundSessions, ShardingInfo, Normalizers, - NormalizerEvents + NormalizerEvents, + InFlightSnapshots, + TxDependencies, + TxStates, + TxEvents >; // @@ -660,8 +704,8 @@ struct Schema : NIceDb::Schema { static void SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody); + static void UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody); static void UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo); - static void UpdateTxInfoSource(NIceDb::TNiceDb& db, ui64 txId, const TActorId& source, ui64 cookie) { 
db.Table().Key(txId).Update( NIceDb::TUpdate(source), @@ -741,7 +785,7 @@ struct Schema : NIceDb::Schema { db.Table().Key(pathId).Delete(); } - static void SaveLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { + static void SaveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { NKikimrLongTxService::TLongTxId proto; longTxId.ToProto(&proto); TString serialized; @@ -753,32 +797,49 @@ struct Schema : NIceDb::Schema { ); } - static void EraseLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId) { + static void EraseLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId) { db.Table().Key((ui64)writeId).Delete(); } // InsertTable activities - static void InsertTable_Upsert(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Update( - NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), - NIceDb::TUpdate(data.GetBlobRange().Offset), - NIceDb::TUpdate(data.GetBlobRange().Size), - NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), - NIceDb::TUpdate(data.GetSchemaVersion()) - ); + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const EInsertTableIds recType, const TInsertedData& data) { + db.Table() + .Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "") + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); + } + + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + 
.Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), + data.GetDedupId()) + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); } static void InsertTable_Erase(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Delete(); + db.Table().Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "").Delete(); + } + + static void InsertTable_Erase(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + .Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), data.GetDedupId()) + .Delete(); } static void InsertTable_Insert(NIceDb::TNiceDb& db, const TInsertedData& data) { InsertTable_Upsert(db, EInsertTableIds::Inserted, data); } - static void InsertTable_Commit(NIceDb::TNiceDb& db, const TInsertedData& data) { - InsertTable_Upsert(db, EInsertTableIds::Committed, data); + static void InsertTable_Commit(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Upsert(db, data); } static void InsertTable_Abort(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -789,8 +850,8 @@ struct Schema : NIceDb::Schema { InsertTable_Erase(db, EInsertTableIds::Inserted, data); } - static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TInsertedData& data) { - InsertTable_Erase(db, EInsertTableIds::Committed, data); + static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Erase(db, data); } static void InsertTable_EraseAborted(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -876,13 +937,20 @@ class TColumnChunkLoadContext { 
class TIndexChunkLoadContext { private: - YDB_READONLY_DEF(TBlobRange, BlobRange); + YDB_READONLY_DEF(std::optional, BlobRange); + YDB_READONLY_DEF(std::optional, BlobData); TChunkAddress Address; const ui32 RecordsCount; const ui32 RawBytes; public: TIndexChunk BuildIndexChunk(const TBlobRangeLink16::TLinkId blobLinkId) const { - return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange.BuildLink(blobLinkId)); + AFL_VERIFY(BlobRange); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange->BuildLink(blobLinkId)); + } + + TIndexChunk BuildIndexChunk() const { + AFL_VERIFY(BlobData); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, *BlobData); } template @@ -892,14 +960,141 @@ class TIndexChunkLoadContext { , RawBytes(rowset.template GetValue()) { AFL_VERIFY(Address.GetColumnId())("event", "incorrect address")("address", Address.DebugString()); - TString strBlobId = rowset.template GetValue(); - Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); - TLogoBlobID logoBlobId((const ui64*)strBlobId.data()); - BlobRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); - BlobRange.Offset = rowset.template GetValue(); - BlobRange.Size = rowset.template GetValue(); - AFL_VERIFY(BlobRange.BlobId.IsValid() && BlobRange.Size)("event", "incorrect blob")("blob", BlobRange.ToString()); + if (rowset.template HaveValue()) { + TBlobRange& bRange = BlobRange.emplace(); + TString strBlobId = rowset.template GetValue(); + Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); + TLogoBlobID logoBlobId((const ui64*)strBlobId.data()); + bRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); + bRange.Offset = rowset.template GetValue(); + bRange.Size = 
rowset.template GetValue(); + AFL_VERIFY(bRange.BlobId.IsValid() && bRange.Size)("event", "incorrect blob")("blob", bRange.ToString()); + } else if (rowset.template HaveValue()) { + BlobData = rowset.template GetValue(); + } else { + AFL_VERIFY(false); + } } }; -} +class TInsertTableRecordLoadContext { +private: + NColumnShard::Schema::EInsertTableIds RecType; + ui64 PlanStep; + ui64 WriteTxId; + ui64 PathId; + YDB_ACCESSOR_DEF(TString, DedupId); + ui64 SchemaVersion; + TString BlobIdString; + std::optional BlobId; + TString MetadataString; + std::optional Metadata; + std::optional RangeOffset; + std::optional RangeSize; + + void Prepare(const IBlobGroupSelector* dsGroupSelector) { + AFL_VERIFY(!PreparedFlag); + PreparedFlag = true; + TString error; + NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(BlobIdString, dsGroupSelector, error); + Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); + BlobId = blobId; + + NKikimrTxColumnShard::TLogicalMetadata meta; + AFL_VERIFY(MetadataString); + Y_ABORT_UNLESS(meta.ParseFromString(MetadataString)); + Metadata = std::move(meta); + AFL_VERIFY(!!RangeOffset == !!RangeSize); + } + + bool PreparedFlag = false; + bool ParsedFlag = false; + +public: + TInsertWriteId GetInsertWriteId() const { + AFL_VERIFY(ParsedFlag); + AFL_VERIFY(RecType != NColumnShard::Schema::EInsertTableIds::Committed); + return (TInsertWriteId)WriteTxId; + } + + NColumnShard::Schema::EInsertTableIds GetRecType() const { + AFL_VERIFY(ParsedFlag); + return RecType; + } + + ui64 GetPlanStep() const { + AFL_VERIFY(ParsedFlag); + return PlanStep; + } + + void Remove(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + db.Table().Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId).Delete(); + } + + void Upsert(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + using namespace NColumnShard; + if (RangeOffset) { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + 
.Update(NIceDb::TUpdate(BlobIdString), + NIceDb::TUpdate(*RangeOffset), + NIceDb::TUpdate(*RangeSize), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } else { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + .Update(NIceDb::TUpdate(BlobIdString), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } + } + + template + void ParseFromDatabase(TRowset& rowset) { + AFL_VERIFY(!ParsedFlag)("problem", "duplication parsing"); + ParsedFlag = true; + using namespace NColumnShard; + RecType = (Schema::EInsertTableIds)rowset.template GetValue(); + PlanStep = rowset.template GetValue(); + WriteTxId = rowset.template GetValueOrDefault(); + AFL_VERIFY(WriteTxId); + + PathId = rowset.template GetValue(); + DedupId = rowset.template GetValue(); + SchemaVersion = + rowset.template HaveValue() ? rowset.template GetValue() : 0; + BlobIdString = rowset.template GetValue(); + MetadataString = rowset.template GetValue(); + if (rowset.template HaveValue()) { + RangeOffset = rowset.template GetValue(); + } + if (rowset.template HaveValue()) { + RangeSize = rowset.template GetValue(); + } + } + + NOlap::TCommittedData BuildCommitted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType == Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!!DedupId); + AFL_VERIFY(PlanStep); + return NOlap::TCommittedData(userData, PlanStep, WriteTxId, DedupId); + } + + NOlap::TInsertedData BuildInsertedOrAborted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType != Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), 
RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!DedupId); + AFL_VERIFY(!PlanStep); + return NOlap::TInsertedData((TInsertWriteId)WriteTxId, userData); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/common/limits.h b/ydb/core/tx/columnshard/common/limits.h index 04a5cb55128e..b30432dfb2fd 100644 --- a/ydb/core/tx/columnshard/common/limits.h +++ b/ydb/core/tx/columnshard/common/limits.h @@ -4,6 +4,15 @@ namespace NKikimr::NOlap { class TGlobalLimits { public: - static const inline ui64 TxWriteLimitBytes = 256 * 1024 * 1024; + static constexpr inline ui64 TxWriteLimitBytes = 256 * 1024 * 1024; + static constexpr inline ui64 TTLCompactionMemoryLimit = 1ULL << 30; + static constexpr inline ui64 InsertCompactionMemoryLimit = 1ULL << 30; + static constexpr inline ui64 GeneralCompactionMemoryLimit = 3ULL << 30; + static constexpr inline ui64 ScanMemoryLimit = 3ULL << 30; + + static constexpr inline ui64 DefaultBlobsMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultRejectMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultReduceMemoryIntervalLimit = 0.8 * ScanMemoryLimit; + static constexpr inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/common/scalars.cpp b/ydb/core/tx/columnshard/common/scalars.cpp index d614253e9ec1..d85622edeeec 100644 --- a/ydb/core/tx/columnshard/common/scalars.cpp +++ b/ydb/core/tx/columnshard/common/scalars.cpp @@ -1,6 +1,6 @@ #include "scalars.h" -#include +#include #include #include diff --git a/ydb/core/tx/columnshard/common/scalars.h b/ydb/core/tx/columnshard/common/scalars.h index 328296048620..7635caa19e11 100644 --- a/ydb/core/tx/columnshard/common/scalars.h +++ b/ydb/core/tx/columnshard/common/scalars.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git 
a/ydb/core/tx/columnshard/common/snapshot.cpp b/ydb/core/tx/columnshard/common/snapshot.cpp index 6ca80d818986..eb6e62ccac0c 100644 --- a/ydb/core/tx/columnshard/common/snapshot.cpp +++ b/ydb/core/tx/columnshard/common/snapshot.cpp @@ -35,4 +35,12 @@ TString TSnapshot::SerializeToString() const { return SerializeToProto().SerializeAsString(); } +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanStep(const ui64 planStep) noexcept { + return TSnapshot(planStep, ::Max()); +} + +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanInstant(const TInstant planInstant) noexcept { + return TSnapshot(planInstant.MilliSeconds(), ::Max()); +} + }; diff --git a/ydb/core/tx/columnshard/common/snapshot.h b/ydb/core/tx/columnshard/common/snapshot.h index ffa48670371e..4bc99d268420 100644 --- a/ydb/core/tx/columnshard/common/snapshot.h +++ b/ydb/core/tx/columnshard/common/snapshot.h @@ -54,6 +54,10 @@ class TSnapshot { return TSnapshot(-1ll, -1ll); } + static TSnapshot MaxForPlanInstant(const TInstant planInstant) noexcept; + + static TSnapshot MaxForPlanStep(const ui64 planStep) noexcept; + constexpr bool operator==(const TSnapshot&) const noexcept = default; constexpr auto operator<=>(const TSnapshot&) const noexcept = default; diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp b/ydb/core/tx/columnshard/common/tests/shard_reader.cpp deleted file mode 100644 index 2789a63e38a8..000000000000 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "shard_reader.h" - -namespace NKikimr::NOlap::NTests { -} diff --git a/ydb/core/tx/columnshard/common/tests/ya.make b/ydb/core/tx/columnshard/common/tests/ya.make deleted file mode 100644 index 8b3c3809006b..000000000000 --- a/ydb/core/tx/columnshard/common/tests/ya.make +++ /dev/null @@ -1,14 +0,0 @@ -LIBRARY() - -SRCS( - shard_reader.cpp -) - -PEERDIR( - ydb/core/formats/arrow/protos - contrib/libs/apache/arrow - ydb/core/formats/arrow - ydb/core/kqp/compute_actor -) - -END() diff --git 
a/ydb/core/tx/columnshard/common/ya.make b/ydb/core/tx/columnshard/common/ya.make index 87bd2c16b26b..c7d8a27bf3ee 100644 --- a/ydb/core/tx/columnshard/common/ya.make +++ b/ydb/core/tx/columnshard/common/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - limits.h + limits.cpp reverse_accessor.cpp scalars.cpp snapshot.cpp @@ -11,7 +11,7 @@ SRCS( ) PEERDIR( - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos contrib/libs/apache/arrow ydb/core/formats/arrow ydb/core/tx/columnshard/common/protos diff --git a/ydb/core/tx/columnshard/counters/aggregation/table_stats.h b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h new file mode 100644 index 000000000000..68f39a4191de --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTableStatsBuilder { +private: + TCountersManager& Counters; + const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor& Executor; + NOlap::IColumnEngine& ColumnEngine; + +public: + TTableStatsBuilder( + TCountersManager& counters, const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor* executor, NOlap::IColumnEngine& columnEngine) + : Counters(counters) + , Executor(*executor) + , ColumnEngine(columnEngine) { + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTableStats(pathId, tableStats); + + auto columnEngineStats = ColumnEngine.GetStats().FindPtr(pathId); + if (columnEngineStats && *columnEngineStats) { + auto activeStats = (*columnEngineStats)->Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTotalTableStats(tableStats); + + tableStats.SetInFlightTxCount(Executor.GetStats().TxInFly); + tableStats.SetHasLoanedParts(Executor.HasLoanedParts()); + + auto activeStats = 
ColumnEngine.GetTotalStats().Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/aggregation/ya.make b/ydb/core/tx/columnshard/counters/aggregation/ya.make new file mode 100644 index 000000000000..95687733d093 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +SRCS() + +PEERDIR( + ydb/core/protos + ydb/core/base +) + +END() diff --git a/ydb/core/tx/columnshard/counters/background_controller.cpp b/ydb/core/tx/columnshard/counters/background_controller.cpp new file mode 100644 index 000000000000..fcc89f6ee940 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.cpp @@ -0,0 +1,18 @@ +#include "background_controller.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +void TBackgroundControllerCounters::OnCompactionFinish(ui64 pathId) { + TInstant now = TAppData::TimeProvider->Now(); + TInstant& lastFinish = LastCompactionFinishByPathId[pathId]; + lastFinish = std::max(lastFinish, now); + + if (LastCompactionFinish < now) { + LastCompactionFinish = now; + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/background_controller.h b/ydb/core/tx/columnshard/counters/background_controller.h new file mode 100644 index 000000000000..d46d7cdacf91 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TBackgroundControllerCounters { +private: + THashMap LastCompactionFinishByPathId; + TInstant LastCompactionFinish; + +public: + void OnCompactionFinish(ui64 pathId); + + void FillStats(ui64 pathId, ::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(GetLastCompactionFinishInstant(pathId).value_or(TInstant::Zero()).Seconds()); + } + + void 
FillTotalStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(LastCompactionFinish.Seconds()); + } + +private: + std::optional GetLastCompactionFinishInstant(const ui64 pathId) const { + auto findInstant = LastCompactionFinishByPathId.FindPtr(pathId); + if (!findInstant) { + return std::nullopt; + } + return *findInstant; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.cpp b/ydb/core/tx/columnshard/counters/blobs_manager.cpp index 1da1ac7ff86b..edd9603d0d03 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.cpp +++ b/ydb/core/tx/columnshard/counters/blobs_manager.cpp @@ -7,58 +7,51 @@ namespace NKikimr::NColumnShard { TBlobsManagerCounters::TBlobsManagerCounters(const TString& module) : TCommonCountersOwner(module) -{ - SkipCollection = TBase::GetDeriviative("GC/Skip/Count"); - StartCollection = TBase::GetDeriviative("GC/Start/Count"); - CollectDropExplicitBytes = TBase::GetDeriviative("GC/Drop/Explicit/Bytes"); - CollectDropExplicitCount = TBase::GetDeriviative("GC/Drop/Explicit/Count"); - CollectDropImplicitBytes = TBase::GetDeriviative("GC/Drop/Implicit/Bytes"); - CollectDropImplicitCount = TBase::GetDeriviative("GC/Drop/Implicit/Count"); - CollectKeepBytes = TBase::GetDeriviative("GC/Keep/Bytes"); - CollectKeepCount = TBase::GetDeriviative("GC/Keep/Count"); - PutBlobBytes = TBase::GetDeriviative("GC/PutBlob/Bytes"); - PutBlobCount = TBase::GetDeriviative("GC/PutBlob/Count"); - CollectGen = TBase::GetValue("GC/Gen"); - CollectStep = TBase::GetValue("GC/Step"); - - DeleteBlobMarkerBytes = TBase::GetDeriviative("GC/MarkerDeleteBlob/Bytes"); - DeleteBlobMarkerCount = TBase::GetDeriviative("GC/MarkerDeleteBlob/Count"); - DeleteBlobDelayedMarkerBytes = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Bytes"); - DeleteBlobDelayedMarkerCount = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Count"); - AddSmallBlobBytes = 
TBase::GetDeriviative("GC/AddSmallBlob/Bytes"); - AddSmallBlobCount = TBase::GetDeriviative("GC/AddSmallBlob/Count"); - DeleteSmallBlobBytes = TBase::GetDeriviative("GC/DeleteSmallBlob/Bytes"); - DeleteSmallBlobCount = TBase::GetDeriviative("GC/DeleteSmallBlob/Count"); - - BlobsKeepCount = TBase::GetValue("GC/BlobsKeep/Count"); - BlobsKeepBytes = TBase::GetValue("GC/BlobsKeep/Bytes"); - BlobsDeleteCount = TBase::GetValue("GC/BlobsDelete/Count"); - BlobsDeleteBytes = TBase::GetValue("GC/BlobsDelete/Bytes"); + , BlobsToDeleteCount(TBase::GetValue("BlobsToDelete/Count")) + , BlobsToDeleteDelayedCount(TBase::GetValue("BlobsToDeleteDelayed/Count")) + , BlobsToKeepCount(TBase::GetValue("BlobsToKeep/Count")) + , CurrentGen(TBase::GetValue("CurrentGen")) + , CurrentStep(TBase::GetValue("CurrentStep")) + , GCCounters(*this, "GC") - BrokenKeepCount = TBase::GetDeriviative("GC/BrokenKeep/Count"); - BrokenKeepBytes = TBase::GetDeriviative("GC/BrokenKeep/Bytes"); +{ - KeepMarkerCount = TBase::GetDeriviative("GC/KeepMarker/Count"); - KeepMarkerBytes = TBase::GetDeriviative("GC/KeepMarker/Bytes"); } -void TBlobsManagerCounters::OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set>& blobs) const { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnBlobsKeep")("count", blobs.size()); -// BlobsKeepCount->Set(blobs.size()); -// ui64 size = 0; -// for (auto&& i : blobs) { -// size += i.BlobSize(); -// } -// BlobsKeepBytes->Set(size); +TBlobsManagerGCCounters::TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName) + : TBase(sameAs, componentName) + , SkipCollectionEmpty(TBase::GetDeriviative("Skip/Empty/Count")) + , SkipCollectionThrottling(TBase::GetDeriviative("Skip/Throttling/Count")) +{ + KeepsCountTasks = TBase::GetHistogram("Tasks/Keeps/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBlobs = TBase::GetHistogram("Tasks/Keeps/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBytes = 
TBase::GetHistogram("Tasks/Keeps/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + DeletesCountBlobs = TBase::GetHistogram("Tasks/Deletes/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); + DeletesCountTasks = TBase::GetHistogram("Tasks/Deletes/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); + DeletesCountBytes = TBase::GetHistogram("Tasks/Deletes/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + FullGCTasks = TBase::GetDeriviative("Tasks/Full/Count"); + MoveBarriers = TBase::GetDeriviative("Tasks/Barrier/Move"); + DontMoveBarriers = TBase::GetDeriviative("Tasks/Barrier/DontMove"); + GCTasks = TBase::GetDeriviative("Tasks/All/Count"); + EmptyGCTasks = TBase::GetDeriviative("Tasks/Empty/Count"); } -void TBlobsManagerCounters::OnBlobsDelete(const NOlap::TTabletsByBlob& /*blobs*/) const { - // BlobsDeleteCount->Set(blobs.size()); - // ui64 size = 0; - // for (auto&& i : blobs) { - // size += i.BlobSize(); - // } - // BlobsDeleteBytes->Set(size); +void TBlobsManagerGCCounters::OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, const bool isFull, const bool moveBarrier) const { + GCTasks->Add(1); + if (isFull) { + FullGCTasks->Add(1); + } + KeepsCountTasks->Collect(keepsCount); + KeepsCountBlobs->Collect((i64)keepsCount, keepsCount); + KeepsCountBytes->Collect((i64)keepsCount, keepBytes); + DeletesCountTasks->Collect(deleteCount); + DeletesCountBlobs->Collect((i64)deleteCount, deleteCount); + DeletesCountBytes->Collect((i64)deleteCount, deleteBytes); + if (moveBarrier) { + MoveBarriers->Add(1); + } else { + DontMoveBarriers->Add(1); + } } } diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.h b/ydb/core/tx/columnshard/counters/blobs_manager.h index 2c555eaac9f4..8490ff38814e 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.h +++ b/ydb/core/tx/columnshard/counters/blobs_manager.h @@ -2,6 +2,7 @@ #include "common/owner.h" #include +#include #include #include @@ -13,99 
+14,53 @@ class TTabletsByBlob; namespace NKikimr::NColumnShard { -class TBlobsManagerCounters: public TCommonCountersOwner { +class TBlobsManagerGCCounters: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectGen; - NMonitoring::TDynamicCounters::TCounterPtr CollectStep; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteBytes; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerBytes; - + NMonitoring::THistogramPtr KeepsCountBytes; + NMonitoring::THistogramPtr 
KeepsCountBlobs; + NMonitoring::THistogramPtr KeepsCountTasks; + NMonitoring::THistogramPtr DeletesCountBytes; + NMonitoring::THistogramPtr DeletesCountBlobs; + NMonitoring::THistogramPtr DeletesCountTasks; + NMonitoring::TDynamicCounters::TCounterPtr FullGCTasks; + NMonitoring::TDynamicCounters::TCounterPtr MoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr DontMoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr GCTasks; + NMonitoring::TDynamicCounters::TCounterPtr EmptyGCTasks; public: - NMonitoring::TDynamicCounters::TCounterPtr SkipCollection; - NMonitoring::TDynamicCounters::TCounterPtr StartCollection; - - TBlobsManagerCounters(const TString& module); - - void OnKeepMarker(const ui64 size) const { - KeepMarkerCount->Add(1); - KeepMarkerBytes->Add(size); - } - - void OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set>& blobs) const; - - void OnBlobsDelete(const NOlap::TTabletsByBlob& blobs) const; - - void OnAddSmallBlob(const ui32 bSize) const { - AddSmallBlobBytes->Add(bSize); - AddSmallBlobCount->Add(1); - } - - void OnDeleteBlobDelayedMarker(const ui32 bSize) const { - DeleteBlobDelayedMarkerBytes->Add(bSize); - DeleteBlobDelayedMarkerCount->Add(1); - } - - void OnDeleteBlobMarker(const ui32 bSize) const { - DeleteBlobMarkerBytes->Add(bSize); - DeleteBlobMarkerCount->Add(1); - } + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionEmpty; + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionThrottling; - void OnNewCollectStep(const ui32 gen, const ui32 step) const { - CollectGen->Set(gen); - CollectStep->Set(step); - } + TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName); - void OnDeleteSmallBlob(const ui32 bSize) const { - DeleteSmallBlobBytes->Add(bSize); - DeleteSmallBlobCount->Add(1); - } + void OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, + const bool isFull, const bool moveBarrier) const; - void OnPutResult(const 
ui32 bSize) const { - PutBlobBytes->Add(bSize); - PutBlobCount->Add(1); - } - - void OnCollectKeep(const ui32 bSize) const { - CollectKeepBytes->Add(bSize); - CollectKeepCount->Add(1); + void OnEmptyGCTask() const { + EmptyGCTasks->Add(1); } +}; - void OnBrokenKeep(const ui32 bSize) const { - BrokenKeepBytes->Add(bSize); - BrokenKeepCount->Add(1); +class TBlobsManagerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteDelayedCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToKeepCount; +public: + const NMonitoring::TDynamicCounters::TCounterPtr CurrentGen; + const NMonitoring::TDynamicCounters::TCounterPtr CurrentStep; + const TBlobsManagerGCCounters GCCounters; + TBlobsManagerCounters(const TString& module); + void OnBlobsToDelete(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteCount->Set(blobs.GetSize()); } - - void OnCollectDropExplicit(const ui32 bSize) const { - CollectDropExplicitBytes->Add(bSize); - CollectDropExplicitCount->Add(1); + void OnBlobsToKeep(const NOlap::TBlobsByGenStep& blobs) const { + BlobsToKeepCount->Set(blobs.GetSize()); } - - void OnCollectDropImplicit(const ui32 bSize) const { - CollectDropImplicitBytes->Add(bSize); - CollectDropImplicitCount->Add(1); + void OnBlobsToDeleteDelayed(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteDelayedCount->Set(blobs.GetSize()); } }; diff --git a/ydb/core/tx/columnshard/counters/column_tables.cpp b/ydb/core/tx/columnshard/counters/column_tables.cpp new file mode 100644 index 000000000000..51b9ecf6283e --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.cpp @@ -0,0 +1,13 @@ +#include "column_tables.h" + +namespace NKikimr::NColumnShard { + +std::shared_ptr TColumnTablesCounters::GetPathIdCounter(ui64 pathId) { + auto findCounter = PathIdCounters.FindPtr(pathId); + if (findCounter) { + 
return *findCounter; + } + return PathIdCounters.emplace(pathId, std::make_shared(*this)).first->second; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/column_tables.h b/ydb/core/tx/columnshard/counters/column_tables.h new file mode 100644 index 000000000000..db00069218fb --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.h @@ -0,0 +1,85 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TSingleColumnTableCounters; + +class TColumnTablesCounters { +private: + YDB_READONLY_CONST(std::shared_ptr, LastAccessTime); + YDB_READONLY_CONST(std::shared_ptr, LastUpdateTime); + + THashMap> PathIdCounters; + + friend class TSingleColumnTableCounters; + +public: + TColumnTablesCounters() + : LastAccessTime(std::make_shared()) + , LastUpdateTime(std::make_shared()) { + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(LastAccessTime->MilliSeconds()); + output.SetLastUpdateTime(LastUpdateTime->MilliSeconds()); + } + + std::shared_ptr GetPathIdCounter(ui64 pathId); +}; + +class TSingleColumnTableCounters { +private: + YDB_READONLY(TInstant, PathIdLastAccessTime, TInstant::Zero()); + YDB_READONLY(TInstant, PathIdLastUpdateTime, TInstant::Zero()); + + const std::shared_ptr TotalLastAccessTime; + const std::shared_ptr TotalLastUpdateTime; + +public: + TSingleColumnTableCounters(TColumnTablesCounters& owner) + : TotalLastAccessTime(owner.LastAccessTime) + , TotalLastUpdateTime(owner.LastUpdateTime) { + } + + void OnReadEvent() { + UpdateLastAccessTime(TAppData::TimeProvider->Now()); + } + + void OnWriteEvent() { + TInstant now = TAppData::TimeProvider->Now(); + UpdateLastUpdateTime(now); + UpdateLastAccessTime(now); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(PathIdLastAccessTime.MilliSeconds()); + output.SetLastUpdateTime(PathIdLastUpdateTime.MilliSeconds()); + 
} + +private: + void UpdateLastAccessTime(TInstant value) { + if (PathIdLastAccessTime < value) { + PathIdLastAccessTime = value; + } + if (*TotalLastAccessTime < value) { + *TotalLastAccessTime = value; + } + } + + void UpdateLastUpdateTime(TInstant value) { + if (PathIdLastUpdateTime < value) { + PathIdLastUpdateTime = value; + } + if (*TotalLastUpdateTime < value) { + *TotalLastUpdateTime = value; + } + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/columnshard.cpp b/ydb/core/tx/columnshard/counters/columnshard.cpp index 64c7e2931c5f..67c569e1313b 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.cpp +++ b/ydb/core/tx/columnshard/counters/columnshard.cpp @@ -8,7 +8,9 @@ namespace NKikimr::NColumnShard { TCSCounters::TCSCounters() : TBase("CS") -{ + , WritingCounters(std::make_shared(*this)) + , Initialization(*this) + , TxProgress(*this) { StartBackgroundCount = TBase::GetDeriviative("StartBackground/Count"); TooEarlyBackgroundCount = TBase::GetDeriviative("TooEarlyBackground/Count"); SetupCompactionCount = TBase::GetDeriviative("SetupCompaction/Count"); @@ -52,6 +54,7 @@ TCSCounters::TCSCounters() HistogramSuccessWriteMiddle6PutBlobsDurationMs = TBase::GetHistogram("SuccessWriteMiddle6PutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramFailedWritePutBlobsDurationMs = TBase::GetHistogram("FailedWritePutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramWriteTxCompleteDurationMs = TBase::GetHistogram("WriteTxCompleteDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); + WritePutBlobsCount = TBase::GetValue("WritePutBlobs"); WriteRequests = TBase::GetValue("WriteRequests"); diff --git a/ydb/core/tx/columnshard/counters/columnshard.h b/ydb/core/tx/columnshard/counters/columnshard.h index 6bada377df17..81df8b300eb8 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.h +++ b/ydb/core/tx/columnshard/counters/columnshard.h @@ -1,8 +1,11 @@ #pragma once 
#include "common/owner.h" +#include "initialization.h" +#include "tx_progress.h" -#include +#include +#include #include namespace NKikimr::NColumnShard { @@ -13,7 +16,32 @@ enum class EWriteFailReason { LongTxDuplication /* "long_tx_duplication" */, NoTable /* "no_table" */, IncorrectSchema /* "incorrect_schema" */, - Overload /* "overload" */ + Overload /* "overload" */, + OverlimitReadRawMemory /* "overlimit_read_raw_memory" */, + OverlimitReadBlobMemory /* "overlimit_read_blob_memory" */ +}; + +class TWriteCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr VolumeWriteData; + NMonitoring::THistogramPtr HistogramBytesWriteDataCount; + NMonitoring::THistogramPtr HistogramBytesWriteDataBytes; + +public: + TWriteCounters(TCommonCountersOwner& owner) + : TBase(owner, "activity", "writing") + { + VolumeWriteData = TBase::GetDeriviative("Write/Incoming/Bytes"); + HistogramBytesWriteDataCount = TBase::GetHistogram("Write/Incoming/ByBytes/Count", NMonitoring::ExponentialHistogram(18, 2, 100)); + HistogramBytesWriteDataBytes = TBase::GetHistogram("Write/Incoming/ByBytes/Bytes", NMonitoring::ExponentialHistogram(18, 2, 100)); + } + + void OnIncomingData(const ui64 dataSize) const { + VolumeWriteData->Add(dataSize); + HistogramBytesWriteDataCount->Collect((i64)dataSize, 1); + HistogramBytesWriteDataBytes->Collect((i64)dataSize, dataSize); + } }; class TCSCounters: public TCommonCountersOwner { @@ -62,11 +90,17 @@ class TCSCounters: public TCommonCountersOwner { NMonitoring::THistogramPtr HistogramSuccessWriteMiddle6PutBlobsDurationMs; NMonitoring::THistogramPtr HistogramFailedWritePutBlobsDurationMs; NMonitoring::THistogramPtr HistogramWriteTxCompleteDurationMs; + NMonitoring::TDynamicCounters::TCounterPtr WritePutBlobsCount; NMonitoring::TDynamicCounters::TCounterPtr WriteRequests; THashMap FailedWriteRequests; NMonitoring::TDynamicCounters::TCounterPtr SuccessWriteRequests; + public: + const 
std::shared_ptr WritingCounters; + const TCSInitialization Initialization; + TTxProgressCounters TxProgress; + void OnStartWriteRequest() const { WriteRequests->Add(1); } @@ -80,7 +114,6 @@ class TCSCounters: public TCommonCountersOwner { void OnWritePutBlobsSuccess(const TDuration d) const { HistogramSuccessWritePutBlobsDurationMs->Collect(d.MilliSeconds()); - WritePutBlobsCount->Sub(1); } void OnWriteMiddle1PutBlobsSuccess(const TDuration d) const { @@ -109,11 +142,6 @@ class TCSCounters: public TCommonCountersOwner { void OnWritePutBlobsFail(const TDuration d) const { HistogramFailedWritePutBlobsDurationMs->Collect(d.MilliSeconds()); - WritePutBlobsCount->Sub(1); - } - - void OnWritePutBlobsStart() const { - WritePutBlobsCount->Add(1); } void OnWriteTxComplete(const TDuration d) const { @@ -130,27 +158,27 @@ class TCSCounters: public TCommonCountersOwner { SplitCompactionGranulePortionsCount->SetValue(portionsCount); } - void OnOverloadInsertTable(const ui64 size) const { + void OnWriteOverloadInsertTable(const ui64 size) const { OverloadInsertTableBytes->Add(size); OverloadInsertTableCount->Add(1); } - void OnOverloadMetadata(const ui64 size) const { + void OnWriteOverloadMetadata(const ui64 size) const { OverloadMetadataBytes->Add(size); OverloadMetadataCount->Add(1); } - void OnOverloadShardTx(const ui64 size) const { + void OnWriteOverloadShardTx(const ui64 size) const { OverloadShardTxBytes->Add(size); OverloadShardTxCount->Add(1); } - void OnOverloadShardWrites(const ui64 size) const { + void OnWriteOverloadShardWrites(const ui64 size) const { OverloadShardWritesBytes->Add(size); OverloadShardWritesCount->Add(1); } - void OnOverloadShardWritesSize(const ui64 size) const { + void OnWriteOverloadShardWritesSize(const ui64 size) const { OverloadShardWritesSizeBytes->Add(size); OverloadShardWritesSizeCount->Add(1); } diff --git a/ydb/core/tx/columnshard/counters/counters_manager.cpp b/ydb/core/tx/columnshard/counters/counters_manager.cpp new file mode 100644 
index 000000000000..11b18888bc31 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.cpp @@ -0,0 +1,5 @@ +#include "counters_manager.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/counters_manager.h b/ydb/core/tx/columnshard/counters/counters_manager.h new file mode 100644 index 000000000000..17336ca3410d --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.h @@ -0,0 +1,97 @@ +#pragma once + +#include "background_controller.h" +#include "column_tables.h" +#include "columnshard.h" +#include "indexation.h" +#include "req_tracer.h" +#include "scan.h" +#include "tablet_counters.h" +#include "writes_monitor.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TCountersManager { +private: + YDB_READONLY_DEF(std::shared_ptr, TabletCounters); + YDB_READONLY_DEF(std::shared_ptr, WritesMonitor); + + YDB_READONLY_DEF(std::shared_ptr, BackgroundControllerCounters); + YDB_READONLY_DEF(std::shared_ptr, ColumnTablesCounters); + + YDB_READONLY(TCSCounters, CSCounters, TCSCounters()); + YDB_READONLY(TIndexationCounters, EvictionCounters, TIndexationCounters("Eviction")); + YDB_READONLY(TIndexationCounters, IndexationCounters, TIndexationCounters("Indexation")); + YDB_READONLY(TIndexationCounters, CompactionCounters, TIndexationCounters("GeneralCompaction")); + YDB_READONLY(TScanCounters, ScanCounters, TScanCounters("Scan")); + YDB_READONLY_DEF(std::shared_ptr, RequestsTracingCounters); + YDB_READONLY_DEF(std::shared_ptr, SubscribeCounters); + +public: + TCountersManager(TTabletCountersBase& tabletCounters) + : TabletCounters(std::make_shared(tabletCounters)) + , WritesMonitor(std::make_shared(tabletCounters)) + , BackgroundControllerCounters(std::make_shared()) + , ColumnTablesCounters(std::make_shared()) + , RequestsTracingCounters(std::make_shared()) + , 
SubscribeCounters(std::make_shared()) { + } + + void OnWriteOverloadDisk() const { + TabletCounters->IncCounter(COUNTER_OUT_OF_SPACE); + } + + void OnWriteOverloadInsertTable(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadInsertTable(size); + } + + void OnWriteOverloadMetadata(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadMetadata(size); + } + + void OnWriteOverloadShardTx(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardTx(size); + } + + void OnWriteOverloadShardWrites(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardWrites(size); + } + + void OnWriteOverloadShardWritesSize(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardWritesSize(size); + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->GetPathIdCounter(pathId)->FillStats(tableStats); + BackgroundControllerCounters->FillStats(pathId, tableStats); + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->FillStats(tableStats); + TabletCounters->FillStats(tableStats); + BackgroundControllerCounters->FillTotalStats(tableStats); + ScanCounters.FillStats(tableStats); + } + + void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const { + TabletCounters->OnWritePutBlobsSuccess(rowsWritten); + CSCounters.OnWritePutBlobsSuccess(d); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 368eebc40ef1..7a38e052c5ed 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -91,9 +91,11 @@ void 
TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); @@ -111,9 +113,11 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnDropPortion(const std::shared_pt } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Sub(portion->GetRecordsCount(), 1); diff --git a/ydb/core/tx/columnshard/counters/engine_logs.h b/ydb/core/tx/columnshard/counters/engine_logs.h index 97a4716652c3..2cbaf7fa234e 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.h +++ b/ydb/core/tx/columnshard/counters/engine_logs.h @@ -85,17 +85,44 @@ class TAgentDataClassCounters: public TCommonCountersOwner { } }; +class TIntervalMemoryCounters { +public: + const std::shared_ptr MinReadBytes; + TIntervalMemoryCounters(const std::shared_ptr& minReadBytes) + : MinReadBytes(minReadBytes) + { + + } +}; + +class TPortionsIndexCounters { +public: + const TIntervalMemoryCounters RawBytes; + const TIntervalMemoryCounters BlobBytes; + TPortionsIndexCounters(TIntervalMemoryCounters&& rawBytes, 
TIntervalMemoryCounters&& blobBytes) + : RawBytes(std::move(rawBytes)) + , BlobBytes(std::move(blobBytes)) { + } +}; + class TGranuleDataCounters { private: const TDataClassCounters InsertedData; const TDataClassCounters CompactedData; const TDataClassCounters FullData; + const TPortionsIndexCounters PortionsIndexCounters; + public: - TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData) + const TPortionsIndexCounters& GetPortionsIndexCounters() const { + return PortionsIndexCounters; + } + + TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData, + TPortionsIndexCounters&& portionsIndexCounters) : InsertedData(insertedData) , CompactedData(compactedData) , FullData(fullData) - { + , PortionsIndexCounters(std::move(portionsIndexCounters)) { } void OnPortionsDataRefresh(const TBaseGranuleDataClassSummary& inserted, const TBaseGranuleDataClassSummary& compacted) const { @@ -105,20 +132,60 @@ class TGranuleDataCounters { } }; +class TIntervalMemoryAgentCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const std::shared_ptr ReadBytes; +public: + TIntervalMemoryAgentCounters(const TCommonCountersOwner& base, const TString& memoryType) + : TBase(base, "memory", memoryType) + , ReadBytes(TBase::GetValueAutoAggregations("Bytes")) { + } + + TIntervalMemoryCounters GetClient() const { + return TIntervalMemoryCounters(ReadBytes->GetClient()); + } +}; + +class TPortionsIndexAgentsCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + TIntervalMemoryAgentCounters ReadRawBytes; + TIntervalMemoryAgentCounters ReadBlobBytes; + +public: + + TPortionsIndexAgentsCounters(const TString& baseName) + : TBase(baseName) + , ReadRawBytes(TBase::CreateSubGroup("control", "read_memory"), "raw") + , ReadBlobBytes(TBase::CreateSubGroup("control", 
"read_memory"), "blob") + { + } + + TPortionsIndexCounters BuildCounters() const { + return TPortionsIndexCounters(ReadRawBytes.GetClient(), ReadBlobBytes.GetClient()); + } +}; + class TAgentGranuleDataCounters { private: TAgentDataClassCounters InsertedData; TAgentDataClassCounters CompactedData; TAgentDataClassCounters FullData; + TPortionsIndexAgentsCounters PortionsIndex; + public: TAgentGranuleDataCounters(const TString& ownerId) : InsertedData(ownerId, "ByGranule/Inserted") , CompactedData(ownerId, "ByGranule/Compacted") - , FullData(ownerId, "ByGranule/Full") { + , FullData(ownerId, "ByGranule/Full") + , PortionsIndex("ByGranule/PortionsIndex") + { } TGranuleDataCounters RegisterClient() const { - return TGranuleDataCounters(InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient()); + return TGranuleDataCounters( + InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient(), PortionsIndex.BuildCounters()); } }; diff --git a/ydb/core/tx/columnshard/counters/initialization.h b/ydb/core/tx/columnshard/counters/initialization.h new file mode 100644 index 000000000000..2a6b432d6135 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/initialization.h @@ -0,0 +1,56 @@ +#pragma once +#include "common/owner.h" + +#include + +namespace NKikimr::NColumnShard { + +class TCSInitialization: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + + const NMonitoring::THistogramPtr HistogramTabletInitializationMs; + const NMonitoring::THistogramPtr HistogramTxInitDurationMs; + const NMonitoring::THistogramPtr HistogramTxUpdateSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramTxInitSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramActivateExecutorFromActivationDurationMs; + const NMonitoring::THistogramPtr HistogramSwitchToWorkFromActivationDurationMs; + const NMonitoring::THistogramPtr HistogramSwitchToWorkFromCreateDurationMs; + +public: + void 
OnTxInitFinished(const TDuration d) const { + HistogramTxInitDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxUpdateSchemaFinished(const TDuration d) const { + HistogramTxUpdateSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxInitSchemaFinished(const TDuration d) const { + HistogramTxInitSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnActivateExecutor(const TDuration fromCreate) const { + HistogramActivateExecutorFromActivationDurationMs->Collect(fromCreate.MilliSeconds()); + } + void OnSwitchToWork(const TDuration fromStart, const TDuration fromCreate) const { + HistogramSwitchToWorkFromActivationDurationMs->Collect(fromStart.MilliSeconds()); + HistogramSwitchToWorkFromCreateDurationMs->Collect(fromCreate.MilliSeconds()); + } + + TCSInitialization(TCommonCountersOwner& owner) + : TBase(owner, "stage", "initialization") + , HistogramTabletInitializationMs(TBase::GetHistogram("TabletInitializationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxInitDurationMs(TBase::GetHistogram("TxInitDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxUpdateSchemaDurationMs(TBase::GetHistogram("TxUpdateSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxInitSchemaDurationMs(TBase::GetHistogram("TxInitSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramActivateExecutorFromActivationDurationMs( + TBase::GetHistogram("ActivateExecutorFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromActivationDurationMs( + TBase::GetHistogram("SwitchToWorkFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromCreateDurationMs( + TBase::GetHistogram("SwitchToWorkFromCreateDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) { + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/req_tracer.cpp b/ydb/core/tx/columnshard/counters/req_tracer.cpp new file mode 100644 index
000000000000..e40342f21db1 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/req_tracer.cpp @@ -0,0 +1,5 @@ +#include "req_tracer.h" + +namespace NKikimr::NColumnShard { + +} diff --git a/ydb/core/tx/columnshard/counters/req_tracer.h b/ydb/core/tx/columnshard/counters/req_tracer.h new file mode 100644 index 000000000000..f70cd02e4840 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/req_tracer.h @@ -0,0 +1,51 @@ +#pragma once +#include "common/owner.h" +#include + +namespace NKikimr::NColumnShard { + +class TRequestsTracerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestedMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr DefaultMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotsCount; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotLock; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotUnlock; + +public: + + TRequestsTracerCounters() + : TBase("cs_requests_tracing") + , RequestedMinSnapshotAge(TBase::GetValue("Snapshots/RequestedAge/Seconds")) + , DefaultMinSnapshotAge(TBase::GetValue("Snapshots/DefaultAge/Seconds")) + , SnapshotsCount(TBase::GetValue("Snapshots/Count")) + , SnapshotLock(TBase::GetDeriviative("Snapshots/Lock")) + , SnapshotUnlock(TBase::GetDeriviative("Snapshots/Unlock")) + { + + } + + void OnDefaultMinSnapshotInstant(const TInstant instant) const { + DefaultMinSnapshotAge->Set((TInstant::Now() - instant).Seconds()); + } + + void OnSnapshotsInfo(const ui32 count, const std::optional snapshotPlanStep) const { + if (snapshotPlanStep) { + RequestedMinSnapshotAge->Set((TInstant::Now() - snapshotPlanStep->GetPlanInstant()).Seconds()); + } else { + RequestedMinSnapshotAge->Set(0); + } + SnapshotsCount->Set(count); + + } + + void OnSnapshotLocked() const { + SnapshotLock->Add(1); + } + void OnSnapshotUnlocked() const { + SnapshotUnlock->Add(1); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/scan.cpp 
b/ydb/core/tx/columnshard/counters/scan.cpp index 075aa0e880ec..cdfd42aa9bc4 100644 --- a/ydb/core/tx/columnshard/counters/scan.cpp +++ b/ydb/core/tx/columnshard/counters/scan.cpp @@ -88,12 +88,14 @@ TScanCounters::TScanCounters(const TString& module) ScanIntervalState = std::make_shared(*this); ResourcesSubscriberCounters = std::make_shared(); ScanDurationByStatus.resize((ui32)EStatusFinish::COUNT); + ScansFinishedByStatus.resize((ui32)EStatusFinish::COUNT); ui32 idx = 0; for (auto&& i : GetEnumAllValues()) { if (i == EStatusFinish::COUNT) { continue; } ScanDurationByStatus[(ui32)i] = TBase::GetHistogram("ScanDuration/" + ::ToString(i) + "/Milliseconds", NMonitoring::ExponentialHistogram(18, 2, 1)); + ScansFinishedByStatus[(ui32)i] = TBase::GetDeriviative("ScansFinished/" + ::ToString(i)); AFL_VERIFY(idx == (ui32)i); ++idx; } @@ -103,4 +105,8 @@ NKikimr::NColumnShard::TScanAggregations TScanCounters::BuildAggregations() { return TScanAggregations(GetModuleId()); } +void TScanCounters::FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRangeReads(ScansFinishedByStatus[(ui32)EStatusFinish::Success]->Val()); +} + } diff --git a/ydb/core/tx/columnshard/counters/scan.h b/ydb/core/tx/columnshard/counters/scan.h index d9bbd6b898ce..6d1202288514 100644 --- a/ydb/core/tx/columnshard/counters/scan.h +++ b/ydb/core/tx/columnshard/counters/scan.h @@ -1,8 +1,10 @@ #pragma once #include "common/owner.h" #include "common/histogram.h" +#include #include #include +#include #include namespace NKikimr::NColumnShard { @@ -10,31 +12,30 @@ namespace NKikimr::NColumnShard { class TScanAggregations: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; - std::shared_ptr ReadBlobs; - std::shared_ptr GranulesProcessing; - std::shared_ptr GranulesReady; std::shared_ptr ResultsReady; + std::shared_ptr RequestedResourcesMemory; std::shared_ptr ScanDuration; std::shared_ptr BlobsWaitingDuration; public: TScanAggregations(const TString& moduleId) : 
TBase(moduleId) - , GranulesProcessing(std::make_shared(moduleId, "InFlight/Granules/Processing")) , ResultsReady(std::make_shared(moduleId, "InFlight/Results/Ready")) + , RequestedResourcesMemory(std::make_shared(moduleId, "InFlight/Resources/Requested")) , ScanDuration(TBase::GetValueAutoAggregationsClient("ScanDuration")) , BlobsWaitingDuration(TBase::GetValueAutoAggregationsClient("BlobsWaitingDuration")) { } + std::shared_ptr GetRequestedResourcesMemory() const { + return RequestedResourcesMemory; + } + void OnBlobWaitingDuration(const TDuration d, const TDuration fullScanDuration) const { BlobsWaitingDuration->Add(d.MicroSeconds()); ScanDuration->SetValue(fullScanDuration.MicroSeconds()); } - const std::shared_ptr& GetGranulesProcessing() const { - return GranulesProcessing; - } const std::shared_ptr& GetResultsReady() const { return ResultsReady; } @@ -127,6 +128,7 @@ class TScanCounters: public TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr AckWaitingDuration; std::vector ScanDurationByStatus; + std::vector ScansFinishedByStatus; NMonitoring::TDynamicCounters::TCounterPtr NoScanRecords; NMonitoring::TDynamicCounters::TCounterPtr NoScanIntervals; @@ -212,9 +214,10 @@ class TScanCounters: public TCommonCountersOwner { LogScanIntervals->Add(1); } - void OnScanDuration(const EStatusFinish status, const TDuration d) const { + void OnScanFinished(const EStatusFinish status, const TDuration d) const { AFL_VERIFY((ui32)status < ScanDurationByStatus.size()); ScanDurationByStatus[(ui32)status]->Collect(d.MilliSeconds()); + ScansFinishedByStatus[(ui32)status]->Add(1); } void AckWaitingInfo(const TDuration d) const { @@ -257,6 +260,8 @@ class TScanCounters: public TCommonCountersOwner { } TScanAggregations BuildAggregations(); + + void FillStats(::NKikimrTableStats::TTableStats& output) const; }; class TCounterGuard: TNonCopyable { diff --git a/ydb/core/tx/columnshard/counters/tablet_counters.h b/ydb/core/tx/columnshard/counters/tablet_counters.h new 
file mode 100644 index 000000000000..99292c4f73b8 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tablet_counters.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTabletCountersHandle { +private: + TTabletCountersBase& TabletCounters; + +public: + TTabletCountersHandle(TTabletCountersBase& stats) + : TabletCounters(stats) { + } + + void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { + TabletCounters.Simple()[counter].Set(num); + } + + void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MicroSeconds()); + } + + void IncCounter(NDataShard::ESimpleCounters counter, ui64 num = 1) const { + TabletCounters.Simple()[counter].Add(num); + } + + void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) const { + TabletCounters.Percentile()[counter].IncrementFor(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MilliSeconds()); + } + + ui64 GetValue(NColumnShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 GetValue(NColumnShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetValue(NColumnShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + ui64 GetValue(NDataShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 
GetValue(NDataShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetCounter(NDataShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + } + + void OnWriteFailure() const { + IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + } + + void OnScanStarted(const NOlap::TSelectInfo::TStats& countersDelta) const { + IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, countersDelta.Portions); + IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, countersDelta.Blobs); + IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, countersDelta.Rows); + IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, countersDelta.Bytes); + } + + void OnWriteCommitted(const NOlap::TInsertionSummary::TCounters& countersDelta) const { + IncCounter(COUNTER_BLOBS_COMMITTED, countersDelta.Rows); + IncCounter(COUNTER_BYTES_COMMITTED, countersDelta.Bytes); + IncCounter(COUNTER_RAW_BYTES_COMMITTED, countersDelta.RawBytes); + } + + void OnCompactionWriteIndexCompleted(bool success, const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(success ? 
NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, bytesWritten); + } + + void OnInsertionWriteIndexCompleted(const ui64 blobsWritten, const ui64 bytesWritten, const TDuration duration) const { + IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds()); + } + + void OnWritePutBlobsSuccess(const ui64 rowsWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten); + } + + void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const { + IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes); + IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes); + IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN)); + output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED)); + output.SetRowReads(0); // all reads are range reads + output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS)); + + output.SetImmediateTxCompleted(GetValue(COUNTER_IMMEDIATE_TX_COMPLETED)); + output.SetTxRejectedByOverload(GetValue(COUNTER_WRITE_OVERLOAD)); + output.SetTxRejectedBySpace(GetValue(COUNTER_OUT_OF_SPACE)); + output.SetPlannedTxCompleted(GetValue(COUNTER_PLANNED_TX_COMPLETED)); + output.SetTxCompleteLagMsec(GetValue(COUNTER_TX_COMPLETE_LAG)); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/tx_progress.h b/ydb/core/tx/columnshard/counters/tx_progress.h new file mode 100644 index 000000000000..24319a3ab748 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tx_progress.h @@ -0,0 +1,110 @@ +#pragma once +#include "common/owner.h" + 
+#include +#include + +namespace NKikimr::NColumnShard { + +class TTxProgressCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + using TOpType = TString; + + class TProgressCounters: public TCommonCountersOwner { + private: + using TBase = TCommonCountersOwner; + + public: + NMonitoring::TDynamicCounters::TCounterPtr RegisterTx; + NMonitoring::TDynamicCounters::TCounterPtr RegisterTxWithDeadline; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishPlannedTx; + NMonitoring::TDynamicCounters::TCounterPtr AbortTx; + NMonitoring::THistogramPtr HistogramTxExecuteDuration; + NMonitoring::THistogramPtr HistogramTxLiveDuration; + NMonitoring::THistogramPtr HistogramTxProgressLag; + + TProgressCounters(const TCommonCountersOwner& owner) + : TBase(owner) + , RegisterTx(TBase::GetDeriviative("RegisterTx")) + , RegisterTxWithDeadline(TBase::GetDeriviative("RegisterTxWithDeadline")) + , StartProposeOnExecute(TBase::GetDeriviative("StartProposeOnExecute")) + , StartProposeOnComplete(TBase::GetDeriviative("StartProposeOnComplete")) + , FinishProposeOnExecute(TBase::GetDeriviative("FinishProposeOnExecute")) + , FinishProposeOnComplete(TBase::GetDeriviative("FinishProposeOnComplete")) + , FinishPlannedTx(TBase::GetDeriviative("FinishPlannedTx")) + , AbortTx(TBase::GetDeriviative("AbortTx")) + , HistogramTxExecuteDuration(TBase::GetHistogram("TxProgress/Execution/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , HistogramTxLiveDuration(TBase::GetHistogram("TxProgress/Live/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , HistogramTxProgressLag(TBase::GetHistogram("TxProgress/LagOnComplete/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) { + 
} + }; + + THashMap CountersByOpType; + +public: + void OnTxExecuteDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxExecuteDuration->Collect(d.MilliSeconds()); + } + + void OnTxLiveDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxLiveDuration->Collect(d.MilliSeconds()); + } + + void OnTxProgressLag(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxProgressLag->Collect(d.MilliSeconds()); + } + + void OnRegisterTx(const TOpType& opType) { + GetSubGroup(opType).RegisterTx->Add(1); + } + + void OnRegisterTxWithDeadline(const TOpType& opType) { + GetSubGroup(opType).RegisterTxWithDeadline->Add(1); + } + + void OnStartProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnExecute->Add(1); + } + + void OnStartProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnComplete->Add(1); + } + + void OnFinishProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnExecute->Add(1); + } + + void OnFinishProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnComplete->Add(1); + } + + void OnFinishPlannedTx(const TOpType& opType) { + GetSubGroup(opType).FinishPlannedTx->Add(1); + } + + void OnAbortTx(const TOpType& opType) { + GetSubGroup(opType).AbortTx->Add(1); + } + + TTxProgressCounters(TCommonCountersOwner& owner) + : TBase(owner, "TxProgress") { + } + +private: + TProgressCounters& GetSubGroup(const TOpType& opType) { + auto findSubGroup = CountersByOpType.FindPtr(opType); + if (findSubGroup) { + return *findSubGroup; + } + + auto subGroup = TBase::CreateSubGroup("operation", opType); + return CountersByOpType.emplace(opType, subGroup).first->second; + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/writes_monitor.h b/ydb/core/tx/columnshard/counters/writes_monitor.h new file mode 100644 index 000000000000..ad8ad6e474cc --- /dev/null +++ 
b/ydb/core/tx/columnshard/counters/writes_monitor.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TWritesMonitor { +private: + TTabletCountersBase& Stats; + + YDB_READONLY(ui64, WritesInFlight, 0); + YDB_READONLY(ui64, WritesSizeInFlight, 0); + +public: + TWritesMonitor(TTabletCountersBase& stats) + : Stats(stats) { + } + + void OnStartWrite(const ui64 dataSize) { + ++WritesInFlight; + WritesSizeInFlight += dataSize; + UpdateTabletCounters(); + } + + void OnFinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { + Y_ABORT_UNLESS(WritesInFlight >= writesCount); /* writesCount is subtracted below; the unsigned counter must not wrap */ + Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); + WritesInFlight -= writesCount; + WritesSizeInFlight -= dataSize; + UpdateTabletCounters(); + } + + TString DebugString() const { + return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight + << "}"; + } + +private: + void UpdateTabletCounters() { + Stats.Simple()[COUNTER_WRITES_IN_FLY].Set(WritesInFlight); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/ya.make b/ydb/core/tx/columnshard/counters/ya.make index 65797cb34752..8707d6080e30 100644 --- a/ydb/core/tx/columnshard/counters/ya.make +++ b/ydb/core/tx/columnshard/counters/ya.make @@ -1,18 +1,23 @@ LIBRARY() SRCS( - indexation.cpp - scan.cpp - engine_logs.cpp + background_controller.cpp + counters_manager.cpp blobs_manager.cpp + column_tables.cpp columnshard.cpp - insert_table.cpp common_data.cpp + engine_logs.cpp + indexation.cpp + insert_table.cpp + req_tracer.cpp + scan.cpp splitter.cpp ) PEERDIR( library/cpp/monlib/dynamic_counters + ydb/core/tx/columnshard/counters/aggregation ydb/core/tx/columnshard/counters/common ydb/core/base ) diff --git a/ydb/core/tx/columnshard/data_reader/actor.cpp b/ydb/core/tx/columnshard/data_reader/actor.cpp index c66cfc42cacc..4fd69af8a7ab 100644 --- a/ydb/core/tx/columnshard/data_reader/actor.cpp +++ b/ydb/core/tx/columnshard/data_reader/actor.cpp
@@ -35,7 +35,10 @@ void TActor::HandleExecute(NKqp::TEvKqpCompute::TEvScanInitActor::TPtr& ev) { } void TActor::HandleExecute(NKqp::TEvKqpCompute::TEvScanError::TPtr& ev) { - AFL_VERIFY(false)("error", NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); + SwitchStage(EStage::WaitData, EStage::Finished); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "problem_on_restore_data")( + "reason", NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); + RestoreTask->OnError(NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); } void TActor::Bootstrap(const TActorContext& /*ctx*/) { diff --git a/ydb/core/tx/columnshard/data_reader/actor.h b/ydb/core/tx/columnshard/data_reader/actor.h index b18af6cf9920..2eca911a87e2 100644 --- a/ydb/core/tx/columnshard/data_reader/actor.h +++ b/ydb/core/tx/columnshard/data_reader/actor.h @@ -12,6 +12,7 @@ class IRestoreTask { YDB_READONLY_DEF(NActors::TActorId, TabletActorId); virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) = 0; virtual TConclusionStatus DoOnFinished() = 0; + virtual void DoOnError(const TString& errorMessage) = 0; virtual std::unique_ptr DoBuildRequestInitiator() const = 0; public: @@ -24,6 +25,10 @@ class IRestoreTask { return DoOnFinished(); } + void OnError(const TString& errorMessage) { + DoOnError(errorMessage); + } + std::unique_ptr BuildRequestInitiator() const { return DoBuildRequestInitiator(); } diff --git a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h index 055081e37173..ea9c99c7fd9d 100644 --- a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h @@ -15,11 +15,11 @@ class TExtendedTransactionBase: public NTabletFlatExecutor::TTransactionBaseTabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = 
NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoExecute(txc, ctx); } virtual void Complete(const NActors::TActorContext& ctx) override final { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoComplete(ctx); } diff --git a/ydb/core/tx/columnshard/data_sharing/protos/data.proto b/ydb/core/tx/columnshard/data_sharing/protos/data.proto index e602bc4dbba4..8b376e919946 100644 --- a/ydb/core/tx/columnshard/data_sharing/protos/data.proto +++ b/ydb/core/tx/columnshard/data_sharing/protos/data.proto @@ -14,7 +14,10 @@ message TColumnRecord { message TIndexChunk { optional uint32 IndexId = 1; optional uint32 ChunkIdx = 2; - optional NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + oneof DataImplementation { + NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + string BlobData = 5; + } message TMeta { optional uint32 RecordsCount = 1; diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp index 1072d6ff1cb6..5bc37cd29122 100644 --- a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp +++ b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp @@ -1,7 +1,7 @@ #include "source.h" #include #include -#include +#include namespace NKikimr::NOlap::NDataSharing { diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp index 7dfe36689d48..7d37981a9039 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp @@ -46,7 +46,7 @@ void 
TColumnEngineChanges::WriteIndexOnComplete(NColumnShard::TColumnShard* self DoWriteIndexOnComplete(self, context); if (self) { OnFinish(*self, context); - self->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); + self->Counters.GetTabletCounters()->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); } } diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h index 7234e4710f53..721270ea63f3 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h @@ -275,7 +275,7 @@ class TColumnEngineChanges { void Start(NColumnShard::TColumnShard& self); virtual ui32 GetWritePortionsCount() const = 0; - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) = 0; + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 index) = 0; virtual bool NeedWritePortion(const ui32 index) const = 0; void WriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context); diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index 3aa29ed01a13..7917b77682b9 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -17,19 +17,19 @@ void TCleanupPortionsColumnEngineChanges::DoDebugString(TStringOutput& out) cons void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { THashSet pathIds; - if (self) { - THashMap> blobIdsByStorage; - for (auto&& p : PortionsToDrop) { - p.RemoveFromDatabase(context.DBWrapper); - - p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); - pathIds.emplace(p.GetPathId()); - } - for (auto&& i : blobIdsByStorage) { - auto action = BlobsAction.GetRemoving(i.first); - for (auto&& b : 
i.second) { - action->DeclareRemove((TTabletId)self->TabletID(), b); - } + if (!self) { + return; + } + THashMap> blobIdsByStorage; + for (auto&& p : PortionsToDrop) { + p.RemoveFromDatabase(context.DBWrapper); + p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); + pathIds.emplace(p.GetPathId()); + } + for (auto&& i : blobIdsByStorage) { + auto action = BlobsAction.GetRemoving(i.first); + for (auto&& b : i.second) { + action->DeclareRemove((TTabletId)self->TabletID(), b); } } } @@ -41,9 +41,9 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T } } if (self) { - self->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); for (auto&& p : PortionsToDrop) { - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows()); } } } diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h index 71a48e78be6e..a77d172be9e9 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h @@ -42,7 +42,7 @@ class TCleanupPortionsColumnEngineChanges: public TColumnEngineChanges { virtual ui32 GetWritePortionsCount() const override { return 0; } - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 /*index*/) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 /*index*/) override { return nullptr; } virtual bool NeedWritePortion(const ui32 /*index*/) const override { diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp index 3bd8cb6bc333..34d7354b5124 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp @@ -17,13 +17,15 @@ void TCleanupTablesColumnEngineChanges::DoDebugString(TStringOutput& out) const void TCleanupTablesColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { if (self && context.DB) { for (auto&& t : TablesToDrop) { - self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t); + AFL_VERIFY(!self->InsertTable->HasDataInPathId(t)); + AFL_VERIFY(self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t)); } } } void TCleanupTablesColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& /*context*/) { for (auto&& t : TablesToDrop) { + self->InsertTable->ErasePath(t); self->TablesManager.TryFinalizeDropPathOnComplete(t); } self->Subscribers->OnEvent(std::make_shared(TablesToDrop)); diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h index f39d33f5871c..33c7fe34cb1d 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h @@ -40,7 +40,7 @@ class TCleanupTablesColumnEngineChanges: public TColumnEngineChanges { virtual ui32 GetWritePortionsCount() const override { return 0; } - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 /*index*/) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 /*index*/) override { return nullptr; } virtual bool NeedWritePortion(const ui32 /*index*/) const override { diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index a94d160158e6..2441ce4248b8 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -53,7 +53,7 @@ void 
TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { void TCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); } } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp new file mode 100644 index 000000000000..9977ef51219c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp @@ -0,0 +1,31 @@ +#include "merger.h" + +namespace NKikimr::NOlap::NCompaction { + +void IColumnMerger::Start(const std::vector>& input, TMergingContext& mergeContext) { + AFL_VERIFY(!Started); + Started = true; + for (auto&& i : input) { + if (!i) { + continue; + } + AFL_VERIFY(i->GetDataType()->Equals(*Context.GetResultField()->type()))("input", i->GetDataType()->ToString())( + "result", Context.GetResultField()->ToString()); + } + return DoStart(input, mergeContext); +} + + TMergingChunkContext::TMergingChunkContext(const std::shared_ptr& pkAndAddresses) { + auto columnPortionIdx = pkAndAddresses->GetColumnByName(IColumnMerger::PortionIdFieldName); + auto columnPortionRecordIdx = pkAndAddresses->GetColumnByName(IColumnMerger::PortionRecordIndexFieldName); + Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx); + Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); + Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); + IdxArray = static_pointer_cast(columnPortionIdx); + RecordIdxArray = static_pointer_cast(columnPortionRecordIdx); + + AFL_VERIFY(pkAndAddresses->num_rows() == IdxArray->length()); + AFL_VERIFY(pkAndAddresses->num_rows() == 
RecordIdxArray->length()); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h new file mode 100644 index 000000000000..bf8da708f36b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h @@ -0,0 +1,143 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TMergingChunkContext { +private: + std::shared_ptr IdxArray; + std::shared_ptr RecordIdxArray; + +public: + const arrow::UInt16Array& GetIdxArray() const { + return *IdxArray; + } + const arrow::UInt32Array& GetRecordIdxArray() const { + return *RecordIdxArray; + } + + TMergingChunkContext(const std::shared_ptr& pkAndAddresses); +}; + +class TMergingContext { +public: + class TAddress { + private: + YDB_ACCESSOR(i32, ChunkIdx, -1); + YDB_ACCESSOR(i32, GlobalPosition, -1); + + public: + TAddress() = default; + bool operator<(const TAddress& item) const { + if (ChunkIdx < item.ChunkIdx) { + return true; + } else if (item.ChunkIdx < ChunkIdx) { + return false; + } else { + return GlobalPosition < item.GlobalPosition; + } + } + + bool IsValid() const { + return ChunkIdx >= 0 && GlobalPosition >= 0; + } + }; + +private: + YDB_READONLY_DEF(std::vector, Chunks); + std::vector> InputContainers; + + std::optional>> RemapPortionIndexToResultIndex; + +public: + const TMergingChunkContext& GetChunk(const ui32 idx) const { + AFL_VERIFY(idx < Chunks.size()); + return Chunks[idx]; + } + + bool HasRemapInfo(const ui32 idx) { + return GetRemapPortionIndexToResultIndex(idx).size(); + } + + const std::vector>& GetRemapPortionIndexToResultIndex() { + if (!RemapPortionIndexToResultIndex) { + std::vector> result; + result.resize(InputContainers.size()); + { + ui32 idx = 0; + for (auto&& p : InputContainers) { + if (p) { + result[idx].resize(p->GetRecordsCount()); + } + ++idx; + } + } + ui32 chunkIdx = 0; + for (auto&& i 
: Chunks) { + auto& pIdxArray = i.GetIdxArray(); + auto& pRecordIdxArray = i.GetRecordIdxArray(); + for (ui32 recordIdx = 0; recordIdx < i.GetIdxArray().length(); ++recordIdx) { + auto& sourceRemap = result[pIdxArray.Value(recordIdx)]; + if (sourceRemap.size()) { + sourceRemap[pRecordIdxArray.Value(recordIdx)].SetChunkIdx(chunkIdx); + sourceRemap[pRecordIdxArray.Value(recordIdx)].SetGlobalPosition(recordIdx); + } + } + ++chunkIdx; + } + RemapPortionIndexToResultIndex = std::move(result); + } + return *RemapPortionIndexToResultIndex; + } + + const std::vector& GetRemapPortionIndexToResultIndex(const ui32 idx) { + auto& result = GetRemapPortionIndexToResultIndex(); + AFL_VERIFY(idx < result.size()); + return result[idx]; + } + + TMergingContext(const std::vector>& pkAndAddresses, + const std::vector>& inputContainers) + : InputContainers(inputContainers) + { + for (auto&& i : pkAndAddresses) { + Chunks.emplace_back(i); + } + } +}; + +class IColumnMerger { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + +private: + bool Started = false; + + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) = 0; + virtual void DoStart(const std::vector>& input, TMergingContext& mergeContext) = 0; + +protected: + const TColumnMergeContext& Context; + +public: + static inline const TString PortionIdFieldName = "$$__portion_id"; + static inline const TString PortionRecordIndexFieldName = "$$__portion_record_idx"; + static inline const std::shared_ptr PortionIdField = + std::make_shared(PortionIdFieldName, std::make_shared()); + static inline const std::shared_ptr PortionRecordIndexField = + std::make_shared(PortionRecordIndexFieldName, std::make_shared()); + + IColumnMerger(const TColumnMergeContext& context) + : Context(context) { + } + virtual ~IColumnMerger() = default; + + void Start(const std::vector>& input, TMergingContext& mergeContext); + + std::vector Execute(const TChunkMergeContext& context, 
TMergingContext& mergeContext) { + return DoExecute(context, mergeContext); + } +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make new file mode 100644 index 000000000000..07be3f70eb68 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + merger.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp deleted file mode 100644 index cdb81296cf73..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "column_cursor.h" -#include - -namespace NKikimr::NOlap::NCompaction { - -bool TPortionColumnCursor::Fetch(TMergedColumn& column) { - Y_ABORT_UNLESS(ChunkIdx < ColumnChunks.size()); - Y_ABORT_UNLESS(RecordIndexStart); - ui32 currentStartPortionIdx = *RecordIndexStart; - ui32 currentFinishPortionIdx = RecordIndexFinish; -// NActors::TLogContextGuard lg(NActors::TLogContextBuilder::Build()("portion_id", PortionId)); - while (currentStartPortionIdx - ChunkRecordIndexStartPosition >= CurrentChunkRecordsCount) { - if (!NextChunk()) { - return false; - } - } - - ui32 currentStart = currentStartPortionIdx - ChunkRecordIndexStartPosition; - while (currentFinishPortionIdx - ChunkRecordIndexStartPosition >= CurrentChunkRecordsCount) { - const ui32 currentFinish = CurrentChunkRecordsCount; -// if (currentStart == 0 && CurrentColumnChunk) { -// column.AppendBlob(CurrentBlobChunk->GetData(), *CurrentColumnChunk); -// } else { - column.AppendSlice(GetCurrentArray(), currentStart, currentFinish - currentStart); -// } - currentStart = 0; - if (!NextChunk()) { - return false; - } - } - - const ui32 currentFinish 
= currentFinishPortionIdx - ChunkRecordIndexStartPosition; - if (currentStart < currentFinish) { - Y_ABORT_UNLESS(currentFinish < CurrentChunkRecordsCount); - column.AppendSlice(GetCurrentArray(), currentStart, currentFinish - currentStart); - } - - RecordIndexStart.reset(); - RecordIndexFinish = 0; - return true; -} - -bool TPortionColumnCursor::Next(const ui32 portionRecordIdx, TMergedColumn& column) { - Y_ABORT_UNLESS(ChunkRecordIndexStartPosition <= portionRecordIdx); - if (!RecordIndexStart) { - RecordIndexStart = portionRecordIdx; - RecordIndexFinish = portionRecordIdx + 1; - } else if (RecordIndexFinish == portionRecordIdx) { - RecordIndexFinish = portionRecordIdx + 1; - } else { - Fetch(column); - RecordIndexStart = portionRecordIdx; - RecordIndexFinish = portionRecordIdx + 1; - } - return true; -} - -bool TPortionColumnCursor::NextChunk() { - CurrentArray = nullptr; - if (++ChunkIdx == ColumnChunks.size()) { - return false; - } else { - ChunkRecordIndexStartPosition += CurrentChunkRecordsCount; - CurrentBlobChunk = BlobChunks[ChunkIdx]; - CurrentColumnChunk = ColumnChunks[ChunkIdx]; - CurrentChunkRecordsCount = CurrentBlobChunk->GetRecordsCountVerified(); - return true; - } -} - -const std::shared_ptr& TPortionColumnCursor::GetCurrentArray() { - Y_ABORT_UNLESS(ChunkIdx < ColumnChunks.size()); - Y_ABORT_UNLESS(CurrentBlobChunk); - - if (!CurrentArray) { - auto res = NArrow::TStatusValidator::GetValid(ColumnLoader->Apply(CurrentBlobChunk->GetData())); - AFL_VERIFY(res->num_columns() == 1); - CurrentArray = res->column(0); - } - return CurrentArray; -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h b/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h deleted file mode 100644 index 493cd6268f6a..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include "merged_column.h" -#include -#include -#include -#include - -namespace 
NKikimr::NOlap::NCompaction { - -class TPortionColumnCursor { -private: - std::vector> BlobChunks; - std::vector ColumnChunks; - std::optional RecordIndexStart; - YDB_READONLY(ui32, RecordIndexFinish, 0); - ui32 ChunkRecordIndexStartPosition = 0; - ui32 ChunkIdx = 0; - std::shared_ptr CurrentBlobChunk; - const TColumnRecord* CurrentColumnChunk = nullptr; - ui32 CurrentChunkRecordsCount = 0; - std::shared_ptr CurrentArray; - std::shared_ptr ColumnLoader; - const ui64 PortionId; - - const std::shared_ptr& GetCurrentArray(); - - bool NextChunk(); - -public: - ~TPortionColumnCursor() { - AFL_VERIFY(!RecordIndexStart || ChunkIdx == ColumnChunks.size())("chunk", ChunkIdx) - ("size", ColumnChunks.size())("start", RecordIndexStart)("finish", RecordIndexFinish) - ("max", CurrentBlobChunk->GetRecordsCount())("current_start_position", ChunkRecordIndexStartPosition); - } - - bool Next(const ui32 portionRecordIdx, TMergedColumn& column); - - bool Fetch(TMergedColumn& column); - - TPortionColumnCursor(const std::vector>& columnChunks, const std::vector& records, const std::shared_ptr& loader, const ui64 portionId) - : BlobChunks(columnChunks) - , ColumnChunks(records) - , ColumnLoader(loader) - , PortionId(portionId) { - AFL_VERIFY(ColumnLoader); - Y_UNUSED(PortionId); - Y_ABORT_UNLESS(BlobChunks.size()); - Y_ABORT_UNLESS(ColumnChunks.size() == BlobChunks.size()); - CurrentBlobChunk = BlobChunks.front(); - CurrentColumnChunk = ColumnChunks.front(); - CurrentChunkRecordsCount = CurrentBlobChunk->GetRecordsCountVerified(); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp deleted file mode 100644 index 09eed586ac20..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "column_portion_chunk.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap::NCompaction 
{ - -std::shared_ptr TColumnPortion::AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained) { -// if (CurrentPortionRecords + columnChunk.GetMeta().GetNumRows() <= Context.GetPortionRowsCountLimit() && -// columnChunk.GetMeta().GetRawBytes() < Context.GetChunkRawBytesLimit() && -// data.size() < Context.GetChunkPackedBytesLimit() && -// columnChunk.GetMeta().GetRawBytes() > Context.GetStorePackedChunkSizeLimit() && Context.GetSaver().IsHardPacker() && -// Context.GetUseWholeChunksOptimization()) -// { -// NChanges::TGeneralCompactionCounters::OnFullBlobAppend(columnChunk.BlobRange.GetBlobSize()); -// FlushBuffer(); -// Chunks.emplace_back(std::make_shared(data, columnChunk, Context.GetSchemaInfo())); -// PackedSize += Chunks.back()->GetPackedSize(); -// CurrentPortionRecords += columnChunk.GetMeta().GetNumRows(); -// return nullptr; -// } else { - NChanges::TGeneralCompactionCounters::OnSplittedBlobAppend(columnChunk.BlobRange.GetSize()); - auto batch = NArrow::TStatusValidator::GetValid(Context.GetLoader()->Apply(data)); - AFL_VERIFY(batch->num_columns() == 1); - auto batchArray = batch->column(0); - remained = AppendSlice(batchArray, 0, batch->num_rows()); - if (remained) { - return batchArray; - } else { - return nullptr; - } -// } -} - -ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length) { - Y_ABORT_UNLESS(a); - Y_ABORT_UNLESS(length); - Y_ABORT_UNLESS(CurrentPortionRecords < Context.GetPortionRowsCountLimit()); - Y_ABORT_UNLESS(startIndex + length <= a->length()); - ui32 i = startIndex; - const ui32 packedRecordSize = Context.GetColumnStat() ? 
Context.GetColumnStat()->GetPackedRecordSize() : 0; - for (; i < startIndex + length; ++i) { - ui64 recordSize = 0; - AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())("builder_type", Builder->type()->ToString()); - CurrentChunkRawSize += recordSize; - PredictedPackedBytes += packedRecordSize ? packedRecordSize : (recordSize / 2); - if (++CurrentPortionRecords == Context.GetPortionRowsCountLimit()) { - FlushBuffer(); - ++i; - break; - } - if (CurrentChunkRawSize >= Context.GetChunkRawBytesLimit() || PredictedPackedBytes >= Context.GetExpectedBlobPackedBytes()) { - FlushBuffer(); - } - } - return startIndex + length - i; -} - -bool TColumnPortion::FlushBuffer() { - if (Builder->length()) { - auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); - Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), newArrayChunk, TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); - Builder = Context.MakeBuilder(); - CurrentChunkRawSize = 0; - PredictedPackedBytes = 0; - PackedSize += Chunks.back()->GetPackedSize(); - return true; - } else { - return false; - } -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h deleted file mode 100644 index f1d4cbadd6cf..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#include "merge_context.h" -#include -#include -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NCompaction { - -class TColumnPortionResult { -protected: - std::vector> Chunks; - ui64 CurrentPortionRecords = 0; - const ui32 ColumnId; - ui64 PackedSize = 0; -public: - ui64 GetPackedSize() const { - return PackedSize; - } - - TColumnPortionResult(const ui32 columnId) - : ColumnId(columnId) { - - } - - const 
std::vector>& GetChunks() const { - return Chunks; - } - - ui64 GetCurrentPortionRecords() const { - return CurrentPortionRecords; - } - - TString DebugString() const { - return TStringBuilder() << "chunks=" << Chunks.size() << ";records=" << CurrentPortionRecords << ";"; - } - -}; - -class TColumnPortion: public TColumnPortionResult { -private: - using TBase = TColumnPortionResult; - std::unique_ptr Builder; - const TColumnMergeContext& Context; - YDB_READONLY(ui64, CurrentChunkRawSize, 0); - double PredictedPackedBytes = 0; - const TSimpleColumnInfo ColumnInfo; -public: - TColumnPortion(const TColumnMergeContext& context) - : TBase(context.GetColumnId()) - , Context(context) - , ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) - { - Builder = Context.MakeBuilder(); - } - - bool IsFullPortion() const { - Y_ABORT_UNLESS(CurrentPortionRecords <= Context.GetPortionRowsCountLimit()); - return CurrentPortionRecords == Context.GetPortionRowsCountLimit(); - } - - bool FlushBuffer(); - - std::shared_ptr AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained); - ui32 AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp new file mode 100644 index 000000000000..35fbf111c993 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp @@ -0,0 +1,5 @@ +#include "context.h" + +namespace NKikimr::NOlap::NCompaction { + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h new file mode 100644 index 000000000000..73117725614d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h @@ -0,0 +1,80 @@ +#pragma once +#include +#include +#include +#include + +#include + 
+namespace NKikimr::NOlap::NCompaction { + +class TColumnMergeContext { +private: + YDB_READONLY(ui32, ColumnId, 0); + ISnapshotSchema::TPtr SchemaInfo; + YDB_ACCESSOR_DEF(TColumnSaver, Saver); + YDB_READONLY_DEF(std::shared_ptr, Loader); + YDB_READONLY_DEF(std::shared_ptr, ResultField); + YDB_READONLY(ui64, ChunkPackedBytesLimit, 7 * 1024 * 1024); + YDB_READONLY(ui64, ExpectedBlobPackedBytes, 4 * 1024 * 1024); + YDB_READONLY(ui64, ChunkRawBytesLimit, 50 * 1024 * 1024); + YDB_READONLY(ui64, StorePackedChunkSizeLimit, 512 * 1024); + YDB_READONLY(bool, UseWholeChunksOptimization, true); + + std::optional ColumnStat; + + const TIndexInfo& IndexInfo; + +public: + std::shared_ptr GetDefaultValue() const { + return Loader->GetDefaultValue(); + } + + ISnapshotSchema::TPtr GetSchemaInfo() const { + return SchemaInfo; + } + + const std::optional& GetColumnStat() const { + return ColumnStat; + } + + std::unique_ptr MakeBuilder() const { + return NArrow::MakeBuilder(ResultField); + } + + const TIndexInfo& GetIndexInfo() const { + return IndexInfo; + } + + TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 chunkRawBytesLimit, + const std::optional& columnStat) + : ColumnId(columnId) + , SchemaInfo(schema) + , Saver(schema->GetColumnSaver(columnId)) + , Loader(schema->GetColumnLoaderOptional(columnId)) + , ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) + , ChunkRawBytesLimit(chunkRawBytesLimit) + , UseWholeChunksOptimization(!schema->GetIndexInfo().GetReplaceKey()->GetFieldByName(ResultField->name())) + , ColumnStat(columnStat) + , IndexInfo(schema->GetIndexInfo()) { + Y_ABORT_UNLESS(ChunkRawBytesLimit); + } +}; + +class TChunkMergeContext { +private: + YDB_READONLY(ui32, PortionRowsCountLimit, 10000); + YDB_READONLY(ui32, BatchIdx, 0); + YDB_READONLY(ui32, RecordsCount, 0); + +public: + TChunkMergeContext(const ui32 portionRowsCountLimit, const ui32 batchIdx, const ui32 recordsCount) + : 
PortionRowsCountLimit(portionRowsCountLimit) + , BatchIdx(batchIdx) + , RecordsCount(recordsCount) + { + AFL_VERIFY(RecordsCount); + AFL_VERIFY(PortionRowsCountLimit); + } +}; +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp new file mode 100644 index 000000000000..6482ee301543 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp @@ -0,0 +1,10 @@ +#include "result.h" +#include + +namespace NKikimr::NOlap::NCompaction { + +TString TColumnPortionResult::DebugString() const { + return TStringBuilder() << "chunks=" << Chunks.size() << ";"; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h new file mode 100644 index 000000000000..850e1f6eebe0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h @@ -0,0 +1,25 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NCompaction { + +class TColumnPortionResult { +protected: + std::vector> Chunks; + const ui32 ColumnId; +public: + + TColumnPortionResult(const ui32 columnId) + : ColumnId(columnId) { + + } + + const std::vector>& GetChunks() const { + return Chunks; + } + + TString DebugString() const; + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make new file mode 100644 index 000000000000..30667909c931 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + context.cpp + result.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp deleted file mode 100644 index 
8280e58eec95..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "merge_context.h" - -namespace NKikimr::NOlap::NCompaction { - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h deleted file mode 100644 index a5da857c2aff..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#include - -namespace NKikimr::NOlap::NCompaction { - -class TColumnMergeContext { -private: - YDB_READONLY(ui32, ColumnId, 0); - ISnapshotSchema::TPtr SchemaInfo; - YDB_READONLY_DEF(TColumnSaver, Saver); - YDB_READONLY_DEF(std::shared_ptr, Loader); - YDB_READONLY_DEF(std::shared_ptr, ResultField); - YDB_READONLY(ui32, PortionRowsCountLimit, 10000); - YDB_READONLY(ui64, ChunkPackedBytesLimit, 7 * 1024 * 1024); - YDB_READONLY(ui64, ExpectedBlobPackedBytes, 4 * 1024 * 1024); - YDB_READONLY(ui64, ChunkRawBytesLimit, 50 * 1024 * 1024); - YDB_READONLY(ui64, StorePackedChunkSizeLimit, 512 * 1024); - YDB_READONLY(bool, UseWholeChunksOptimization, true); - - std::optional ColumnStat; - const TIndexInfo& IndexInfo; -public: - ISnapshotSchema::TPtr GetSchemaInfo() const { - return SchemaInfo; - } - - const std::optional& GetColumnStat() const { - return ColumnStat; - } - - std::unique_ptr MakeBuilder() const { - return NArrow::MakeBuilder(ResultField); - } - - const TIndexInfo& GetIndexInfo() const { - return IndexInfo; - } - - TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 portionRowsCountLimit, const ui32 chunkRawBytesLimit, - const std::optional& columnStat) - : ColumnId(columnId) - , SchemaInfo(schema) - , Saver(schema->GetColumnSaver(columnId)) - , Loader(schema->GetColumnLoaderOptional(columnId)) - , 
ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) - , PortionRowsCountLimit(portionRowsCountLimit) - , ChunkRawBytesLimit(chunkRawBytesLimit) - , UseWholeChunksOptimization(!schema->GetIndexInfo().GetReplaceKey()->GetFieldByName(ResultField->name())) - , ColumnStat(columnStat) - , IndexInfo(schema->GetIndexInfo()) - { - Y_ABORT_UNLESS(PortionRowsCountLimit); - Y_ABORT_UNLESS(ChunkRawBytesLimit); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp deleted file mode 100644 index 5f638a30f155..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "merged_column.h" - -namespace NKikimr::NOlap::NCompaction { - -void TMergedColumn::AppendBlob(const TString& data, const TColumnRecord& columnChunk) { - RecordsCount += columnChunk.GetMeta().GetNumRows(); - ui32 remained; - std::shared_ptr dataArray = Portions.back().AppendBlob(data, columnChunk, remained); - while (remained) { - Y_ABORT_UNLESS(Portions.back().IsFullPortion()); - NewPortion(); - remained = Portions.back().AppendSlice(dataArray, dataArray->length() - remained, remained); - } - if (Portions.back().IsFullPortion()) { - NewPortion(); - } -} - -void TMergedColumn::AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length) { - RecordsCount += length; - Y_ABORT_UNLESS(data); - ui32 remained = length; - while (remained = Portions.back().AppendSlice(data, startIndex + length - remained, remained)) { - Y_ABORT_UNLESS(Portions.back().IsFullPortion()); - NewPortion(); - } - if (Portions.back().IsFullPortion()) { - NewPortion(); - } -} - -std::vector TMergedColumn::BuildResult() { - std::vector result; - if (Portions.size()) { - Portions.back().FlushBuffer(); - } - for (auto&& i : Portions) { - result.emplace_back(i); - } - return result; -} - -void TMergedColumn::NewPortion() { - if 
(Portions.size()) { - Portions.back().FlushBuffer(); - } - Portions.emplace_back(TColumnPortion(Context)); -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h deleted file mode 100644 index f0a90bdd9b44..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "column_portion_chunk.h" -#include "merge_context.h" -#include - -namespace NKikimr::NOlap::NCompaction { - -class TMergedColumn { -private: - TColumnMergeContext Context; - YDB_READONLY_DEF(std::vector, Portions); - YDB_READONLY(ui32, RecordsCount, 0); - - void NewPortion(); - -public: - TMergedColumn(const TColumnMergeContext& context) - : Context(context) { - NewPortion(); - } - - void AppendBlob(const TString& data, const TColumnRecord& columnChunk); - void AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length); - - std::vector BuildResult(); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp new file mode 100644 index 000000000000..439426439867 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -0,0 +1,176 @@ +#include "merger.h" + +#include "abstract/merger.h" +#include "plain/logic.h" +#include "sparsed/logic.h" + +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +std::vector TMerger::Execute(const std::shared_ptr& stats, + const NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, const ui64 pathId, + const std::optional shardingActualVersion) { + AFL_VERIFY(Batches.size() == Filters.size()); + std::vector> batchResults; + { + arrow::FieldVector indexFields; + indexFields.emplace_back(IColumnMerger::PortionIdField); + 
indexFields.emplace_back(IColumnMerger::PortionRecordIndexField); + if (resultFiltered->HasColumnId((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFields(indexFields); + } + IIndexInfo::AddSnapshotFields(indexFields); + auto dataSchema = std::make_shared(indexFields); + NArrow::NMerger::TMergePartialStream mergeStream( + resultFiltered->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); + + ui32 idx = 0; + for (auto&& batch : Batches) { + { + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + IColumnMerger::PortionIdFieldName, idx); + batch->AddField(IColumnMerger::PortionIdField, column->BuildArray(batch->num_rows())).Validate(); + } + { + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + IColumnMerger::PortionRecordIndexFieldName); + batch->AddField(IColumnMerger::PortionRecordIndexField, column->BuildArray(batch->num_rows())).Validate(); + } + mergeStream.AddSource(batch, Filters[idx]); + ++idx; + } + batchResults = mergeStream.DrainAllParts(checkPoints, indexFields); + } + + std::vector>> chunkGroups; + chunkGroups.resize(batchResults.size()); + + using TColumnData = std::vector>; + THashMap columnsData; + { + ui32 batchIdx = 0; + for (auto&& p : Batches) { + ui32 columnIdx = 0; + for (auto&& i : p->GetSchema()->GetFields()) { + const std::optional columnId = resultFiltered->GetIndexInfo().GetColumnIdOptional(i->name()); + if (columnId) { + auto it = columnsData.find(*columnId); + if (it == columnsData.end()) { + it = columnsData.emplace(*columnId, TColumnData(Batches.size())).first; + } + it->second[batchIdx] = p->GetColumnVerified(columnIdx); + } + ++columnIdx; + } + ++batchIdx; + } + } + + TMergingContext mergingContext(batchResults, Batches); + + for (auto&& [columnId, columnData] : columnsData) { + const TString& columnName = resultFiltered->GetIndexInfo().GetColumnName(columnId); + NActors::TLogContextGuard 
logGuard(NActors::TLogContextBuilder::Build()("field_name", columnName)); + auto columnInfo = stats->GetColumnInfo(columnId); + + TColumnMergeContext commonContext( + columnId, resultFiltered, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); + if (OptimizationWritingPackMode) { + commonContext.MutableSaver().AddSerializerWithBorder( + 100, std::make_shared(arrow::Compression::type::UNCOMPRESSED)); + commonContext.MutableSaver().AddSerializerWithBorder( + Max(), std::make_shared(arrow::Compression::type::LZ4_FRAME)); + } + + THolder merger = + IColumnMerger::TFactory::MakeHolder(commonContext.GetLoader()->GetAccessorConstructor().GetClassName(), commonContext); + AFL_VERIFY(!!merger)("problem", "cannot create merger")( + "class_name", commonContext.GetLoader()->GetAccessorConstructor().GetClassName()); + merger->Start(columnData, mergingContext); + + ui32 batchIdx = 0; + for (auto&& batchResult : batchResults) { + const ui32 portionRecordsCountLimit = + batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; + + TChunkMergeContext context(portionRecordsCountLimit, batchIdx, batchResult->num_rows()); + chunkGroups[batchIdx][columnId] = merger->Execute(context, mergingContext); + ++batchIdx; + } + } + ui32 batchIdx = 0; + + const auto groups = + resultFiltered->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); + std::vector result; + for (auto&& columnChunks : chunkGroups) { + auto batchResult = batchResults[batchIdx]; + ++batchIdx; + Y_ABORT_UNLESS(columnChunks.size()); + + for (auto&& i : columnChunks) { + if (i.second.size() != columnChunks.begin()->second.size()) { + for (ui32 p = 0; p < std::min(columnChunks.begin()->second.size(), i.second.size()); ++p) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())( + "p", i.second[p].DebugString()); + } + } + 
AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", columnChunks.begin()->second.size())( + "current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); + } + auto columnSnapshotPlanStepIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_PLAN_STEP); + auto columnSnapshotTxIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_TX_ID); + Y_ABORT_UNLESS(columnSnapshotPlanStepIdx); + Y_ABORT_UNLESS(columnSnapshotTxIdx); + Y_ABORT_UNLESS(columnSnapshotPlanStepIdx->type_id() == arrow::UInt64Type::type_id); + Y_ABORT_UNLESS(columnSnapshotTxIdx->type_id() == arrow::UInt64Type::type_id); + + std::vector batchSlices; + std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultFiltered, stats)); + + for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { + THashMap>> portionColumns; + for (auto&& p : columnChunks) { + portionColumns.emplace(p.first, p.second[i].GetChunks()); + } + batchSlices.emplace_back(portionColumns, schemaDetails, Context.Counters.SplitterCounters); + } + NArrow::NSplitter::TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); + auto packs = slicer.Split(batchSlices); + + ui32 recordIdx = 0; + for (auto&& i : packs) { + TGeneralSerializedSlice slicePrimary(std::move(i)); + auto dataWithSecondary = resultFiltered->GetIndexInfo() + .AppendIndexes(slicePrimary.GetPortionChunksToHash(), SaverContext.GetStoragesManager()) + .DetachResult(); + TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, Context.Counters.SplitterCounters); + + auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); + const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, false); + auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), + dataWithSecondary.GetSecondaryInplaceData(), pathId, resultFiltered->GetVersion(), resultFiltered->GetSnapshot(), + SaverContext.GetStoragesManager()); + + 
NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultFiltered->GetIndexInfo().GetReplaceKey())); + NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); + constructor.GetPortionConstructor().AddMetadata(*resultFiltered, deletionsCount, primaryKeys, snapshotKeys); + constructor.GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); + if (shardingActualVersion) { + constructor.GetPortionConstructor().SetShardingVersion(*shardingActualVersion); + } + result.emplace_back(std::move(constructor)); + recordIdx += slice.GetRecordsCount(); + } + } + return result; +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h new file mode 100644 index 000000000000..9c84799fe8ad --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { +class TMerger { +private: + YDB_ACCESSOR(bool, OptimizationWritingPackMode, false); + std::vector> Batches; + std::vector> Filters; + const TConstructionContext& Context; + const TSaverContext& SaverContext; + +public: + void AddBatch(const std::shared_ptr& batch, const std::shared_ptr& filter) { + AFL_VERIFY(batch); + Batches.emplace_back(batch); + Filters.emplace_back(filter); + } + + TMerger(const TConstructionContext& context, const TSaverContext& saverContext) + : Context(context) + , SaverContext(saverContext) { + } + + TMerger(const TConstructionContext& context, const TSaverContext& saverContext, + std::vector>&& batches, std::vector>&& filters) + : Batches(std::move(batches)) + , Filters(std::move(filters)) + , Context(context) + , SaverContext(saverContext) { + AFL_VERIFY(Batches.size() == Filters.size()); + } + + std::vector Execute(const std::shared_ptr& stats, + const 
NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, + const ui64 pathId, const std::optional shardingActualVersion); +}; +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp new file mode 100644 index 000000000000..9fd0c4d301e1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp @@ -0,0 +1,59 @@ +#include "column_cursor.h" +#include + +namespace NKikimr::NOlap::NCompaction { + +bool TPortionColumnCursor::Fetch(TMergedColumn& column) { + Y_ABORT_UNLESS(RecordIndexStart); + if (!BlobChunks) { + if (!DefaultArray || DefaultArray->length() < RecordIndexFinish - *RecordIndexStart) { + DefaultArray = NArrow::TThreadSimpleArraysCache::Get(DataType, DefaultValue, RecordIndexFinish - *RecordIndexStart); + } + column.AppendSlice(DefaultArray, 0, RecordIndexFinish - *RecordIndexStart); + } else { + if (CurrentChunk && CurrentChunk->GetAddress().Contains(*RecordIndexStart)) { + } else { + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, *RecordIndexStart); + } + + ui32 currentStart = *RecordIndexStart; + while (CurrentChunk->GetAddress().GetGlobalFinishPosition() <= RecordIndexFinish) { + column.AppendSlice(CurrentChunk->GetArray(), CurrentChunk->GetAddress().GetLocalIndex(currentStart), + CurrentChunk->GetAddress().GetGlobalFinishPosition() - currentStart); + currentStart = CurrentChunk->GetAddress().GetGlobalFinishPosition(); + if (currentStart < BlobChunks->GetRecordsCount()) { + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, currentStart); + } else { + CurrentChunk.reset(); + break; + } + } + + if (currentStart < RecordIndexFinish) { + AFL_VERIFY(CurrentChunk); + Y_ABORT_UNLESS(RecordIndexFinish < CurrentChunk->GetAddress().GetGlobalFinishPosition()); + column.AppendSlice( + CurrentChunk->GetArray(), 
CurrentChunk->GetAddress().GetLocalIndex(currentStart), RecordIndexFinish - currentStart); + } + } + RecordIndexStart.reset(); + RecordIndexFinish = 0; + + return true; +} + +bool TPortionColumnCursor::Next(const ui32 portionRecordIdx, TMergedColumn& column) { + if (!RecordIndexStart) { + RecordIndexStart = portionRecordIdx; + RecordIndexFinish = portionRecordIdx + 1; + } else if (RecordIndexFinish == portionRecordIdx) { + RecordIndexFinish = portionRecordIdx + 1; + } else { + Fetch(column); + RecordIndexStart = portionRecordIdx; + RecordIndexFinish = portionRecordIdx + 1; + } + return true; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h new file mode 100644 index 000000000000..4180444c9487 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h @@ -0,0 +1,43 @@ +#pragma once +#include "merged_column.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NCompaction { + +class TPortionColumnCursor { +private: + std::optional CurrentChunk; + std::shared_ptr BlobChunks; + std::shared_ptr DefaultArray; + std::optional RecordIndexStart; + std::shared_ptr DataType; + std::shared_ptr DefaultValue; + YDB_READONLY(ui32, RecordIndexFinish, 0); + +public: + ~TPortionColumnCursor() { + AFL_VERIFY(!RecordIndexStart)("start", RecordIndexStart)("finish", RecordIndexFinish); + } + + bool Next(const ui32 portionRecordIdx, TMergedColumn& column); + + bool Fetch(TMergedColumn& column); + + TPortionColumnCursor(const std::shared_ptr& columnChunks) + : BlobChunks(columnChunks) { + AFL_VERIFY(BlobChunks); + } + + TPortionColumnCursor(const std::shared_ptr& dataType, const std::shared_ptr& defaultValue) + : DataType(dataType) + , DefaultValue(defaultValue) { + AFL_VERIFY(DataType); + } +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git 
a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp new file mode 100644 index 000000000000..3db4127653b8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp @@ -0,0 +1,50 @@ +#include "column_portion_chunk.h" + +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length) { + Y_ABORT_UNLESS(a); + Y_ABORT_UNLESS(length); + Y_ABORT_UNLESS(CurrentPortionRecords < ChunkContext.GetPortionRowsCountLimit()); + Y_ABORT_UNLESS(startIndex + length <= a->length()); + AFL_VERIFY(Type->id() == a->type_id())("own", Type->ToString())("a", a->type()->ToString()); + ui32 i = startIndex; + const ui32 packedRecordSize = Context.GetColumnStat() ? Context.GetColumnStat()->GetPackedRecordSize() : 0; + for (; i < startIndex + length; ++i) { + ui64 recordSize = 0; + AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())( + "builder_type", Builder->type()->ToString()); + CurrentChunkRawSize += recordSize; + PredictedPackedBytes += packedRecordSize ? 
packedRecordSize : (recordSize / 2); + if (++CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit()) { + FlushBuffer(); + ++i; + break; + } + if (CurrentChunkRawSize >= Context.GetChunkRawBytesLimit() || PredictedPackedBytes >= Context.GetExpectedBlobPackedBytes()) { + FlushBuffer(); + } + } + return startIndex + length - i; +} + +bool TColumnPortion::FlushBuffer() { + if (!Builder->length()) { + return false; + } + auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), + std::make_shared(newArrayChunk), TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); + Builder = Context.MakeBuilder(); + CurrentChunkRawSize = 0; + PredictedPackedBytes = 0; + PackedSize += Chunks.back()->GetPackedSize(); + return true; +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h new file mode 100644 index 000000000000..59c6a3b460da --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TColumnPortion: public TColumnPortionResult { +private: + using TBase = TColumnPortionResult; + std::unique_ptr Builder; + std::shared_ptr Type; + const TColumnMergeContext& Context; + const TChunkMergeContext& ChunkContext; + YDB_READONLY(ui64, CurrentChunkRawSize, 0); + double PredictedPackedBytes = 0; + const TSimpleColumnInfo ColumnInfo; + ui64 PackedSize = 0; + ui64 CurrentPortionRecords = 0; + +public: + TColumnPortion(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) + : TBase(context.GetColumnId()) + , Context(context) + , ChunkContext(chunkContext) + , 
ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) { + Builder = Context.MakeBuilder(); + Type = Builder->type(); + } + + bool IsFullPortion() const { + Y_ABORT_UNLESS(CurrentPortionRecords <= ChunkContext.GetPortionRowsCountLimit()); + return CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit(); + } + + bool FlushBuffer(); + + ui32 AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length); +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp new file mode 100644 index 000000000000..0081d33d01b5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp @@ -0,0 +1,39 @@ +#include "logic.h" + +namespace NKikimr::NOlap::NCompaction { + +void TPlainMerger::DoStart(const std::vector>& input, TMergingContext& /*mContext*/) { + for (auto&& p : input) { + if (p) { + Cursors.emplace_back(NCompaction::TPortionColumnCursor(p)); + } else { + Cursors.emplace_back( + NCompaction::TPortionColumnCursor(Context.GetLoader()->GetResultField()->type(), Context.GetLoader()->GetDefaultValue())); + } + + } +} + +std::vector TPlainMerger::DoExecute( + const TChunkMergeContext& chunkContext, TMergingContext& mContext) { + NCompaction::TMergedColumn mColumn(Context, chunkContext); + auto& chunkInfo = mContext.GetChunk(chunkContext.GetBatchIdx()); + std::optional predPortionIdx; + for (ui32 idx = 0; idx < chunkInfo.GetIdxArray().length(); ++idx) { + const ui16 portionIdx = chunkInfo.GetIdxArray().Value(idx); + const ui32 portionRecordIdx = chunkInfo.GetRecordIdxArray().Value(idx); + auto& cursor = Cursors[portionIdx]; + cursor.Next(portionRecordIdx, mColumn); + if (predPortionIdx && portionIdx != *predPortionIdx) { + Cursors[*predPortionIdx].Fetch(mColumn); + } + if (idx + 1 == chunkInfo.GetIdxArray().length()) { + cursor.Fetch(mColumn); + } + 
predPortionIdx = portionIdx; + } + AFL_VERIFY(chunkInfo.GetIdxArray().length() == mColumn.GetRecordsCount()); + return mColumn.BuildResult(); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h new file mode 100644 index 000000000000..5b3c53f2eec9 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h @@ -0,0 +1,22 @@ +#pragma once +#include "column_cursor.h" + +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { +class TPlainMerger: public IColumnMerger { +private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::PlainDataAccessorName); + using TBase = IColumnMerger; + std::vector Cursors; + virtual void DoStart(const std::vector>& input, TMergingContext& mergeContext) override; + + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; + +public: + using TBase::TBase; +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp new file mode 100644 index 000000000000..84dd8608ffc4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp @@ -0,0 +1,36 @@ +#include "merged_column.h" + +namespace NKikimr::NOlap::NCompaction { + +void TMergedColumn::AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length) { + RecordsCount += length; + Y_ABORT_UNLESS(data); + ui32 remained = length; + while (remained = Portions.back().AppendSlice(data, startIndex + length - remained, remained)) { + Y_ABORT_UNLESS(Portions.back().IsFullPortion()); + NewPortion(); + } + if (Portions.back().IsFullPortion()) { + NewPortion(); + } +} + +std::vector TMergedColumn::BuildResult() { + std::vector 
result; + if (Portions.size()) { + Portions.back().FlushBuffer(); + } + for (auto&& i : Portions) { + result.emplace_back(i); + } + return result; +} + +void TMergedColumn::NewPortion() { + if (Portions.size()) { + Portions.back().FlushBuffer(); + } + Portions.emplace_back(TColumnPortion(Context, ChunkContext)); +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h new file mode 100644 index 000000000000..2433bb8f4862 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h @@ -0,0 +1,31 @@ +#pragma once +#include "column_portion_chunk.h" + +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TMergedColumn { +private: + TColumnMergeContext Context; + TChunkMergeContext ChunkContext; + YDB_READONLY_DEF(std::vector, Portions); + YDB_READONLY(ui32, RecordsCount, 0); + + void NewPortion(); + +public: + TMergedColumn(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) + : Context(context) + , ChunkContext(chunkContext) + { + NewPortion(); + } + + void AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length); + + std::vector BuildResult(); +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make new file mode 100644 index 000000000000..91991ea51097 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + column_cursor.cpp + column_portion_chunk.cpp + merged_column.cpp + GLOBAL logic.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp new file mode 100644 index 
000000000000..d2c4e14f0664 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp @@ -0,0 +1,171 @@ +#include "logic.h" + +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +void TSparsedMerger::DoStart(const std::vector>& input, TMergingContext& mergingContext) { + ui32 idx = 0; + for (auto&& p : input) { + if (p) { + Cursors.emplace_back(p, Context); + if (mergingContext.HasRemapInfo(idx)) { + CursorPositions.emplace_back(TCursorPosition(&Cursors.back(), mergingContext.GetRemapPortionIndexToResultIndex(idx))); + if (CursorPositions.back().IsFinished()) { + CursorPositions.pop_back(); + } + } + } + ++idx; + } +} + +std::vector TSparsedMerger::DoExecute(const TChunkMergeContext& chunkContext, TMergingContext& /*mergeContext*/) { + std::vector result; + std::shared_ptr writer = std::make_shared(Context); + const auto addSkipsToWriter = [&](i64 delta) { + if (!delta) { + return; + } + AFL_VERIFY(delta >= 0); + if (chunkContext.GetPortionRowsCountLimit() <= writer->GetCurrentSize() + delta) { + const i64 diff = chunkContext.GetPortionRowsCountLimit() - writer->GetCurrentSize(); + writer->AddPositions(diff); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= diff; + } + while (chunkContext.GetPortionRowsCountLimit() <= delta) { + writer->AddPositions(chunkContext.GetPortionRowsCountLimit()); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= chunkContext.GetPortionRowsCountLimit(); + } + if (delta) { + writer->AddPositions(delta); + } + }; + + std::vector heap; + for (auto it = CursorPositions.begin(); it != CursorPositions.end();) { + AFL_VERIFY(chunkContext.GetBatchIdx() <= it->GetCurrentGlobalChunkIdx()); + if (it->GetCurrentGlobalChunkIdx() == chunkContext.GetBatchIdx()) { + heap.emplace_back(std::move(*it)); + it = CursorPositions.erase(it); + } else { + ++it; + } + } + std::make_heap(heap.begin(), heap.end()); + ui32 nextGlobalPosition = 0; 
+ while (heap.size()) { + std::pop_heap(heap.begin(), heap.end()); + while (heap.size() == 1 || (heap.size() > 1 && heap.front() < heap.back())) { + { + auto& address = heap.back().GetCurrentAddress(); + AFL_VERIFY(nextGlobalPosition <= (ui32)address.GetGlobalPosition()); + addSkipsToWriter(address.GetGlobalPosition() - nextGlobalPosition); + + heap.back().AddIndexTo(*writer); + if (chunkContext.GetPortionRowsCountLimit() == writer->GetCurrentSize()) { + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + } + nextGlobalPosition = address.GetGlobalPosition() + 1; + } + if (!heap.back().Next()) { + heap.pop_back(); + break; + } else if (heap.back().GetCurrentGlobalChunkIdx() != chunkContext.GetBatchIdx()) { + CursorPositions.emplace_back(std::move(heap.back())); + heap.pop_back(); + break; + } + } + std::push_heap(heap.begin(), heap.end()); + } + AFL_VERIFY(nextGlobalPosition <= chunkContext.GetRecordsCount()); + addSkipsToWriter(chunkContext.GetRecordsCount() - nextGlobalPosition); + if (writer->HasData()) { + result.emplace_back(writer->Flush()); + } + return result; +} + +void TSparsedMerger::TWriter::AddRealData(const std::shared_ptr& arr, const ui32 index) { + AFL_VERIFY(arr); + AFL_VERIFY(NArrow::Append(*ValueBuilder, *arr, index)); + NArrow::TStatusValidator::Validate(IndexBuilderImpl->Append(CurrentRecordIdx)); + ++UsefulRecordsCount; + ++CurrentRecordIdx; +} + +TColumnPortionResult TSparsedMerger::TWriter::Flush() { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", DataType) }; + auto schema = std::make_shared(fields); + std::vector> columns = { NArrow::TStatusValidator::GetValid(IndexBuilder->Finish()), + NArrow::TStatusValidator::GetValid(ValueBuilder->Finish()) }; + + auto recordBatch = arrow::RecordBatch::Make(schema, UsefulRecordsCount, columns); + NArrow::NAccessor::TSparsedArray::TBuilder builder( + 
Context.GetIndexInfo().GetColumnFeaturesVerified(Context.GetColumnId()).GetDefaultValue().GetValue(), Context.GetResultField()->type()); + builder.AddChunk(CurrentRecordIdx, recordBatch); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(recordBatch), builder.Finish(), + TChunkAddress(ColumnId, 0), Context.GetIndexInfo().GetColumnFeaturesVerified(ColumnId))); + return *this; +} + +TSparsedMerger::TWriter::TWriter(const TColumnMergeContext& context) + : TBase(context.GetColumnId()) + , DataType(context.GetResultField()->type()) + , Context(context) { + IndexBuilder = NArrow::MakeBuilder(arrow::uint32()); + ValueBuilder = NArrow::MakeBuilder(DataType); + IndexBuilderImpl = (arrow::UInt32Builder*)(IndexBuilder.get()); +} + +bool TSparsedMerger::TPlainChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartPosition <= index); + writer.AddRealData(ChunkAddress->GetArray(), index - ChunkStartPosition); + return true; +} + +bool TSparsedMerger::TSparsedChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartGlobalPosition <= index); + AFL_VERIFY(index == NextGlobalPosition)("index", index)("next", NextGlobalPosition); + writer.AddRealData(Chunk->GetColValue(), NextLocalPosition); + return true; +} + +bool TSparsedMerger::TCursor::AddIndexTo(const ui32 index, TWriter& writer) { + if (FinishGlobalPosition <= index) { + InitArrays(index); + } + if (SparsedCursor) { + return SparsedCursor->AddIndexTo(index, writer); + } else if (PlainCursor) { + return PlainCursor->AddIndexTo(index, writer); + } else { + return false; + } +} + +void TSparsedMerger::TCursor::InitArrays(const ui32 position) { + AFL_VERIFY(!CurrentOwnedArray || !CurrentOwnedArray->GetAddress().Contains(position)); + CurrentOwnedArray = Array->GetArray(CurrentOwnedArray, position, Array); + if (CurrentOwnedArray->GetArray()->GetType() == NArrow::NAccessor::IChunkedArray::EType::SparsedArray) { + auto sparsedArray = 
static_pointer_cast(CurrentOwnedArray->GetArray()); + SparsedCursor = std::make_shared(sparsedArray, &*CurrentOwnedArray); + PlainCursor = nullptr; + } else { + PlainCursor = make_shared(CurrentOwnedArray->GetArray(), &*CurrentOwnedArray); + SparsedCursor = nullptr; + } + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() <= position); + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + CurrentOwnedArray->GetArray()->GetRecordsCount(); + AFL_VERIFY(position < FinishGlobalPosition); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h new file mode 100644 index 000000000000..9fc64606a092 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h @@ -0,0 +1,286 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TSparsedMerger: public IColumnMerger { +private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName); + + using TBase = IColumnMerger; + class TWriter: public TColumnPortionResult { + private: + using TBase = TColumnPortionResult; + const std::shared_ptr DataType; + const TColumnMergeContext& Context; + std::unique_ptr IndexBuilder; + std::unique_ptr ValueBuilder; + arrow::UInt32Builder* IndexBuilderImpl = nullptr; + ui32 CurrentRecordIdx = 0; + ui32 UsefulRecordsCount = 0; + + public: + TWriter(const TColumnMergeContext& context); + + bool HasData() const { + return CurrentRecordIdx; + } + + ui32 GetCurrentSize() const { + return CurrentRecordIdx; + } + + bool HasUsefulData() const { + return UsefulRecordsCount; + } + + ui32 AddPositions(const i32 delta) { + AFL_VERIFY(delta > 0); + CurrentRecordIdx += delta; + return CurrentRecordIdx; + } + + void AddRealData(const std::shared_ptr& arr, const ui32 index); + + 
TColumnPortionResult Flush(); + }; + + class TPlainChunkCursor { + private: + std::shared_ptr CurrentChunkedArray; + std::optional ChunkAddress; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartPosition = 0; + ui32 ChunkFinishPosition = 0; + + void InitArrays(const ui32 position) { + AFL_VERIFY(!ChunkAddress || ChunkFinishPosition <= position); + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() <= position)("pos", position)( + "global", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ChunkAddress = CurrentChunkedArray->GetChunk(ChunkAddress, position - CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + AFL_VERIFY(ChunkAddress); + ChunkStartPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalStartPosition(); + ChunkFinishPosition = + CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalFinishPosition(); + AFL_VERIFY(position < ChunkFinishPosition)("finish", ChunkFinishPosition)("pos", position); + AFL_VERIFY(ChunkStartPosition <= position)("start", ChunkStartPosition)("pos", position); + } + + public: + TPlainChunkCursor(const std::shared_ptr& chunked, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentChunkedArray(chunked) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(CurrentChunkedArray); + AFL_VERIFY(CurrentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + bool AddIndexTo(const ui32 index, TWriter& writer); + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& context) { + AFL_VERIFY(ChunkStartPosition <= currentGlobalPosition)("start", ChunkStartPosition)("pos", currentGlobalPosition)( + "global_start", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ui32 currentIndex = currentGlobalPosition; + while (true) { + if 
(CurrentOwnedArray->GetAddress().GetGlobalFinishPosition() <= currentIndex) { + return {}; + } + if (ChunkFinishPosition <= currentIndex) { + InitArrays(currentIndex); + continue; + } + for (; currentIndex < ChunkFinishPosition; ++currentIndex) { + if (!NArrow::ColumnEqualsScalar( + ChunkAddress->GetArray(), currentIndex - ChunkStartPosition, context.GetLoader()->GetDefaultValue())) { + return currentIndex; + } + } + } + } + }; + + class TSparsedChunkCursor { + private: + std::shared_ptr CurrentSparsedArray; + const NArrow::NAccessor::TSparsedArrayChunk* Chunk = nullptr; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartGlobalPosition = 0; + ui32 NextGlobalPosition = 0; + ui32 NextLocalPosition = 0; + ui32 FinishGlobalPosition = 0; + void InitArrays(const ui32 position) { + AFL_VERIFY(!Chunk || CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition() <= position); + Chunk = &CurrentSparsedArray->GetSparsedChunk(CurrentOwnedArray->GetAddress().GetLocalIndex(position)); + AFL_VERIFY(Chunk->GetRecordsCount()); + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition() <= position && + position < CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition()) + ("pos", position)("start", Chunk->GetStartPosition())("finish", Chunk->GetFinishPosition())( + "shift", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ChunkStartGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition(); + NextGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFirstIndexNotDefault(); + NextLocalPosition = 0; + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition(); + } + + public: + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& /*context*/) { + while (true) { + if 
(NextGlobalPosition == CurrentOwnedArray->GetAddress().GetGlobalFinishPosition()) { + return {}; + } + if (NextGlobalPosition == FinishGlobalPosition) { + InitArrays(NextGlobalPosition); + continue; + } + if (currentGlobalPosition == NextGlobalPosition) { + return NextGlobalPosition; + } + for (; NextLocalPosition < Chunk->GetNotDefaultRecordsCount(); ++NextLocalPosition) { + NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetIndexUnsafeFast(NextLocalPosition); + if (currentGlobalPosition <= NextGlobalPosition) { + return NextGlobalPosition; + } + } + NextGlobalPosition = FinishGlobalPosition; + } + } + bool AddIndexTo(const ui32 index, TWriter& writer); + TSparsedChunkCursor(const std::shared_ptr& sparsed, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentSparsedArray(sparsed) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(sparsed); + AFL_VERIFY(currentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + }; + + class TCursor { + private: + std::shared_ptr Array; + std::optional CurrentOwnedArray; + std::shared_ptr SparsedCursor; + std::shared_ptr PlainCursor; + ui32 FinishGlobalPosition = 0; + const TColumnMergeContext& Context; + void InitArrays(const ui32 position); + + public: + TCursor(const std::shared_ptr& array, const TColumnMergeContext& context) + : Array(array) + , Context(context) { + AFL_VERIFY(Array); + AFL_VERIFY(Array->GetRecordsCount()); + InitArrays(0); + } + + ui32 GetRecordsCount() const { + return Array->GetRecordsCount(); + } + + ui32 MoveToSignificant(const ui32 start) { + ui32 currentPosition = start; + while (true) { + std::optional significantIndex; + if (SparsedCursor) { + significantIndex = SparsedCursor->MoveToSignificant(currentPosition, Context); + } else if (PlainCursor) { + significantIndex = PlainCursor->MoveToSignificant(currentPosition, Context); + } + if (significantIndex) { + return *significantIndex; + } + if 
(FinishGlobalPosition == Array->GetRecordsCount()) { + return FinishGlobalPosition; + } else { + currentPosition = FinishGlobalPosition; + InitArrays(FinishGlobalPosition); + } + } + } + + bool AddIndexTo(const ui32 index, TWriter& writer); + }; + + class TCursorPosition: TMoveOnly { + private: + TCursor* Cursor; + ui32 CurrentIndex = 0; + const std::vector* GlobalSequence = nullptr; + TMergingContext::TAddress CurrentAddress; + + bool InitPosition(const ui32 start) { + CurrentIndex = start; + while (true) { + CurrentIndex = Cursor->MoveToSignificant(CurrentIndex); + if (CurrentIndex == GlobalSequence->size()) { + return false; + } + auto& addr = (*GlobalSequence)[CurrentIndex]; + if (addr.GetGlobalPosition() != -1) { + CurrentAddress = addr; + return true; + } + if (++CurrentIndex == GlobalSequence->size()) { + return false; + } + } + } + + public: + TCursor* operator->() { + return Cursor; + } + + void AddIndexTo(TWriter& writer) const { + AFL_VERIFY(Cursor->AddIndexTo(CurrentIndex, writer)); + } + + TCursorPosition(TCursor* cursor, const std::vector& globalSequence) + : Cursor(cursor) + , GlobalSequence(&globalSequence) { + AFL_VERIFY(GlobalSequence->size() == cursor->GetRecordsCount()); + InitPosition(0); + } + + bool IsFinished() const { + AFL_VERIFY(CurrentIndex <= GlobalSequence->size()); + return CurrentIndex == GlobalSequence->size(); + } + + ui32 GetCurrentGlobalPosition() const { + return CurrentAddress.GetGlobalPosition(); + } + + ui32 GetCurrentGlobalChunkIdx() const { + return CurrentAddress.GetChunkIdx(); + } + + const TMergingContext::TAddress& GetCurrentAddress() const { + return CurrentAddress; + } + + bool operator<(const TCursorPosition& item) const { + return item.GetCurrentAddress() < GetCurrentAddress(); + } + + [[nodiscard]] bool Next() { + return InitPosition(++CurrentIndex); + } + }; + + std::deque Cursors; + std::list CursorPositions; + + virtual void DoStart( + const std::vector>& input, TMergingContext& mergeContext) override; + + 
virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; + +public: + using TBase::TBase; +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make new file mode 100644 index 000000000000..e24e8341aa7d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL logic.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make index aa52c0f9d6a0..5e76aa0d8971 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make @@ -1,14 +1,15 @@ LIBRARY() SRCS( - merge_context.cpp - column_cursor.cpp - column_portion_chunk.cpp - merged_column.cpp + merger.cpp ) PEERDIR( ydb/core/tx/tiering + ydb/core/tx/columnshard/engines/changes/compaction/abstract + ydb/core/tx/columnshard/engines/changes/compaction/common + ydb/core/tx/columnshard/engines/changes/compaction/plain + ydb/core/tx/columnshard/engines/changes/compaction/sparsed ) END() diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 87b66aa9473e..ea7b6ddc2eb4 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -1,75 +1,39 @@ #include "general_compaction.h" -#include "compaction/column_cursor.h" -#include "compaction/column_portion_chunk.h" -#include "compaction/merge_context.h" -#include "compaction/merged_column.h" #include "counters/general.h" +#include "compaction/merger.h" -#include -#include -#include +#include #include -#include 
-#include -#include -#include -#include namespace NKikimr::NOlap::NCompaction { -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept { - std::vector> batchResults; - auto resultSchema = context.SchemaVersions.GetLastSchema(); - auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); - { - auto resultDataSchema = resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false, IIndexInfo::GetSnapshotColumnNames()); - - THashSet portionsInUsage; - for (auto&& i : portions) { - AFL_VERIFY(portionsInUsage.emplace(i.GetPortionInfo().GetPortionId()).second); - } - - for (auto&& i : portions) { - auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); - auto batch = i.GetBatch(dataSchema, *resultSchema); - batch = resultSchema->NormalizeBatch(*dataSchema, batch).DetachResult(); - batch = IIndexInfo::NormalizeDeletionColumn(batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); - auto filter = BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), portionsInUsage, resultSchema); - mergeStream.AddSource(batch, filter); - } - batchResults = mergeStream.DrainAllParts(CheckPoints, resultDataSchema->fields()); - } - Y_ABORT_UNLESS(batchResults.size()); - for (auto&& b : batchResults) { - auto portions = MakeAppendedPortions(b, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), GranuleMeta.get(), context, {}); - Y_ABORT_UNLESS(portions.size()); - for (auto& portion : portions) { - if (shardingActual) { - portion.GetPortionConstructor().SetShardingVersion(shardingActual->GetSnapshotVersion()); - } - AppendedPortions.emplace_back(std::move(portion)); - } - } -} - -std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter(const std::optional& 
shardingActual, - const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { +std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter( + const std::optional& shardingActual, const std::shared_ptr& batch, + const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { std::shared_ptr filter; if (shardingActual && pInfo.NeedShardingFilter(*shardingActual)) { - filter = shardingActual->GetShardingInfo()->GetFilter(batch); + std::set fieldNames; + for (auto&& i : shardingActual->GetShardingInfo()->GetColumnNames()) { + fieldNames.emplace(i); + } + auto table = batch->BuildTableVerified(fieldNames); + AFL_VERIFY(table); + filter = shardingActual->GetShardingInfo()->GetFilter(table); } NArrow::TColumnFilter filterDeleted = NArrow::TColumnFilter::BuildAllowFilter(); if (pInfo.GetMeta().GetDeletionsCount()) { - auto col = batch->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); + auto table = batch->BuildTableVerified(std::set({ TIndexInfo::SPEC_COL_DELETE_FLAG })); + AFL_VERIFY(table); + auto col = table->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); AFL_VERIFY(col); AFL_VERIFY(col->type()->id() == arrow::Type::BOOL); - auto bCol = static_pointer_cast(col); - for (ui32 i = 0; i < bCol->length(); ++i) { - filterDeleted.Add(!bCol->GetView(i)); + for (auto&& c : col->chunks()) { + auto bCol = static_pointer_cast(c); + for (ui32 i = 0; i < bCol->length(); ++i) { + filterDeleted.Add(!bCol->GetView(i)); + } } NArrow::TColumnFilter filterCorrection = NArrow::TColumnFilter::BuildDenyFilter(); auto pkSchema = resultSchema->GetIndexInfo().GetReplaceKey(); @@ -79,14 +43,17 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build for (auto&& i : excludedIntervalsInfo.GetExcludedIntervals()) { NArrow::NMerger::TSortableBatchPosition startForFound(i.GetStart().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); 
NArrow::NMerger::TSortableBatchPosition finishForFound(i.GetFinish().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); - auto foundStart = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); + auto foundStart = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); AFL_VERIFY(foundStart); AFL_VERIFY(!foundStart->IsLess())("pos", pos.DebugJson())("start", startForFound.DebugJson())("found", foundStart->DebugString()); - auto foundFinish = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); + auto foundFinish = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); AFL_VERIFY(foundFinish); AFL_VERIFY(foundFinish->GetPosition() >= foundStart->GetPosition()); if (foundFinish->GetPosition() > foundStart->GetPosition()) { - AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())("found", foundFinish->DebugString()); + AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())( + "found", foundFinish->DebugString()); } filterCorrection.Add(foundStart->GetPosition() - posCurrent, false); if (foundFinish->IsGreater()) { @@ -109,180 +76,74 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build return filter; } -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept { - static const TString portionIdFieldName = "$$__portion_id"; - static const TString portionRecordIndexFieldName = "$$__portion_record_idx"; - static const std::shared_ptr portionIdField = std::make_shared(portionIdFieldName, std::make_shared()); - static const std::shared_ptr portionRecordIndexField = std::make_shared(portionRecordIndexFieldName, 
std::make_shared()); - +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( + TConstructionContext& context, std::vector&& portions) noexcept { auto resultSchema = context.SchemaVersions.GetLastSchema(); auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); - std::vector pkFieldNames = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); - std::set pkFieldNamesSet(pkFieldNames.begin(), pkFieldNames.end()); - for (auto&& i : TIndexInfo::GetSnapshotColumnNames()) { - pkFieldNamesSet.emplace(i); - } - pkFieldNamesSet.emplace(TIndexInfo::SPEC_COL_DELETE_FLAG); - - std::vector> batchResults; + std::shared_ptr stats = std::make_shared(); + std::shared_ptr resultFiltered; + NCompaction::TMerger merger(context, SaverContext); { - arrow::FieldVector indexFields; - indexFields.emplace_back(portionIdField); - indexFields.emplace_back(portionRecordIndexField); - IIndexInfo::AddSpecialFields(indexFields); - auto dataSchema = std::make_shared(indexFields); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); - THashSet usedPortionIds; - for (auto&& i : portions) { - AFL_VERIFY(usedPortionIds.emplace(i.GetPortionInfo().GetPortionId()).second); + std::set pkColumnIds; + { + auto pkColumnIdsVector = IIndexInfo::AddSnapshotFieldIds(resultSchema->GetIndexInfo().GetPKColumnIds()); + pkColumnIds = std::set(pkColumnIdsVector.begin(), pkColumnIdsVector.end()); } - - ui32 idx = 0; - for (auto&& i : portions) { - auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); - auto batch = i.GetBatch(dataSchema, *resultSchema, pkFieldNamesSet); + std::set dataColumnIds; + { { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionIdFieldName, idx++); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionIdField, column->BuildArray(batch->num_rows()))); 
- } - { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionRecordIndexFieldName); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionRecordIndexField, column->BuildArray(batch->num_rows()))); - } - batch = IIndexInfo::NormalizeDeletionColumn(batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); - std::shared_ptr filter = BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultSchema); - mergeStream.AddSource(batch, filter); - } - batchResults = mergeStream.DrainAllParts(CheckPoints, indexFields); - } - Y_ABORT_UNLESS(batchResults.size()); - - std::shared_ptr stats = std::make_shared(); - for (auto&& i : SwitchedPortions) { - stats->Merge(i.GetSerializationStat(*resultSchema)); - } - - std::vector>> chunkGroups; - chunkGroups.resize(batchResults.size()); - for (auto&& columnId : resultSchema->GetIndexInfo().GetColumnIds()) { - NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("field_name", resultSchema->GetIndexInfo().GetColumnName(columnId))); - auto columnInfo = stats->GetColumnInfo(columnId); - auto resultField = resultSchema->GetIndexInfo().GetColumnFieldVerified(columnId); - - std::vector cursors; - for (auto&& p : portions) { - auto dataSchema = p.GetPortionInfo().GetSchema(context.SchemaVersions); - auto loader = dataSchema->GetColumnLoaderOptional(columnId); - std::vector records; - std::vector> chunks; - if (!p.ExtractColumnChunks(columnId, records, chunks)) { - if (!loader) { - loader = resultSchema->GetColumnLoaderVerified(columnId); - } else { - AFL_VERIFY(dataSchema->IsSpecialColumnId(columnId)); + THashMap schemas; + for (auto& portion : SwitchedPortions) { + auto dataSchema = portion.GetSchema(context.SchemaVersions); + schemas.emplace(dataSchema->GetVersion(), dataSchema); } - chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, 
resultSchema->GetDefaultValueVerified(columnId), resultSchema->GetColumnSaver(columnId))); - records = { nullptr }; + dataColumnIds = ISnapshotSchema::GetColumnsWithDifferentDefaults(schemas, resultSchema); } - AFL_VERIFY(!!loader); - cursors.emplace_back(TPortionColumnCursor(chunks, records, loader, p.GetPortionInfo().GetPortionId())); - } - - ui32 batchesRecordsCount = 0; - ui32 columnRecordsCount = 0; - std::map> columnChunks; - ui32 batchIdx = 0; - for (auto&& batchResult : batchResults) { - const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; - TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); - TMergedColumn mColumn(context); - - auto columnPortionIdx = batchResult->GetColumnByName(portionIdFieldName); - auto columnPortionRecordIdx = batchResult->GetColumnByName(portionRecordIndexFieldName); - auto columnSnapshotPlanStepIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_PLAN_STEP); - auto columnSnapshotTxIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_TX_ID); - Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx && columnSnapshotPlanStepIdx && columnSnapshotTxIdx); - Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); - Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); - Y_ABORT_UNLESS(columnSnapshotPlanStepIdx->type_id() == arrow::UInt64Type::type_id); - Y_ABORT_UNLESS(columnSnapshotTxIdx->type_id() == arrow::UInt64Type::type_id); - const arrow::UInt16Array& pIdxArray = static_cast(*columnPortionIdx); - const arrow::UInt32Array& pRecordIdxArray = static_cast(*columnPortionRecordIdx); - - AFL_VERIFY(batchResult->num_rows() == pIdxArray.length()); - std::optional predPortionIdx; - for (ui32 idx = 0; idx < pIdxArray.length(); ++idx) { - const ui16 portionIdx = 
pIdxArray.Value(idx); - const ui32 portionRecordIdx = pRecordIdxArray.Value(idx); - auto& cursor = cursors[portionIdx]; - cursor.Next(portionRecordIdx, mColumn); - if (predPortionIdx && portionIdx != *predPortionIdx) { - cursors[*predPortionIdx].Fetch(mColumn); - } - if (idx + 1 == pIdxArray.length()) { - cursor.Fetch(mColumn); + for (auto&& i : SwitchedPortions) { + stats->Merge(i.GetSerializationStat(*resultSchema)); + if (dataColumnIds.size() != resultSchema->GetColumnsCount()) { + for (auto id : i.GetColumnIds()) { + if (resultSchema->HasColumnId(id)) { + dataColumnIds.emplace(id); + } + } } - predPortionIdx = portionIdx; } - chunkGroups[batchIdx][columnId] = mColumn.BuildResult(); - batchesRecordsCount += batchResult->num_rows(); - columnRecordsCount += mColumn.GetRecordsCount(); - AFL_VERIFY(batchResult->num_rows() == mColumn.GetRecordsCount()); - ++batchIdx; - } - AFL_VERIFY(columnRecordsCount == batchesRecordsCount)("mCount", columnRecordsCount)("bCount", batchesRecordsCount); - } - ui32 batchIdx = 0; - - const auto groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); - for (auto&& columnChunks : chunkGroups) { - auto batchResult = batchResults[batchIdx]; - ++batchIdx; - Y_ABORT_UNLESS(columnChunks.size()); - - for (auto&& i : columnChunks) { - if (i.second.size() != columnChunks.begin()->second.size()) { - for (ui32 p = 0; p < std::min(columnChunks.begin()->second.size(), i.second.size()); ++p) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())("p", i.second[p].DebugString()); - } + AFL_VERIFY(dataColumnIds.size() <= resultSchema->GetColumnsCount()); + if (dataColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + pkColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } - AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", 
columnChunks.begin()->second.size())("current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); } - - std::vector batchSlices; - std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultSchema, stats)); - - for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { - THashMap>> portionColumns; - for (auto&& p : columnChunks) { - portionColumns.emplace(p.first, p.second[i].GetChunks()); + resultFiltered = std::make_shared(resultSchema, dataColumnIds); + { + auto seqDataColumnIds = dataColumnIds; + for (auto&& i : pkColumnIds) { + AFL_VERIFY(seqDataColumnIds.erase(i))("id", i); + } + THashSet usedPortionIds; + for (auto&& i : portions) { + AFL_VERIFY(usedPortionIds.emplace(i.GetPortionInfo().GetPortionId()).second); } - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters); - } - TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); - auto packs = slicer.Split(batchSlices); - ui32 recordIdx = 0; - for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); - auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true); - AppendedPortions.emplace_back(TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), GranuleMeta->GetPathId(), - resultSchema->GetVersion(), resultSchema->GetSnapshot(), SaverContext.GetStoragesManager())); - AppendedPortions.back().FillStatistics(resultSchema->GetIndexInfo()); - NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultSchema->GetIndexInfo().GetReplaceKey())); - NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); - AppendedPortions.back().GetPortionConstructor().AddMetadata(*resultSchema, deletionsCount, primaryKeys, snapshotKeys); - 
AppendedPortions.back().GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); - if (shardingActual) { - AppendedPortions.back().GetPortionConstructor().SetShardingVersion(shardingActual->GetSnapshotVersion()); + for (auto&& i : portions) { + auto blobsSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); + auto batch = i.RestoreBatch(*blobsSchema, *resultFiltered, seqDataColumnIds); + std::shared_ptr filter = + BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultFiltered); + merger.AddBatch(batch, filter); } - recordIdx += slice.GetRecordsCount(); } } + + std::optional shardingActualVersion; + if (shardingActual) { + shardingActualVersion = shardingActual->GetSnapshotVersion(); + } + AppendedPortions = merger.Execute(stats, CheckPoints, resultFiltered, GranuleMeta->GetPathId(), shardingActualVersion); + for (auto&& p : AppendedPortions) { + p.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(NPortion::EProduced::SPLIT_COMPACTED); + } } TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { @@ -306,12 +167,9 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); { - std::vector portions = TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); - if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { - BuildAppendedPortionsByChunks(context, std::move(portions)); - } else { - BuildAppendedPortionsByFullBatches(context, std::move(portions)); - } + std::vector portions = + TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); + BuildAppendedPortionsByChunks(context, std::move(portions)); } if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { @@ -328,7 +186,8 @@ TConclusionStatus 
TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created_diff")("appended", sbAppended)("switched", sbSwitched); } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())("switched", SwitchedPortions.size()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())( + "switched", SwitchedPortions.size()); return TConclusionStatus::Success(); } @@ -336,32 +195,29 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(context.FinishedSuccessfully ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); + self->Counters.GetTabletCounters()->OnCompactionWriteIndexCompleted( + context.FinishedSuccessfully, context.BlobsWritten, context.BytesWritten); } } void TGeneralCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); auto& g = *GranuleMeta; - self.CSCounters.OnSplitCompactionInfo(g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); + self.Counters.GetCSCounters().OnSplitCompactionInfo( + g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); } NColumnShard::ECumulativeCounters TGeneralCompactColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? 
NColumnShard::COUNTER_COMPACTION_SUCCESS : NColumnShard::COUNTER_COMPACTION_FAIL; } -void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { - AFL_VERIFY(CheckPoints.emplace(position, include).second || !validationDuplications); +void TGeneralCompactColumnEngineChanges::AddCheckPoint( + const NArrow::NMerger::TSortableBatchPosition& position, const bool include) { + CheckPoints.InsertPosition(position, include); } std::shared_ptr TGeneralCompactColumnEngineChanges::BuildMemoryPredictor() { - if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { - return std::make_shared(); - } else { - return std::make_shared(); - } + return std::make_shared(); } ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPortion(const TPortionInfo& portionInfo) { @@ -386,7 +242,8 @@ ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPorti SumMemoryDelta = std::max(SumMemoryDelta, MaxMemoryByColumnChunk[i.first]); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)("portion_info", portionInfo.DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)( + "portion_info", portionInfo.DebugString()); return SumMemoryFix + SumMemoryDelta; } diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.h b/ydb/core/tx/columnshard/engines/changes/general_compaction.h index 75dc35317630..ab6f1e18684e 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.h @@ -9,12 +9,11 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { private: using TBase = TCompactColumnEngineChanges; virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& 
context) override; - std::map CheckPoints; - void BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept; + NArrow::NMerger::TIntervalPositions CheckPoints; void BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept; std::shared_ptr BuildPortionFilter(const std::optional& shardingActual, - const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, + const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const; protected: virtual TConclusionStatus DoConstructBlobs(TConstructionContext& context) noexcept override; @@ -64,7 +63,7 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { static std::shared_ptr BuildMemoryPredictor(); - void AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include = true, const bool validationDuplications = true); + void AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include); virtual TString TypeString() const override { return StaticTypeName(); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index e34fa411a347..22ca7fd2c738 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -1,12 +1,8 @@ #include "indexation.h" -#include -#include + +#include "compaction/merger.h" + #include -#include -#include -#include -#include -#include namespace NKikimr::NOlap { @@ -40,9 +36,7 @@ void TInsertColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnSha if (!DataToIndex.empty()) { self->UpdateInsertTableCounters(); } - self->IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, context.BytesWritten); - 
self->IncCounter(NColumnShard::COUNTER_INDEXING_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->OnInsertionWriteIndexCompleted(context.BlobsWritten, context.BytesWritten, context.Duration); } } @@ -54,18 +48,70 @@ namespace { class TBatchInfo { private: - YDB_READONLY_DEF(std::shared_ptr, Batch); - const NEvWrite::EModificationType ModificationType; + YDB_READONLY_DEF(std::shared_ptr, Batch); + +public: + TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType /*modificationType*/) + : Batch(batch) { + } +}; + +class TPathFieldsInfo { +private: + std::set UsageColumnIds; + const ISnapshotSchema::TPtr ResultSchema; + THashMap Schemas; + bool Finished = false; + const ui32 FullColumnsCount; + public: - TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType modificationType) - : Batch(batch) - , ModificationType(modificationType) + TPathFieldsInfo(const ISnapshotSchema::TPtr& resultSchema) + : UsageColumnIds(IIndexInfo::GetNecessarySystemColumnIdsSet()) + , ResultSchema(resultSchema) + , FullColumnsCount(ResultSchema->GetIndexInfo().GetColumnIds(true).size()) { + AFL_VERIFY(FullColumnsCount); + } + + bool IsFinished() const { + return Finished; + } + bool HasDeletion() const { + AFL_VERIFY(Finished); + return UsageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } - bool GetIsDeletion() const { - return ModificationType == NEvWrite::EModificationType::Delete; + void Finish() { + AFL_VERIFY(UsageColumnIds.size()); + AFL_VERIFY(!Finished); + Finished = true; + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto defaultDiffs = ISnapshotSchema::GetColumnsWithDifferentDefaults(Schemas, ResultSchema); + UsageColumnIds.insert(defaultDiffs.begin(), defaultDiffs.end()); + } + + const std::set& GetUsageColumnIds() const { + AFL_VERIFY(Finished); + return UsageColumnIds; + } + + void AddChunkInfo(const TCommittedData& data, const TConstructionContext& context) { + 
AFL_VERIFY(!Finished); + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto blobSchema = context.SchemaVersions.GetSchemaVerified(data.GetSchemaVersion()); + if (!Schemas.contains(data.GetSchemaVersion())) { + Schemas.emplace(data.GetSchemaVersion(), blobSchema); + } + std::vector filteredIds = data.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); + if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { + filteredIds.emplace_back((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } + UsageColumnIds.insert(filteredIds.begin(), filteredIds.end()); } }; @@ -73,22 +119,40 @@ class TPathData { private: std::vector Batches; YDB_READONLY_DEF(std::optional, ShardingInfo); - bool HasDeletionFlag = false; + TPathFieldsInfo ColumnsInfo; + public: - TPathData(const std::optional& shardingInfo) + TPathData(const std::optional& shardingInfo, const ISnapshotSchema::TPtr& resultSchema) : ShardingInfo(shardingInfo) - { - + , ColumnsInfo(resultSchema) { } - bool HasDeletion() { - return HasDeletionFlag; + const TPathFieldsInfo& GetColumnsInfo() const { + return ColumnsInfo; } - void AddBatch(const NOlap::TInsertedData& data, const std::shared_ptr& batch) { - if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { - HasDeletionFlag = true; + void FinishChunksInfo() { + ColumnsInfo.Finish(); + } + + std::vector> GetGeneralContainers() const { + std::vector> result; + for (auto&& i : Batches) { + result.emplace_back(i.GetBatch()); } + return result; + } + + void AddChunkInfo(const NOlap::TCommittedData& data, const TConstructionContext& context) { + ColumnsInfo.AddChunkInfo(data, context); + } + + bool HasDeletion() { + return ColumnsInfo.HasDeletion(); + } + + void AddBatch(const NOlap::TCommittedData& data, const std::shared_ptr& batch) { + AFL_VERIFY(ColumnsInfo.IsFinished()); AFL_VERIFY(batch); Batches.emplace_back(batch, data.GetMeta().GetModificationType()); } @@ -100,121 
+164,117 @@ class TPathData { ShardingInfo = info; } } - - std::shared_ptr Merge(const TIndexInfo& indexInfo) const { - auto fullSchema = indexInfo.ArrowSchemaWithSpecials(); - NArrow::NMerger::TMergePartialStream stream(indexInfo.GetReplaceKey(), fullSchema, false, IIndexInfo::GetSnapshotColumnNames()); - THashMap fieldSizes; - ui64 rowsCount = 0; - for (auto&& batch : Batches) { - auto& forMerge = batch.GetBatch(); - stream.AddSource(forMerge, nullptr); - for (ui32 cIdx = 0; cIdx < (ui32)forMerge->num_columns(); ++cIdx) { - fieldSizes[forMerge->column_name(cIdx)] += NArrow::GetArrayDataSize(forMerge->column(cIdx)); - } - rowsCount += forMerge->num_rows(); - } - - NArrow::NMerger::TRecordBatchBuilder builder(fullSchema->fields(), rowsCount, fieldSizes); - stream.SetPossibleSameVersion(true); - stream.DrainAll(builder); - return builder.Finalize(); - } }; class TPathesData { private: THashMap Data; + const ISnapshotSchema::TPtr ResultSchema; public: + TPathesData(const ISnapshotSchema::TPtr& resultSchema) + : ResultSchema(resultSchema) { + } + + void FinishChunksInfo() { + for (auto&& i : Data) { + i.second.FinishChunksInfo(); + } + } + const THashMap& GetData() const { return Data; } - void Add(const NOlap::TInsertedData& inserted, const std::optional& info, const std::shared_ptr& batch) { - auto it = Data.find(inserted.PathId); + void AddChunkInfo(const NOlap::TCommittedData& inserted, const TConstructionContext& context) { + auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.GetPathId(), inserted.GetSnapshot()); + auto it = Data.find(inserted.GetPathId()); if (it == Data.end()) { - it = Data.emplace(inserted.PathId, info).first; + it = Data.emplace(inserted.GetPathId(), TPathData(shardingFilterCommit, ResultSchema)).first; } - it->second.AddShardingInfo(info); + it->second.AddChunkInfo(inserted, context); + it->second.AddShardingInfo(shardingFilterCommit); + } + + void AddBatch(const NOlap::TCommittedData& inserted, const 
std::shared_ptr& batch) { + auto it = Data.find(inserted.GetPathId()); + AFL_VERIFY(it != Data.end()); it->second.AddBatch(inserted, batch); } + + const TPathFieldsInfo& GetPathInfo(const ui64 pathId) const { + auto it = Data.find(pathId); + AFL_VERIFY(it != Data.end()); + return it->second.GetColumnsInfo(); + } }; -} + +} // namespace TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { Y_ABORT_UNLESS(!DataToIndex.empty()); Y_ABORT_UNLESS(AppendedPortions.empty()); - auto maxSnapshot = TSnapshot::Zero(); + auto resultSchema = context.SchemaVersions.GetLastSchema(); + Y_ABORT_UNLESS(resultSchema->GetIndexInfo().IsSorted()); + + TPathesData pathBatches(resultSchema); for (auto& inserted : DataToIndex) { - TSnapshot insertSnap = inserted.GetSnapshot(); - Y_ABORT_UNLESS(insertSnap.Valid()); - if (insertSnap > maxSnapshot) { - maxSnapshot = insertSnap; + if (inserted.GetRemove()) { + continue; } + pathBatches.AddChunkInfo(inserted, context); } - Y_ABORT_UNLESS(maxSnapshot.Valid()); - auto resultSchema = context.SchemaVersions.GetSchema(maxSnapshot); - Y_ABORT_UNLESS(resultSchema->GetIndexInfo().IsSorted()); + pathBatches.FinishChunksInfo(); - TPathesData pathBatches; for (auto& inserted : DataToIndex) { const TBlobRange& blobRange = inserted.GetBlobRange(); - - auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.PathId, inserted.GetSnapshot()); - + if (inserted.GetRemove()) { + Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); + continue; + } auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); - auto& indexInfo = blobSchema->GetIndexInfo(); - Y_ABORT_UNLESS(indexInfo.IsSorted()); - std::shared_ptr batch; + std::shared_ptr batch; { const auto blobData = Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); - Y_ABORT_UNLESS(blobData.size(), "Blob data not present"); - // Prepare batch - batch = NArrow::DeserializeBatch(blobData, 
indexInfo.ArrowSchema()); - AFL_VERIFY(batch)("event", "cannot_parse") - ("data_snapshot", TStringBuilder() << inserted.GetSnapshot()) - ("index_snapshot", TStringBuilder() << blobSchema->GetSnapshot()); - ; + auto batchSchema = + std::make_shared(inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().ArrowSchema()->fields())); + batch = std::make_shared(NArrow::DeserializeBatch(blobData, batchSchema)); + blobSchema->AdaptBatchToSchema(*batch, resultSchema); } + IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot()); - batch = AddSpecials(batch, indexInfo, inserted); - batch = resultSchema->NormalizeBatch(*blobSchema, batch).DetachResult(); - pathBatches.Add(inserted, shardingFilterCommit, batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSorted(batch, resultSchema->GetIndexInfo().GetReplaceKey())); + auto& pathInfo = pathBatches.GetPathInfo(inserted.GetPathId()); + + if (pathInfo.HasDeletion()) { + IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); + } + + pathBatches.AddBatch(inserted, batch); } Y_ABORT_UNLESS(Blobs.IsEmpty()); - const std::vector comparableColumns = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); + auto stats = std::make_shared(); + std::vector> filters; for (auto& [pathId, pathInfo] : pathBatches.GetData()) { - auto shardingFilter = context.SchemaVersions.GetShardingInfoActual(pathId); - auto mergedBatch = pathInfo.Merge(resultSchema->GetIndexInfo()); + auto filteredSnapshot = std::make_shared(resultSchema, pathInfo.GetColumnsInfo().GetUsageColumnIds()); + std::optional shardingVersion; + if (pathInfo.GetShardingInfo()) { + shardingVersion = pathInfo.GetShardingInfo()->GetSnapshotVersion(); + } + auto batches = pathInfo.GetGeneralContainers(); + filters.resize(batches.size()); auto itGranule = PathToGranule.find(pathId); - AFL_VERIFY(itGranule != PathToGranule.end()); - std::vector> result = NArrow::NMerger::TRWSortableBatchPosition:: - 
SplitByBordersInSequentialContainer(mergedBatch, comparableColumns, itGranule->second); - for (auto&& b : result) { - if (!b) { - continue; - } - std::optional externalSaver; - if (b->num_rows() < 100) { - externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::UNCOMPRESSED)); - } else { - externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::LZ4_FRAME)); - } - auto portions = MakeAppendedPortions(b, pathId, maxSnapshot, nullptr, context, externalSaver); - Y_ABORT_UNLESS(portions.size()); - for (auto& portion : portions) { - if (pathInfo.GetShardingInfo()) { - portion.GetPortionConstructor().SetShardingVersion(pathInfo.GetShardingInfo()->GetSnapshotVersion()); - } - AppendedPortions.emplace_back(std::move(portion)); - } + AFL_VERIFY(itGranule != PathToGranule.end())("path_id", pathId); + NCompaction::TMerger merger(context, SaverContext, std::move(batches), std::move(filters)); + merger.SetOptimizationWritingPackMode(true); + auto localAppended = merger.Execute(stats, itGranule->second, filteredSnapshot, pathId, shardingVersion); + for (auto&& i : localAppended) { + i.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(NPortion::EProduced::INSERTED); + AppendedPortions.emplace_back(std::move(i)); } } @@ -222,15 +282,8 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont return TConclusionStatus::Success(); } -std::shared_ptr TInsertColumnEngineChanges::AddSpecials(const std::shared_ptr& srcBatch, - const TIndexInfo& indexInfo, const TInsertedData& inserted) const { - auto batch = IIndexInfo::AddSnapshotColumns(srcBatch, inserted.GetSnapshot()); - batch = IIndexInfo::AddDeleteFlagsColumn(batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); - return NArrow::TColumnOperator().Adapt(batch, indexInfo.ArrowSchemaWithSpecials()).DetachResult(); -} - NColumnShard::ECumulativeCounters 
TInsertColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? NColumnShard::COUNTER_INDEXING_SUCCESS : NColumnShard::COUNTER_INDEXING_FAIL; } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.h b/ydb/core/tx/columnshard/engines/changes/indexation.h index 95befd334c23..4c7f8602a6f5 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.h +++ b/ydb/core/tx/columnshard/engines/changes/indexation.h @@ -1,18 +1,21 @@ #pragma once -#include "abstract/abstract.h" #include "with_appended.h" -#include -#include + +#include "abstract/abstract.h" + #include +#include +#include + +#include namespace NKikimr::NOlap { class TInsertColumnEngineChanges: public TChangesWithAppend { private: using TBase = TChangesWithAppend; - std::shared_ptr AddSpecials(const std::shared_ptr& srcBatch, - const TIndexInfo& indexInfo, const TInsertedData& inserted) const; - std::vector DataToIndex; + std::vector DataToIndex; + protected: virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; @@ -34,15 +37,14 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { } public: - THashMap> PathToGranule; // pathId -> positions (sorted by pk) + THashMap PathToGranule; // pathId -> positions (sorted by pk) public: - TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) + TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) : TBase(saverContext, NBlobOperations::EConsumer::INDEXATION) - , DataToIndex(std::move(dataToIndex)) - { + , DataToIndex(std::move(dataToIndex)) { } - const std::vector& GetDataToIndex() const { + const std::vector& GetDataToIndex() const { return DataToIndex; } @@ -54,7 +56,6 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { return 
StaticTypeName(); } std::optional AddPathIfNotExists(ui64 pathId); - }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index f23c799e3319..fc74dbea0454 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -46,7 +46,7 @@ void TTTLColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChan } } -std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, +std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const { const TPortionInfo& portionInfo = info.GetPortionInfo(); @@ -55,7 +55,7 @@ std::optional TTTLColumnEngineChanges::UpdateEvicted Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.GetTargetTierName() || blobSchema->GetVersion() < evictFeatures.GetTargetScheme()->GetVersion()); auto portionWithBlobs = TReadPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs, blobSchema->GetIndexInfo()); - std::optional result = TReadPortionInfoWithBlobs::SyncPortion( + std::optional result = TReadPortionInfoWithBlobs::SyncPortion( std::move(portionWithBlobs), blobSchema, evictFeatures.GetTargetScheme(), evictFeatures.GetTargetTierName(), SaverContext.GetStoragesManager(), context.Counters.SplitterCounters); return std::move(result); } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.h b/ydb/core/tx/columnshard/engines/changes/ttl.h index 92eb0ffa9b3c..b75795e16fe4 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.h +++ b/ydb/core/tx/columnshard/engines/changes/ttl.h @@ -40,7 +40,7 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { } }; - std::optional UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, + 
std::optional UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const; std::vector PortionsToEvict; diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index edc8f8ca86de..24d44eb34587 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -17,9 +17,9 @@ void TChangesWithAppend::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); portionInfo.SaveToDatabase(context.DBWrapper, schemaPtr->GetIndexInfo().GetPKFirstColumnId(), false); } - const auto predRemoveDroppedTable = [self](const TWritePortionInfoWithBlobs& item) { + const auto predRemoveDroppedTable = [self](const TWritePortionInfoWithBlobsResult& item) { auto& portionInfo = item.GetPortionResult(); - if (!!self && (!self->TablesManager.HasTable(portionInfo.GetPathId()) || self->TablesManager.GetTable(portionInfo.GetPathId()).IsDropped())) { + if (!!self && !self->TablesManager.HasTable(portionInfo.GetPathId(), false)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_inserted_data")("reason", "table_removed")("path_id", portionInfo.GetPathId()); return true; } else { @@ -42,13 +42,13 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self case NOlap::TPortionMeta::EProduced::UNSPECIFIED: Y_ABORT_UNLESS(false); // unexpected case NOlap::TPortionMeta::EProduced::INSERTED: - self->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::COMPACTED: - self->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); + 
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::SPLIT_COMPACTED: - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::EVICTED: Y_ABORT("Unexpected evicted case"); @@ -58,19 +58,19 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self break; } } - self->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); THashSet blobsDeactivated; for (auto& [_, portionInfo] : PortionsToRemove) { for (auto& rec : portionInfo.Records) { blobsDeactivated.emplace(portionInfo.GetBlobId(rec.BlobRange.GetBlobIdxVerified())); } - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); } - self->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); for (auto& blobId : blobsDeactivated) { - self->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); } } { @@ -89,7 +89,6 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self void TChangesWithAppend::DoCompile(TFinalizationContext& context) { for (auto&& i : AppendedPortions) { i.GetPortionConstructor().SetPortionId(context.NextPortionId()); - i.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(TPortionMeta::EProduced::INSERTED); } for (auto& 
[_, portionInfo] : PortionsToRemove) { portionInfo.SetRemoveSnapshot(context.GetSnapshot()); @@ -102,49 +101,6 @@ void TChangesWithAppend::DoOnAfterCompile() { } } -std::vector TChangesWithAppend::MakeAppendedPortions(const std::shared_ptr batch, - const ui64 pathId, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const { - Y_ABORT_UNLESS(batch->num_rows()); - - auto resultSchema = context.SchemaVersions.GetSchema(snapshot); - - std::shared_ptr stats = std::make_shared(); - if (granuleMeta) { - stats = granuleMeta->BuildSerializationStats(resultSchema); - } - auto schema = std::make_shared(resultSchema, stats); - if (overrideSaver) { - schema->SetOverrideSerializer(*overrideSaver); - } - std::vector out; - { - std::vector pages = TBatchSerializedSlice::BuildSimpleSlices(batch, NSplitter::TSplitSettings(), context.Counters.SplitterCounters, schema); - std::vector generalPages; - for (auto&& i : pages) { - auto portionColumns = i.GetPortionChunksToHash(); - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - generalPages.emplace_back(portionColumns, schema, context.Counters.SplitterCounters); - } - - const NSplitter::TEntityGroups groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); - TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); - auto packs = slicer.Split(generalPages); - - ui32 recordIdx = 0; - for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); - auto b = batch->Slice(recordIdx, slice.GetRecordsCount()); - out.emplace_back(TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), pathId, resultSchema->GetVersion(), snapshot, SaverContext.GetStoragesManager())); - out.back().FillStatistics(resultSchema->GetIndexInfo()); - out.back().GetPortionConstructor().AddMetadata(*resultSchema, b); - 
out.back().GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); - recordIdx += slice.GetRecordsCount(); - } - } - - return out; -} - void TChangesWithAppend::DoStart(NColumnShard::TColumnShard& /*self*/) { } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.h b/ydb/core/tx/columnshard/engines/changes/with_appended.h index 4c5fbc2189c6..e35dfbbe4acc 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.h +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.h @@ -17,8 +17,6 @@ class TChangesWithAppend: public TColumnEngineChanges { virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoStart(NColumnShard::TColumnShard& self) override; - std::vector MakeAppendedPortions(const std::shared_ptr batch, const ui64 granule, - const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const; virtual void DoDebugString(TStringOutput& out) const override { out << "remove=" << PortionsToRemove.size() << ";append=" << AppendedPortions.size() << ";"; @@ -61,11 +59,11 @@ class TChangesWithAppend: public TColumnEngineChanges { AFL_VERIFY(PortionsToRemove.emplace(info.GetAddress(), info).second); } - std::vector AppendedPortions; + std::vector AppendedPortions; virtual ui32 GetWritePortionsCount() const override { return AppendedPortions.size(); } - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 index) override { Y_ABORT_UNLESS(index < AppendedPortions.size()); return &AppendedPortions[index]; } diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 912223f8cf1b..2c616c06e32d 100644 --- 
a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -46,10 +46,6 @@ struct TSelectInfo { std::vector> PortionsOrderedPK; - NColumnShard::TContainerAccessorWithDirection>> GetPortionsOrdered(const bool reverse) const { - return NColumnShard::TContainerAccessorWithDirection>>(PortionsOrderedPK, reverse); - } - size_t NumChunks() const; TStats Stats() const; @@ -284,7 +280,7 @@ class IColumnEngine { } virtual bool IsOverloadedByMetadata(const ui64 limit) const = 0; virtual std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const = 0; - virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; + virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; virtual std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupTables(const THashSet& pathsToDrop) noexcept = 0; diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 5c2a2d6ad62b..4218fc19cf5e 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -157,10 +157,9 @@ void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, TInd } void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) { - std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager); + std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager, SchemaObjectsCache); AFL_VERIFY(indexInfoOptional); NOlap::TIndexInfo indexInfo = std::move(*indexInfoOptional); - indexInfo.SetAllKeys(StoragesManager); 
RegisterSchemaVersion(snapshot, std::move(indexInfo)); } @@ -272,7 +271,7 @@ bool TColumnEngineForLogs::LoadCounters(IDbWrapper& db) { return db.LoadCounters(callback); } -std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { +std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { Y_ABORT_UNLESS(dataToIndex.size()); TSaverContext saverContext(StoragesManager); @@ -280,12 +279,15 @@ std::shared_ptr TColumnEngineForLogs::StartInsert(st auto pkSchema = VersionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey(); for (const auto& data : changes->GetDataToIndex()) { - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); if (changes->PathToGranule.contains(pathId)) { continue; } - changes->PathToGranule[pathId] = GetGranulePtrVerified(pathId)->GetBucketPositions(); + if (!data.GetRemove()) { + AFL_VERIFY(changes->PathToGranule.emplace(pathId, GetGranulePtrVerified(pathId)->GetBucketPositions()).second); + } + } return changes; @@ -341,7 +343,6 @@ std::shared_ptr TColumnEngineForLogs::Start ui32 skipLocked = 0; ui32 portionsFromDrop = 0; bool limitExceeded = false; - THashSet uniquePortions; for (ui64 pathId : pathsToDrop) { auto g = GranulesStorage->GetGranuleOptional(pathId); if (!g) { @@ -349,6 +350,9 @@ std::shared_ptr TColumnEngineForLogs::Start } for (auto& [portion, info] : g->GetPortions()) { + if (info->CheckForCleanup()) { + continue; + } if (dataLocksManager->IsLocked(*info)) { ++skipLocked; continue; @@ -359,8 +363,6 @@ std::shared_ptr TColumnEngineForLogs::Start limitExceeded = true; break; } - const auto inserted = uniquePortions.emplace(info->GetAddress()).second; - Y_ABORT_UNLESS(inserted); changes->PortionsToDrop.push_back(*info); ++portionsFromDrop; } @@ -368,8 +370,9 @@ std::shared_ptr TColumnEngineForLogs::Start const TInstant snapshotInstant = snapshot.GetPlanInstant(); for (auto it = CleanupPortions.begin(); !limitExceeded && it != CleanupPortions.end();) { - if 
(it->first >= snapshotInstant) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanupStop")("snapshot", snapshot.DebugString())("current_snapshot_ts", it->first); + if (it->first > snapshotInstant) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanupStop")("snapshot", snapshot.DebugString())( + "current_snapshot_ts", it->first.MilliSeconds()); break; } for (ui32 i = 0; i < it->second.size();) { @@ -378,17 +381,14 @@ std::shared_ptr TColumnEngineForLogs::Start ++i; continue; } - const auto inserted = uniquePortions.emplace(it->second[i].GetAddress()).second; - if (inserted) { - Y_ABORT_UNLESS(it->second[i].CheckForCleanup(snapshot)); - if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { - txSize += it->second[i].GetTxVolume(); - } else { - limitExceeded = true; - break; - } - changes->PortionsToDrop.push_back(std::move(it->second[i])); + AFL_VERIFY(it->second[i].CheckForCleanup(snapshot))("p_snapshot", it->second[i].GetRemoveSnapshotOptional())("snapshot", snapshot); + if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { + txSize += it->second[i].GetTxVolume(); + } else { + limitExceeded = true; + break; } + changes->PortionsToDrop.push_back(std::move(it->second[i])); if (i + 1 < it->second.size()) { it->second[i] = std::move(it->second.back()); } @@ -420,7 +420,7 @@ std::vector> TColumnEngineForLogs::Star TSaverContext saverContext(StoragesManager); NActualizer::TTieringProcessContext context(memoryUsageLimit, saverContext, dataLocksManager, SignalCounters, ActualizationController); - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(); for (auto&& i : pathEviction) { auto g = GetGranuleOptional(i.first); if (g) { @@ -485,9 +485,8 @@ void 
TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool updateStats) { Y_ABORT_UNLESS(!portionInfo.Empty()); const ui64 portion = portionInfo.GetPortion(); - auto spg = GetGranulePtrVerified(portionInfo.GetPathId()); - Y_ABORT_UNLESS(spg); - auto p = spg->GetPortionOptional(portion); + auto& spg = MutableGranuleVerified(portionInfo.GetPathId()); + auto p = spg.GetPortionOptional(portion); if (!p) { LOG_S_WARN("Portion erased already " << portionInfo << " at tablet " << TabletId); @@ -496,7 +495,7 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up if (updateStats) { UpdatePortionStats(*p, EStatsUpdateType::ERASE); } - Y_ABORT_UNLESS(spg->ErasePortion(portion)); + Y_ABORT_UNLESS(spg.ErasePortion(portion)); return true; } } @@ -509,20 +508,18 @@ std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, TSnapshot return out; } - for (const auto& [indexKey, keyPortions] : spg->GetPortionsIndex().GetPoints()) { - for (auto&& [_, portionInfo] : keyPortions.GetStart()) { - if (!portionInfo->IsVisible(snapshot)) { - continue; - } - Y_ABORT_UNLESS(portionInfo->Produced()); - const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo, VersionedIndex.GetLastSchema()->GetIndexInfo()); - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? "portion_skipped" : "portion_selected") - ("pathId", pathId)("portion", portionInfo->DebugString()); - if (skipPortion) { - continue; - } - out->PortionsOrderedPK.emplace_back(portionInfo); + for (const auto& [_, portionInfo] : spg->GetPortions()) { + if (!portionInfo->IsVisible(snapshot)) { + continue; + } + Y_ABORT_UNLESS(portionInfo->Produced()); + const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? 
"portion_skipped" : "portion_selected")("pathId", pathId)( + "portion", portionInfo->DebugString()); + if (skipPortion) { + continue; } + out->PortionsOrderedPK.emplace_back(portionInfo); } return out; @@ -574,12 +571,4 @@ void TColumnEngineForLogs::DoRegisterTable(const ui64 pathId) { } } -TDuration TColumnEngineForLogs::GetRemovedPortionLivetime() { - TDuration result = TDuration::Minutes(10); - if (HasAppData() && AppDataVerified().ColumnShardConfig.HasRemovedPortionLivetimeSeconds()) { - result = TDuration::Seconds(AppDataVerified().ColumnShardConfig.GetRemovedPortionLivetimeSeconds()); - } - return NYDBTest::TControllers::GetColumnShardController()->GetRemovedPortionLivetime(result); -} - } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index a48fb9cda9e0..7b515c26f40c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -53,10 +53,7 @@ class TColumnEngineForLogs : public IColumnEngine { std::shared_ptr StoragesManager; std::shared_ptr ActualizationController; - - static TDuration GetRemovedPortionLivetime(); - - const TDuration RemovedPortionLivetime = GetRemovedPortionLivetime(); + std::shared_ptr SchemaObjectsCache = std::make_shared(); public: const std::shared_ptr& GetActualizationController() const { @@ -112,7 +109,7 @@ class TColumnEngineForLogs : public IColumnEngine { return limit < TGranulesStat::GetSumMetadataMemoryPortionsSize(); } - std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; + std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupTables(const THashSet& 
pathsToDrop) noexcept override; @@ -154,6 +151,10 @@ class TColumnEngineForLogs : public IColumnEngine { return *GetGranulePtrVerified(pathId); } + TGranuleMeta& MutableGranuleVerified(const ui64 pathId) const { + return *GetGranulePtrVerified(pathId); + } + std::shared_ptr GetGranulePtrVerified(const ui64 pathId) const { auto result = GetGranuleOptional(pathId); AFL_VERIFY(result)("path_id", pathId); @@ -173,7 +174,7 @@ class TColumnEngineForLogs : public IColumnEngine { } void AddCleanupPortion(const TPortionInfo& info) { - CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant() + RemovedPortionLivetime].emplace_back(info); + CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant()].emplace_back(info); } void AddShardingInfo(const TGranuleShardingInfo& shardingInfo) { VersionedIndex.AddShardingInfo(shardingInfo); diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index b605475552cc..b5c8e5e4ea58 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -12,7 +12,7 @@ void TDbWrapper::Insert(const TInsertedData& data) { NColumnShard::Schema::InsertTable_Insert(db, data); } -void TDbWrapper::Commit(const TInsertedData& data) { +void TDbWrapper::Commit(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_Commit(db, data); } @@ -27,7 +27,7 @@ void TDbWrapper::EraseInserted(const TInsertedData& data) { NColumnShard::Schema::InsertTable_EraseInserted(db, data); } -void TDbWrapper::EraseCommitted(const TInsertedData& data) { +void TDbWrapper::EraseCommitted(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_EraseCommitted(db, data); } @@ -144,16 +144,23 @@ bool TDbWrapper::LoadPortions(const std::function().Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()).Update( - 
NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), - NIceDb::TUpdate(row.GetBlobRange().Offset), - NIceDb::TUpdate(row.GetBlobRange().Size), - NIceDb::TUpdate(row.GetRecordsCount()), - NIceDb::TUpdate(row.GetRawBytes()) - ); + if (auto bRange = row.GetBlobRangeOptional()) { + AFL_VERIFY(bRange->IsValid()); + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(portion.GetBlobId(bRange->GetBlobIdxVerified()).SerializeBinary()), + NIceDb::TUpdate(bRange->Offset), NIceDb::TUpdate(row.GetDataSize()), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else if (auto bData = row.GetBlobDataOptional()) { + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(*bData), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else { + AFL_VERIFY(false); + } } void TDbWrapper::EraseIndex(const TPortionInfo& portion, const TIndexChunk& row) { diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.h b/ydb/core/tx/columnshard/engines/db_wrapper.h index 39536cb9c987..50958b6fca29 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.h +++ b/ydb/core/tx/columnshard/engines/db_wrapper.h @@ -16,7 +16,8 @@ namespace NKikimr::NOlap { class TColumnChunkLoadContext; class TIndexChunkLoadContext; -struct TInsertedData; +class TInsertedData; +class TCommittedData; class TInsertTableAccessor; class TColumnRecord; class TIndexChunk; @@ -30,10 +31,10 @@ class IDbWrapper { virtual ~IDbWrapper() = default; virtual void Insert(const TInsertedData& data) = 0; - virtual void Commit(const TInsertedData& data) = 0; + virtual void Commit(const TCommittedData& data) = 0; virtual void Abort(const TInsertedData& data) = 0; virtual void EraseInserted(const TInsertedData& data) = 0; - virtual void EraseCommitted(const TInsertedData& data) = 0; + virtual void 
EraseCommitted(const TCommittedData& data) = 0; virtual void EraseAborted(const TInsertedData& data) = 0; virtual bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) = 0; @@ -63,10 +64,10 @@ class TDbWrapper : public IDbWrapper { {} void Insert(const TInsertedData& data) override; - void Commit(const TInsertedData& data) override; + void Commit(const TCommittedData& data) override; void Abort(const TInsertedData& data) override; void EraseInserted(const TInsertedData& data) override; - void EraseCommitted(const TInsertedData& data) override; + void EraseCommitted(const TCommittedData& data) override; void EraseAborted(const TInsertedData& data) override; bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) override; diff --git a/ydb/core/tx/columnshard/engines/defs.cpp b/ydb/core/tx/columnshard/engines/defs.cpp new file mode 100644 index 000000000000..2c50c99d1d3c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/defs.cpp @@ -0,0 +1,11 @@ +#include "defs.h" + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} diff --git a/ydb/core/tx/columnshard/engines/defs.h b/ydb/core/tx/columnshard/engines/defs.h index a01edc7ef767..efe1f1c744a8 100644 --- a/ydb/core/tx/columnshard/engines/defs.h +++ b/ydb/core/tx/columnshard/engines/defs.h @@ -8,18 +8,33 @@ namespace NKikimr::NOlap { using TLogThis = TCtorLogger; -enum class TWriteId : ui64 {}; +enum class TOperationWriteId : ui64 { +}; +enum class TInsertWriteId : ui64 { +}; + +inline TOperationWriteId operator++(TOperationWriteId& w) noexcept { + w = TOperationWriteId{ ui64(w) + 1 }; + return w; +} -inline TWriteId operator++(TWriteId& w) noexcept { - w = TWriteId{ui64(w) + 1}; +inline TInsertWriteId operator++(TInsertWriteId& w) noexcept { + w = TInsertWriteId{ ui64(w) + 1 }; return w; } -} // namespace NKikimr::NOlap +} // namespace 
NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TInsertWriteId x) const noexcept { + return THash()(ui64(x)); + } +}; template <> -struct THash { - inline size_t operator()(const NKikimr::NOlap::TWriteId x) const noexcept { +struct THash { + inline size_t operator()(const NKikimr::NOlap::TOperationWriteId x) const noexcept { return THash()(ui64(x)); } }; diff --git a/ydb/core/tx/columnshard/engines/filter.h b/ydb/core/tx/columnshard/engines/filter.h index 7670b0eab1d3..39167306b993 100644 --- a/ydb/core/tx/columnshard/engines/filter.h +++ b/ydb/core/tx/columnshard/engines/filter.h @@ -2,7 +2,7 @@ #include "defs.h" #include -#include +#include #include namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/engines/insert_table/committed.cpp b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp new file mode 100644 index 000000000000..bd4bb9ff6c06 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp @@ -0,0 +1,5 @@ +#include "committed.h" + +namespace NKikimr::NOlap { + +} diff --git a/ydb/core/tx/columnshard/engines/insert_table/committed.h b/ydb/core/tx/columnshard/engines/insert_table/committed.h new file mode 100644 index 000000000000..bd633647b5ec --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.h @@ -0,0 +1,161 @@ +#pragma once +#include "user_data.h" + +#include + +namespace NKikimr::NOlap { + +class TCommittedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TSnapshot, Snapshot, NOlap::TSnapshot::Zero()); + YDB_READONLY_DEF(TString, DedupId); + YDB_READONLY(bool, Remove, false); + +public: + TCommittedData(const std::shared_ptr& userData, const ui64 planStep, const ui64 txId, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(ToString(planStep) + ":" + ToString((ui64)insertWriteId)) { + } + + TCommittedData(const std::shared_ptr& userData, const ui64 
planStep, const ui64 txId, const TString& dedupId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(dedupId) { + } + + TCommittedData(const std::shared_ptr& userData, const TSnapshot& ss, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(ss) + , DedupId(ToString(ss.GetPlanStep()) + ":" + ToString((ui64)insertWriteId)) { + } + + void SetRemove() { + AFL_VERIFY(!Remove); + Remove = true; + } + + bool operator<(const TCommittedData& key) const { + if (Snapshot == key.Snapshot) { + if (UserData->GetPathId() == key.UserData->GetPathId()) { + return DedupId < key.DedupId; + } else { + return UserData->GetPathId() < key.UserData->GetPathId(); + } + } else { + return Snapshot < key.Snapshot; + } + } +}; + +class TCommittedBlob { +private: + TBlobRange BlobRange; + std::variant WriteInfo; + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY(bool, IsDelete, false); + NArrow::TReplaceKey First; + NArrow::TReplaceKey Last; + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); + +public: + const NArrow::TReplaceKey& GetFirst() const { + return First; + } + const NArrow::TReplaceKey& GetLast() const { + return Last; + } + + ui64 GetSize() const { + return BlobRange.Size; + } + + TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , WriteInfo(snapshot) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + TCommittedBlob(const TBlobRange& blobRange, const TInsertWriteId writeId, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , 
WriteInfo(writeId) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. + /// So hash() and equality should depend on BlobId only. + bool operator==(const TCommittedBlob& key) const { + return BlobRange == key.BlobRange; + } + ui64 Hash() const noexcept { + return BlobRange.Hash(); + } + TString DebugString() const { + if (auto* ss = GetSnapshotOptional()) { + return TStringBuilder() << BlobRange << ";snapshot=" << ss->DebugString(); + } else { + return TStringBuilder() << BlobRange << ";write_id=" << (ui64)GetWriteIdVerified(); + } + } + + bool HasSnapshot() const { + return GetSnapshotOptional(); + } + + const TSnapshot& GetSnapshotDef(const TSnapshot& def) const { + if (auto* snapshot = GetSnapshotOptional()) { + return *snapshot; + } else { + return def; + } + } + + const TSnapshot* GetSnapshotOptional() const { + return std::get_if(&WriteInfo); + } + + const TSnapshot& GetSnapshotVerified() const { + auto* result = GetSnapshotOptional(); + AFL_VERIFY(result); + return *result; + } + + const TInsertWriteId* GetWriteIdOptional() const { + return std::get_if(&WriteInfo); + } + + TInsertWriteId GetWriteIdVerified() const { + auto* result = GetWriteIdOptional(); + AFL_VERIFY(result); + return *result; + } + + const TBlobRange& GetBlobRange() const { + return BlobRange; + } +}; + +} // namespace NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TCommittedBlob& key) const { + return key.Hash(); + } +}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.cpp b/ydb/core/tx/columnshard/engines/insert_table/data.cpp deleted file mode 100644 index 3f27a5ad5e7d..000000000000 --- a/ydb/core/tx/columnshard/engines/insert_table/data.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "data.h" -#include - -namespace 
NKikimr::NOlap { - -namespace { - -class TInsertTableCacheController { -private: - TAtomicCounter BlobsCacheSize = 0; - const i64 BlobsCacheLimit = (i64)1 << 30; -public: - void Return(const ui64 size) { - const i64 val = BlobsCacheSize.Sub(size); - AFL_VERIFY(val >= 0)("size", size)("val", val); - } - - bool Take(const ui64 size) { - if (BlobsCacheSize.Add(size) <= BlobsCacheLimit) { - return true; - } - const i64 val = BlobsCacheSize.Sub(size); - AFL_VERIFY(val >= 0)("size", size)("val", val); - return false; - } -}; - -} - -TInsertedData::TBlobStorageGuard::~TBlobStorageGuard() { - Singleton()->Return(Data.size()); -} - -TInsertedData::~TInsertedData() { -} - -TInsertedData::TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, - const std::optional& blobData /*= {}*/) - : Meta(proto) - , BlobRange(blobRange) - , PlanStep(planStep) - , WriteTxId(writeTxId) - , PathId(pathId) - , DedupId(dedupId) - , SchemaVersion(schemaVersion) -{ - if (blobData) { - AFL_VERIFY(blobData->size() == BlobRange.Size); - if (Singleton()->Take(blobData->size())) { - BlobDataGuard = std::make_shared(*blobData); - } - } -} - -} diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.h b/ydb/core/tx/columnshard/engines/insert_table/data.h deleted file mode 100644 index 0317b818073c..000000000000 --- a/ydb/core/tx/columnshard/engines/insert_table/data.h +++ /dev/null @@ -1,190 +0,0 @@ -#pragma once -#include "meta.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -struct TInsertedData { -private: - TInsertedDataMeta Meta; - YDB_READONLY_DEF(TBlobRange, BlobRange); - class TBlobStorageGuard { - private: - YDB_READONLY_DEF(TString, Data); - public: - TBlobStorageGuard(const TString& data) - : Data(data) - { - - } - ~TBlobStorageGuard(); - }; - - std::shared_ptr BlobDataGuard; - -public: - ui64 PlanStep = 0; - ui64 WriteTxId = 0; - 
ui64 PathId = 0; - TString DedupId; - -private: - YDB_READONLY(ui64, SchemaVersion, 0); -public: - std::optional GetBlobData() const { - if (BlobDataGuard) { - return BlobDataGuard->GetData(); - } else { - return {}; - } - } - - ui64 GetTxVolume() const { - return Meta.GetTxVolume() + sizeof(TBlobRange); - } - - const TInsertedDataMeta& GetMeta() const { - return Meta; - } - - TInsertedData() = delete; // avoid invalid TInsertedData anywhere - - TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData); - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) - : TInsertedData(0, writeTxId, pathId, dedupId, blobRange, proto, schemaVersion, blobData) - {} - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TUnifiedBlobId& blobId, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) - : TInsertedData(0, writeTxId, pathId, dedupId, TBlobRange(blobId, 0, blobId.BlobSize()), proto, schemaVersion, blobData) - { - } - - ~TInsertedData(); - - bool operator < (const TInsertedData& key) const { - if (PlanStep < key.PlanStep) { - return true; - } else if (PlanStep > key.PlanStep) { - return false; - } - - // PlanStep == key.PlanStep - if (WriteTxId < key.WriteTxId) { - return true; - } else if (WriteTxId > key.WriteTxId) { - return false; - } - - // PlanStep == key.PlanStep && WriteTxId == key.WriteTxId - if (PathId < key.PathId) { - return true; - } else if (PathId > key.PathId) { - return false; - } - - return DedupId < key.DedupId; - } - - bool operator == (const TInsertedData& key) const { - return (PlanStep == key.PlanStep) && - (WriteTxId == key.WriteTxId) && - (PathId == key.PathId) && - 
(DedupId == key.DedupId); - } - - /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. - /// One of them wins and becomes committed. Original DedupId would be lost then. - /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. - /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} - void Commit(ui64 planStep, ui64 txId) { - DedupId = ToString(PlanStep) + ":" + ToString((ui64)WriteTxId); - PlanStep = planStep; - WriteTxId = txId; - } - - /// Undo Commit() operation. Restore Initiator:WriteId from DedupId. - void Undo() { - TVector tokens; - size_t numTokens = Split(DedupId, ":", tokens); - Y_ABORT_UNLESS(numTokens == 2); - - PlanStep = FromString(tokens[0]); - WriteTxId = FromString(tokens[1]); - DedupId.clear(); - } - - TSnapshot GetSnapshot() const { - return TSnapshot(PlanStep, WriteTxId); - } - - ui32 BlobSize() const { return BlobRange.GetBlobSize(); } - -}; - -class TCommittedBlob { -private: - TBlobRange BlobRange; - TSnapshot CommitSnapshot; - YDB_READONLY(ui64, SchemaVersion, 0); - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY(bool, IsDelete, false); - YDB_READONLY_DEF(std::optional, First); - YDB_READONLY_DEF(std::optional, Last); -public: - ui64 GetSize() const { - return BlobRange.Size; - } - - const NArrow::TReplaceKey& GetFirstVerified() const { - Y_ABORT_UNLESS(First); - return *First; - } - - const NArrow::TReplaceKey& GetLastVerified() const { - Y_ABORT_UNLESS(Last); - return *Last; - } - - TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, const std::optional& first, - const std::optional& last, const bool isDelete) - : BlobRange(blobRange) - , CommitSnapshot(snapshot) - , SchemaVersion(schemaVersion) - , RecordsCount(recordsCount) - , IsDelete(isDelete) - , First(first) - , Last(last) - {} - - /// It uses trick then we place key with 
planStep:txId in container and find them later by BlobId only. - /// So hash() and equality should depend on BlobId only. - bool operator == (const TCommittedBlob& key) const { return BlobRange == key.BlobRange; } - ui64 Hash() const noexcept { return BlobRange.Hash(); } - TString DebugString() const { - return TStringBuilder() << BlobRange << ";ps=" << CommitSnapshot.GetPlanStep() << ";ti=" << CommitSnapshot.GetTxId(); - } - - const TSnapshot& GetSnapshot() const { - return CommitSnapshot; - } - - const TBlobRange& GetBlobRange() const { - return BlobRange; - } -}; - -} - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TCommittedBlob& key) const { - return key.Hash(); - } -}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp index 5147fbd02bc8..980882ad8eca 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp @@ -1,7 +1,9 @@ #include "insert_table.h" + #include -#include +#include #include +#include namespace NKikimr::NOlap { @@ -11,12 +13,13 @@ bool TInsertTable::Insert(IDbWrapper& dbTable, TInsertedData&& data) { dbTable.Insert(*dataPtr); return true; } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_insertion"); return false; } } -TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists) { +TInsertionSummary::TCounters TInsertTable::Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists) { Y_ABORT_UNLESS(!writeIds.empty()); TInsertionSummary::TCounters counters; @@ -33,15 +36,19 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan dbTable.EraseInserted(*data); - const ui64 pathId = data->PathId; + const ui64 pathId = data->GetPathId(); auto* pathInfo = 
Summary.GetPathInfoOptional(pathId); // There could be commit after drop: propose, drop, plan if (pathInfo && pathExists(pathId)) { - data->Commit(planStep, txId); - dbTable.Commit(*data); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "commit_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); + auto committed = data->Commit(planStep, txId); + dbTable.Commit(committed); - pathInfo->AddCommitted(std::move(*data)); + pathInfo->AddCommitted(std::move(committed)); } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); } @@ -50,12 +57,14 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan return counters; } -void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { +void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { Y_ABORT_UNLESS(!writeIds.empty()); for (auto writeId : writeIds) { // There could be inconsistency with txs and writes in case of bugs. So we could find no record for writeId. 
if (std::optional data = Summary.ExtractInserted(writeId)) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString())("write_id", writeId); dbTable.EraseInserted(*data); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); @@ -63,88 +72,100 @@ void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds } } -THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { +THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { return Summary.GetExpiredInsertions(now - WaitCommitDelay, CleanupPackageSize); } -THashSet TInsertTable::DropPath(IDbWrapper& dbTable, ui64 pathId) { - auto pathInfo = Summary.ExtractPathInfo(pathId); - if (!!pathInfo) { - for (auto& data : pathInfo->GetCommitted()) { - dbTable.EraseCommitted(data); - TInsertedData copy = data; - copy.Undo(); - dbTable.Abort(copy); - Summary.AddAborted(std::move(copy)); - } - } - - return Summary.GetInsertedByPathId(pathId); -} - -void TInsertTable::EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { +void TInsertTable::EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& data, const std::shared_ptr& blobsAction) { if (Summary.HasCommitted(data)) { dbTable.EraseCommitted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } -void TInsertTable::EraseCommittedOnComplete(const TInsertedData& data) { +void TInsertTable::EraseCommittedOnComplete(const TCommittedData& data) { if (Summary.EraseCommitted(data)) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -void TInsertTable::EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { - if (Summary.HasAborted((TWriteId)data.WriteTxId)) { +void TInsertTable::EraseAbortedOnExecute( + IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { + if 
(Summary.HasAborted(data.GetInsertWriteId())) { dbTable.EraseAborted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } void TInsertTable::EraseAbortedOnComplete(const TInsertedData& data) { - if (Summary.EraseAborted((TWriteId)data.WriteTxId)) { + if (Summary.EraseAborted(data.GetInsertWriteId())) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -bool TInsertTable::Load(IDbWrapper& dbTable, const TInstant loadTime) { +bool TInsertTable::Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime) { Y_ABORT_UNLESS(!Loaded); Loaded = true; + LastWriteId = (TInsertWriteId)0; + if (!NColumnShard::Schema::GetSpecialValueOpt(db, NColumnShard::Schema::EValueIds::LastWriteId, LastWriteId)) { + return false; + } + return dbTable.Load(*this, loadTime); } -std::vector TInsertTable::Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const { +std::vector TInsertTable::Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const { const TPathInfo* pInfo = Summary.GetPathInfoOptional(pathId); if (!pInfo) { return {}; } - std::vector ret; - ret.reserve(pInfo->GetCommitted().size()); + std::vector result; + result.reserve(pInfo->GetCommitted().size() + Summary.GetInserted().size()); for (const auto& data : pInfo->GetCommitted()) { - if (std::less_equal()(data.GetSnapshot(), snapshot)) { - ret.emplace_back(&data); + if (lockId || data.GetSnapshot() <= reqSnapshot) { + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), data.GetSnapshot(), data.GetSchemaVersion(), data.GetMeta().GetNumRows(), + start, finish, data.GetMeta().GetModificationType() == 
NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); } } - const auto pred = [pkSchema](const TInsertedData* l, const TInsertedData* r) { - return l->GetMeta().GetFirstPK(pkSchema) < r->GetMeta().GetFirstPK(pkSchema); - }; - std::sort(ret.begin(), ret.end(), pred); - - std::vector result; - result.reserve(ret.size()); - for (auto&& i : ret) { - result.emplace_back(TCommittedBlob( - i->GetBlobRange(), i->GetSnapshot(), i->GetSchemaVersion(), i->GetMeta().GetNumRows(), i->GetMeta().GetFirstPK(pkSchema), i->GetMeta().GetLastPK(pkSchema) - , i->GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete)); + if (lockId) { + for (const auto& [writeId, data] : Summary.GetInserted()) { + if (data.GetPathId() != pathId) { + continue; + } + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), writeId, data.GetSchemaVersion(), data.GetMeta().GetNumRows(), start, finish, + data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); + } } - return result; } -bool TInsertTableAccessor::RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { +TInsertWriteId TInsertTable::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + return BuildNextWriteId(db); +} + +TInsertWriteId TInsertTable::BuildNextWriteId(NIceDb::TNiceDb& db) { + TInsertWriteId writeId = ++LastWriteId; + NColumnShard::Schema::SaveSpecialValue(db, NColumnShard::Schema::EValueIds::LastWriteId, (ui64)writeId); + return writeId; +} + +bool TInsertTableAccessor::RemoveBlobLinkOnExecute( + const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { AFL_VERIFY(blobsAction); auto itBlob = 
BlobLinks.find(blobId); AFL_VERIFY(itBlob != BlobLinks.end()); @@ -170,4 +191,4 @@ bool TInsertTableAccessor::RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h index 373964818370..4f7544e10184 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h @@ -1,17 +1,17 @@ #pragma once -#include "data.h" -#include "rt_insertion.h" +#include "committed.h" +#include "inserted.h" #include "path_info.h" +#include "rt_insertion.h" + +#include +#include #include namespace NKikimr::NOlap { - +class TPKRangesFilter; class IDbWrapper; -/// Use one table for inserted and committed blobs: -/// !Commited => {PlanStep, WriteTxId} are {0, WriteId} -/// Commited => {PlanStep, WriteTxId} are {PlanStep, TxId} - class TInsertTableAccessor { protected: TInsertionSummary Summary; @@ -23,7 +23,14 @@ class TInsertTableAccessor { bool RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction); bool RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId); + public: + void ErasePath(const ui64 pathId) { + Summary.ErasePath(pathId); + } + bool HasDataInPathId(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } const std::map>& GetPathPriorities() const { return Summary.GetPathPriorities(); } @@ -46,20 +53,28 @@ class TInsertTableAccessor { return Summary.AddInserted(std::move(data), load); } bool AddAborted(TInsertedData&& data, const bool load) { + AFL_VERIFY_DEBUG(!Summary.ExtractInserted(data.GetInsertWriteId())); if (load) { AddBlobLink(data.GetBlobRange().BlobId); } return Summary.AddAborted(std::move(data), load); } - bool AddCommitted(TInsertedData&& data, const bool load) { + bool AddCommitted(TCommittedData&& data, const bool load) { if (load) { AddBlobLink(data.GetBlobRange().BlobId); 
} - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); return Summary.GetPathInfo(pathId).AddCommitted(std::move(data), load); } - const THashMap& GetAborted() const { return Summary.GetAborted(); } - const THashMap& GetInserted() const { return Summary.GetInserted(); } + bool HasPathIdData(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } + const THashMap& GetAborted() const { + return Summary.GetAborted(); + } + const TInsertedContainer& GetInserted() const { + return Summary.GetInserted(); + } const TInsertionSummary::TCounters& GetCountersPrepared() const { return Summary.GetCountersPrepared(); } @@ -74,25 +89,34 @@ class TInsertTableAccessor { class TInsertTable: public TInsertTableAccessor { private: bool Loaded = false; + TInsertWriteId LastWriteId = TInsertWriteId{ 0 }; + public: static constexpr const TDuration WaitCommitDelay = TDuration::Minutes(10); static constexpr ui64 CleanupPackageSize = 10000; bool Insert(IDbWrapper& dbTable, TInsertedData&& data); - TInsertionSummary::TCounters Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists); - void Abort(IDbWrapper& dbTable, const THashSet& writeIds); - THashSet OldWritesToAbort(const TInstant& now) const; - THashSet DropPath(IDbWrapper& dbTable, ui64 pathId); + TInsertionSummary::TCounters Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists); + void Abort(IDbWrapper& dbTable, const THashSet& writeIds); + void MarkAsNotAbortable(const TInsertWriteId writeId) { + Summary.MarkAsNotAbortable(writeId); + } + THashSet OldWritesToAbort(const TInstant& now) const; - void EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); - void EraseCommittedOnComplete(const TInsertedData& key); + void EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& key, const std::shared_ptr& blobsAction); + void 
EraseCommittedOnComplete(const TCommittedData& key); void EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); void EraseAbortedOnComplete(const TInsertedData& key); - std::vector Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const; - bool Load(IDbWrapper& dbTable, const TInstant loadTime); + std::vector Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const; + bool Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime); + + TInsertWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); + TInsertWriteId BuildNextWriteId(NIceDb::TNiceDb& db); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp new file mode 100644 index 000000000000..2986fc0b4c35 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp @@ -0,0 +1,12 @@ +#include "committed.h" +#include "inserted.h" + +#include + +namespace NKikimr::NOlap { + +TCommittedData TInsertedData::Commit(const ui64 planStep, const ui64 txId) { + return TCommittedData(UserData, planStep, txId, InsertWriteId); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.h b/ydb/core/tx/columnshard/engines/insert_table/inserted.h new file mode 100644 index 000000000000..e124edeb57e5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.h @@ -0,0 +1,35 @@ +#pragma once +#include "user_data.h" + +#include + +namespace NKikimr::NOlap { + +class TCommittedData; + +class TInsertedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TInsertWriteId, InsertWriteId, TInsertWriteId(0)); + YDB_READONLY_FLAG(NotAbortable, false); + +public: + void MarkAsNotAbortable() { + 
NotAbortableFlag = true; + } + + TInsertedData() = delete; // avoid invalid TInsertedData anywhere + + TInsertedData(const TInsertWriteId writeId, const std::shared_ptr& userData) + : TBase(userData) + , InsertWriteId(writeId) { + } + + /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. + /// One of them wins and becomes committed. Original DedupId would be lost then. + /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. + /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} + [[nodiscard]] TCommittedData Commit(const ui64 planStep, const ui64 txId); +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp index caed6b91ac0d..eee7b2444b2e 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp @@ -6,14 +6,23 @@ NKikimrTxColumnShard::TLogicalMetadata TInsertedDataMeta::SerializeToProto() con return OriginalProto; } -const std::optional& TInsertedDataMeta::GetSpecialKeys() const { - if (!KeysParsed) { - if (OriginalProto.HasSpecialKeysRawData()) { - SpecialKeysParsed = NArrow::TFirstLastSpecialKeys(OriginalProto.GetSpecialKeysRawData()); - } - KeysParsed = true; +std::shared_ptr TInsertedDataMeta::GetSpecialKeys(const std::shared_ptr& schema) const { + if (KeyInitialized.Val()) { + return SpecialKeysParsed; } - return SpecialKeysParsed; + std::shared_ptr result; + if (OriginalProto.HasSpecialKeysPayloadData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysPayloadData(), schema); + } else if (OriginalProto.HasSpecialKeysRawData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysRawData()); + } else { + AFL_VERIFY(false); + } + if (AtomicCas(&KeyInitialization, 1, 0)) { + SpecialKeysParsed = result; + KeyInitialized = 1; + } + return 
result; } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.h b/ydb/core/tx/columnshard/engines/insert_table/meta.h index 196e4cb667de..a913e88c973a 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.h +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -14,13 +15,14 @@ class TInsertedDataMeta { YDB_READONLY(ui32, NumRows, 0); YDB_READONLY(ui64, RawBytes, 0); YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); - mutable bool KeysParsed = false; - mutable std::optional SpecialKeysParsed; - + mutable TAtomicCounter KeyInitialized = 0; + mutable TAtomic KeyInitialization = 0; + mutable std::shared_ptr SpecialKeysParsed; NKikimrTxColumnShard::TLogicalMetadata OriginalProto; + std::shared_ptr GetSpecialKeys(const std::shared_ptr& schema) const; - const std::optional& GetSpecialKeys() const; public: ui64 GetTxVolume() const { return 2 * sizeof(ui64) + sizeof(ui32) + sizeof(OriginalProto) + (SpecialKeysParsed ? 
SpecialKeysParsed->GetMemoryBytes() : 0); @@ -36,21 +38,18 @@ class TInsertedDataMeta { if (proto.HasModificationType()) { ModificationType = TEnumOperator::DeserializeFromProto(proto.GetModificationType()); } + if (proto.HasSchemaSubset()) { + SchemaSubset.DeserializeFromProto(proto.GetSchemaSubset()).Validate(); + } } - std::optional GetFirstPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetFirst(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetFirstPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetFirst(); } - std::optional GetLastPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetLast(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetLastPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetLast(); } NKikimrTxColumnShard::TLogicalMetadata SerializeToProto() const; diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp index 31be2b8a8fa3..d4530b55c481 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp @@ -29,7 +29,7 @@ void TPathInfo::AddInsertedSize(const i64 size, const ui64 overloadLimit) { SetInsertedOverload((ui64)InsertedSize > overloadLimit); } -bool TPathInfo::EraseCommitted(const TInsertedData& data) { +bool TPathInfo::EraseCommitted(const TCommittedData& data) { Summary->RemovePriority(*this); const bool result = Committed.erase(data); AddCommittedSize(-1 * (i64)data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); @@ -39,11 +39,11 @@ bool TPathInfo::EraseCommitted(const TInsertedData& data) { return result; } -bool TPathInfo::HasCommitted(const TInsertedData& data) { +bool TPathInfo::HasCommitted(const TCommittedData& data) { return 
Committed.contains(data); } -bool TPathInfo::AddCommitted(TInsertedData&& data, const bool load) { +bool TPathInfo::AddCommitted(TCommittedData&& data, const bool load) { const ui64 dataSize = data.BlobSize(); Summary->RemovePriority(*this); AddCommittedSize(data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.h b/ydb/core/tx/columnshard/engines/insert_table/path_info.h index 5e44929307c4..8675a05a7d1b 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.h +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.h @@ -1,7 +1,10 @@ #pragma once +#include "committed.h" +#include "inserted.h" + #include + #include -#include "data.h" namespace NKikimr::NOlap { class TInsertionSummary; @@ -17,12 +20,11 @@ class TPathInfoIndexPriority { private: YDB_READONLY(EIndexationPriority, Category, EIndexationPriority::NoPriority); const ui32 Weight; + public: TPathInfoIndexPriority(const EIndexationPriority category, const ui32 weight) : Category(category) - , Weight(weight) - { - + , Weight(weight) { } bool operator!() const { @@ -37,7 +39,7 @@ class TPathInfoIndexPriority { class TPathInfo: public TMoveOnly { private: const ui64 PathId = 0; - TSet Committed; + TSet Committed; YDB_READONLY(i64, CommittedSize, 0); YDB_READONLY(i64, InsertedSize, 0); bool CommittedOverload = false; @@ -51,6 +53,10 @@ class TPathInfo: public TMoveOnly { void AddCommittedSize(const i64 size, const ui64 overloadLimit); public: + bool IsEmpty() const { + return Committed.empty() && !InsertedSize; + } + void AddInsertedSize(const i64 size, const ui64 overloadLimit); explicit TPathInfo(TInsertionSummary& summary, const ui64 pathId); @@ -61,18 +67,18 @@ class TPathInfo: public TMoveOnly { TPathInfoIndexPriority GetIndexationPriority() const; - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool 
HasCommitted(const TCommittedData& data); - const TSet& GetCommitted() const { + const TSet& GetCommitted() const { return Committed; } - bool AddCommitted(TInsertedData&& data, const bool load = false); + bool AddCommitted(TCommittedData&& data, const bool load = false); bool IsOverloaded() const { return CommittedOverload || InsertedOverload; } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp index 4723ac8da5c6..6cc6e4872da3 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp @@ -23,9 +23,10 @@ void TInsertionSummary::RemovePriority(const TPathInfo& pathInfo) noexcept { const auto priority = pathInfo.GetIndexationPriority(); auto it = Priorities.find(priority); if (it == Priorities.end()) { - Y_ABORT_UNLESS(!priority); + AFL_VERIFY(!priority); return; } + AFL_VERIFY(!!priority); Y_ABORT_UNLESS(it->second.erase(&pathInfo) || !priority); if (it->second.empty()) { Priorities.erase(it); @@ -33,7 +34,9 @@ void TInsertionSummary::RemovePriority(const TPathInfo& pathInfo) noexcept { } void TInsertionSummary::AddPriority(const TPathInfo& pathInfo) noexcept { - Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + if (!!pathInfo.GetIndexationPriority()) { + Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + } } NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { @@ -44,20 +47,6 @@ NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { return it->second; } -std::optional TInsertionSummary::ExtractPathInfo(const ui64 pathId) { - auto it = PathInfo.find(pathId); - if (it == PathInfo.end()) { - return {}; - } - RemovePriority(it->second); - std::optional result = std::move(it->second); - PathInfo.erase(it); - for (auto&& i : 
result->GetCommitted()) { - OnEraseCommitted(*result, i.BlobSize()); - } - return result; -} - NKikimr::NOlap::TPathInfo* TInsertionSummary::GetPathInfoOptional(const ui64 pathId) { auto it = PathInfo.find(pathId); if (it == PathInfo.end()) { @@ -100,37 +89,11 @@ void TInsertionSummary::OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize AFL_VERIFY(Counters.Inserted.GetDataSize() == (i64)StatsPrepared.Bytes); } -THashSet TInsertionSummary::GetInsertedByPathId(const ui64 pathId) const { - THashSet result; - for (auto& [writeId, data] : Inserted) { - if (data.PathId == pathId) { - result.insert(writeId); - } - } - - return result; +THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { + return Inserted.GetExpired(timeBorder, limit); } -THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { - if (timeBorder < MinInsertedTs) { - return {}; - } - - THashSet toAbort; - TInstant newMin = TInstant::Max(); - for (auto& [writeId, data] : Inserted) { - const TInstant dataInsertTs = data.GetMeta().GetDirtyWriteTime(); - if (dataInsertTs < timeBorder && toAbort.size() < limit) { - toAbort.insert(writeId); - } else { - newMin = Min(newMin, dataInsertTs); - } - } - MinInsertedTs = (toAbort.size() == Inserted.size()) ? 
TInstant::Zero() : newMin; - return toAbort; -} - -bool TInsertionSummary::EraseAborted(const TWriteId writeId) { +bool TInsertionSummary::EraseAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -140,7 +103,7 @@ bool TInsertionSummary::EraseAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::HasAborted(const TWriteId writeId) { +bool TInsertionSummary::HasAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -148,8 +111,8 @@ bool TInsertionSummary::HasAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::EraseCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { Counters.Committed.SkipErase(data.BlobSize()); return false; @@ -163,8 +126,8 @@ bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { } } -bool TInsertionSummary::HasCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::HasCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { return false; } @@ -172,40 +135,30 @@ bool TInsertionSummary::HasCommitted(const TInsertedData& data) { } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddAborted(TInsertedData&& data, const bool load /*= false*/) { - const TWriteId writeId((TWriteId)data.WriteTxId); + const TInsertWriteId writeId = data.GetInsertWriteId(); Counters.Aborted.Add(data.BlobSize(), load); + AFL_VERIFY_DEBUG(!Inserted.contains(writeId)); auto insertInfo = Aborted.emplace(writeId, std::move(data)); - Y_ABORT_UNLESS(insertInfo.second); + AFL_VERIFY(insertInfo.second)("write_id", writeId); return &insertInfo.first->second; } -std::optional 
TInsertionSummary::ExtractInserted(const TWriteId id) { - auto it = Inserted.find(id); - if (it == Inserted.end()) { - return {}; - } else { - auto pathInfo = GetPathInfoOptional(it->second.PathId); +std::optional TInsertionSummary::ExtractInserted(const TInsertWriteId id) { + auto result = Inserted.ExtractOptional(id); + if (result) { + auto pathInfo = GetPathInfoOptional(result->GetPathId()); if (pathInfo) { - OnEraseInserted(*pathInfo, it->second.BlobSize()); + OnEraseInserted(*pathInfo, result->BlobSize()); } - std::optional result = std::move(it->second); - Inserted.erase(it); - return result; } + return result; } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddInserted(TInsertedData&& data, const bool load /*= false*/) { - TWriteId writeId{ data.WriteTxId }; - const ui32 dataSize = data.BlobSize(); - const ui64 pathId = data.PathId; - auto insertInfo = Inserted.emplace(writeId, std::move(data)); - if (insertInfo.second) { - OnNewInserted(GetPathInfo(pathId), dataSize, load); - return &insertInfo.first->second; - } else { - Counters.Inserted.SkipAdd(dataSize); - return nullptr; - } + auto* insertInfo = Inserted.AddVerified(std::move(data)); + AFL_VERIFY_DEBUG(!Aborted.contains(insertInfo->GetInsertWriteId())); + OnNewInserted(GetPathInfo(insertInfo->GetPathId()), insertInfo->BlobSize(), load); + return insertInfo; } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h index 291886bd81bd..67e8034628c8 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h @@ -1,10 +1,121 @@ #pragma once +#include "inserted.h" +#include "path_info.h" + #include + #include -#include "path_info.h" namespace NKikimr::NOlap { class IBlobsDeclareRemovingAction; + +class TInsertedDataInstant { +private: + const TInsertedData* Data; + const TInstant WriteTime; + +public: + TInsertedDataInstant(const 
TInsertedData& data) + : Data(&data) + , WriteTime(Data->GetMeta().GetDirtyWriteTime()) + { + + } + + const TInsertedData& GetData() const { + return *Data; + } + TInstant GetWriteTime() const { + return WriteTime; + } + + bool operator<(const TInsertedDataInstant& item) const { + if (WriteTime == item.WriteTime) { + return Data->GetInsertWriteId() < item.Data->GetInsertWriteId(); + } else { + return WriteTime < item.WriteTime; + } + } +}; + +class TInsertedContainer { +private: + THashMap Inserted; + std::set InsertedByWriteTime; + +public: + size_t size() const { + return Inserted.size(); + } + + bool contains(const TInsertWriteId id) const { + return Inserted.contains(id); + } + + THashMap::const_iterator begin() const { + return Inserted.begin(); + } + + THashMap::const_iterator end() const { + return Inserted.end(); + } + + THashSet GetExpired(const TInstant timeBorder, const ui64 limit) const { + THashSet result; + for (auto& data : InsertedByWriteTime) { + if (timeBorder < data.GetWriteTime()) { + break; + } + if (data.GetData().IsNotAbortable()) { + continue; + } + result.emplace(data.GetData().GetInsertWriteId()); + if (limit <= result.size()) { + break; + } + } + return result; + } + + TInsertedData* AddVerified(TInsertedData&& data) { + const TInsertWriteId writeId = data.GetInsertWriteId(); + auto itInsertion = Inserted.emplace(writeId, std::move(data)); + AFL_VERIFY(itInsertion.second); + auto* dataPtr = &itInsertion.first->second; + InsertedByWriteTime.emplace(TInsertedDataInstant(*dataPtr)); + return dataPtr; + } + + const TInsertedData* GetOptional(const TInsertWriteId id) const { + auto it = Inserted.find(id); + if (it == Inserted.end()) { + return nullptr; + } else { + return &it->second; + } + } + + TInsertedData* MutableOptional(const TInsertWriteId id) { + auto it = Inserted.find(id); + if (it == Inserted.end()) { + return nullptr; + } else { + return &it->second; + } + } + + std::optional ExtractOptional(const TInsertWriteId id) { + auto it = 
Inserted.find(id); + if (it == Inserted.end()) { + return std::nullopt; + } + AFL_VERIFY(InsertedByWriteTime.erase(TInsertedDataInstant(it->second))); + TInsertedData result = std::move(it->second); + Inserted.erase(it); + return result; + } +}; + class TInsertionSummary { public: struct TCounters { @@ -19,9 +130,8 @@ class TInsertionSummary { TCounters StatsCommitted; const NColumnShard::TInsertTableCounters Counters; - THashMap Inserted; - THashMap Aborted; - mutable TInstant MinInsertedTs = TInstant::Zero(); + TInsertedContainer Inserted; + THashMap Aborted; std::map> Priorities; THashMap PathInfo; @@ -33,35 +143,63 @@ class TInsertionSummary { void OnNewInserted(TPathInfo& pathInfo, const ui64 dataSize, const bool load) noexcept; void OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize) noexcept; static TAtomicCounter CriticalInserted; + public: - THashSet GetInsertedByPathId(const ui64 pathId) const; + bool HasPathIdData(const ui64 pathId) const { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return false; + } + return !it->second.IsEmpty(); + } - THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; + void ErasePath(const ui64 pathId) { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return; + } + RemovePriority(it->second); + AFL_VERIFY(it->second.IsEmpty()); + PathInfo.erase(it); + } - const THashMap& GetInserted() const { + void MarkAsNotAbortable(const TInsertWriteId writeId) { + auto* data = Inserted.MutableOptional(writeId); + if (!data) { + return; + } + data->MarkAsNotAbortable(); + } + + THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; + + const TInsertedContainer& GetInserted() const { return Inserted; } - const THashMap& GetAborted() const { + const THashMap& GetAborted() const { return Aborted; } const TInsertedData* AddAborted(TInsertedData&& data, const bool load = false); - bool EraseAborted(const TWriteId writeId); - bool HasAborted(const 
TWriteId writeId); + bool EraseAborted(const TInsertWriteId writeId); + bool HasAborted(const TInsertWriteId writeId); - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool HasCommitted(const TCommittedData& data); const TInsertedData* AddInserted(TInsertedData&& data, const bool load = false); - std::optional ExtractInserted(const TWriteId id); + std::optional ExtractInserted(const TInsertWriteId id); - const TCounters& GetCountersPrepared() const { return StatsPrepared; } - const TCounters& GetCountersCommitted() const { return StatsCommitted; } + const TCounters& GetCountersPrepared() const { + return StatsPrepared; + } + const TCounters& GetCountersCommitted() const { + return StatsCommitted; + } const NColumnShard::TInsertTableCounters& GetCounters() const { return Counters; } NKikimr::NOlap::TPathInfo& GetPathInfo(const ui64 pathId); - std::optional ExtractPathInfo(const ui64 pathId); TPathInfo* GetPathInfoOptional(const ui64 pathId); const TPathInfo* GetPathInfoOptional(const ui64 pathId) const; @@ -76,4 +214,4 @@ class TInsertionSummary { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp new file mode 100644 index 000000000000..f5d3db9d71d7 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp @@ -0,0 +1,45 @@ +#include "user_data.h" +#include + +namespace NKikimr::NOlap { + +namespace { + +class TInsertTableCacheController { +private: + TAtomicCounter BlobsCacheSize = 0; + const i64 BlobsCacheLimit = (i64)1 << 30; +public: + void Return(const ui64 size) { + const i64 val = BlobsCacheSize.Sub(size); + AFL_VERIFY(val >= 0)("size", size)("val", val); + } + + bool Take(const ui64 size) { + if (BlobsCacheSize.Add(size) <= BlobsCacheLimit) { + return true; + } + const i64 val = BlobsCacheSize.Sub(size); + 
AFL_VERIFY(val >= 0)("size", size)("val", val); + return false; + } +}; + +} + +TUserData::TBlobStorageGuard::~TBlobStorageGuard() { + Singleton()->Return(Data.size()); +} + + TUserData::TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, + const ui64 schemaVersion, const std::optional& blobData) + : Meta(proto) + , BlobRange(blobRange) + , PathId(pathId) + , SchemaVersion(schemaVersion) { + if (blobData && Singleton()->Take(blobData->size())) { + BlobDataGuard = std::make_shared(*blobData); + } +} + +} diff --git a/ydb/core/tx/columnshard/engines/insert_table/user_data.h b/ydb/core/tx/columnshard/engines/insert_table/user_data.h new file mode 100644 index 000000000000..d734d90524eb --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.h @@ -0,0 +1,100 @@ +#pragma once +#include "meta.h" + +#include + +#include + +namespace NKikimr::NOlap { + +class TUserData { +private: + TInsertedDataMeta Meta; + YDB_READONLY_DEF(TBlobRange, BlobRange); + class TBlobStorageGuard { + private: + YDB_READONLY_DEF(TString, Data); + + public: + TBlobStorageGuard(const TString& data) + : Data(data) { + } + ~TBlobStorageGuard(); + }; + + std::shared_ptr BlobDataGuard; + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY(ui64, SchemaVersion, 0); + +public: + TUserData() = delete; + TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData); + + static std::shared_ptr Build(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { + return std::make_shared(pathId, blobRange, proto, schemaVersion, blobData); + } + + static std::shared_ptr Build(const ui64 pathId, const TUnifiedBlobId& blobId, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { 
+ return std::make_shared(pathId, TBlobRange(blobId), proto, schemaVersion, blobData); + } + + std::optional GetBlobData() const { + if (BlobDataGuard) { + return BlobDataGuard->GetData(); + } else { + return std::nullopt; + } + } + + ui64 GetTxVolume() const { + return Meta.GetTxVolume() + sizeof(TBlobRange); + } + + const TInsertedDataMeta& GetMeta() const { + return Meta; + } +}; + +class TUserDataContainer { +protected: + std::shared_ptr UserData; + +public: + TUserDataContainer(const std::shared_ptr& userData) + : UserData(userData) { + AFL_VERIFY(UserData); + } + + ui64 GetSchemaVersion() const { + return UserData->GetSchemaVersion(); + } + + ui32 BlobSize() const { + return GetBlobRange().Size; + } + + ui32 GetTxVolume() const { + return UserData->GetTxVolume(); + } + + ui64 GetPathId() const { + return UserData->GetPathId(); + } + + const TBlobRange& GetBlobRange() const { + return UserData->GetBlobRange(); + } + + std::optional GetBlobData() const { + return UserData->GetBlobData(); + } + + const TInsertedDataMeta& GetMeta() const { + return UserData->GetMeta(); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/ya.make b/ydb/core/tx/columnshard/engines/insert_table/ya.make index 5f1d92bfb0ee..e6fde75077d5 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/ya.make +++ b/ydb/core/tx/columnshard/engines/insert_table/ya.make @@ -3,13 +3,16 @@ LIBRARY() SRCS( insert_table.cpp rt_insertion.cpp - data.cpp + user_data.cpp + inserted.cpp + committed.cpp path_info.cpp meta.cpp ) PEERDIR( contrib/libs/apache/arrow + ydb/library/formats/arrow/modifier ydb/core/protos ydb/core/formats/arrow ydb/core/tablet_flat diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.cpp b/ydb/core/tx/columnshard/engines/portions/column_record.cpp index e4fbef70c5fc..6127ad439326 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.cpp +++ b/ydb/core/tx/columnshard/engines/portions/column_record.cpp @@ 
-28,7 +28,7 @@ TChunkMeta::TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColu DeserializeFromProto(context.GetAddress(), context.GetMetaProto(), columnInfo).Validate(); } -TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : TBase(column, columnInfo.GetNeedMinMax(), columnInfo.GetIsSorted()) { } @@ -52,7 +52,8 @@ TColumnRecord::TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const T { } -TColumnRecord::TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TColumnRecord::TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : Meta(column, columnInfo) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index ff34086f38d7..18fd0984d61b 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -2,20 +2,19 @@ #include "common.h" -#include - +#include +#include #include #include -#include -#include +#include #include +#include #include -#include #include #include - +#include #include namespace NKikimrColumnShardDataSharingProto { @@ -31,16 +30,17 @@ struct TChunkMeta: public TSimpleChunkMeta { private: using TBase = TSimpleChunkMeta; TChunkMeta() = default; - [[nodiscard]] TConclusionStatus DeserializeFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); + [[nodiscard]] TConclusionStatus DeserializeFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); friend class TColumnRecord; + public: TChunkMeta(TSimpleChunkMeta&& baseMeta) - 
: TBase(baseMeta) - { - + : TBase(baseMeta) { } - [[nodiscard]] static TConclusion BuildFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { + [[nodiscard]] static TConclusion BuildFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { TChunkMeta result; auto parse = result.DeserializeFromProto(address, proto, columnInfo); if (!parse) { @@ -63,20 +63,19 @@ struct TChunkMeta: public TSimpleChunkMeta { TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColumnInfo& columnInfo); - TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); }; class TColumnRecord { private: TChunkMeta Meta; TColumnRecord(TChunkMeta&& meta) - : Meta(std::move(meta)) - { - + : Meta(std::move(meta)) { } TColumnRecord() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo); + public: ui32 ColumnId = 0; ui16 Chunk = 0; @@ -99,9 +98,7 @@ class TColumnRecord { : Meta(std::move(meta)) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) - , BlobRange(range) - { - + , BlobRange(range) { } class TTestInstanceBuilder { @@ -116,7 +113,7 @@ class TColumnRecord { } }; - ui32 GetColumnId() const { + ui32 GetColumnId() const { return ColumnId; } ui16 GetChunkIdx() const { @@ -127,7 +124,8 @@ class TColumnRecord { } NKikimrColumnShardDataSharingProto::TColumnRecord SerializeToProto() const; - static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { + static TConclusion BuildFromProto( + const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { TColumnRecord result; auto parse = 
result.DeserializeFromProto(proto, columnInfo); if (!parse) { @@ -136,14 +134,14 @@ class TColumnRecord { return result; } - TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { - TColumnSerializationStat result(ColumnId, columnName); + NArrow::NSplitter::TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { + NArrow::NSplitter::TColumnSerializationStat result(ColumnId, columnName); result.Merge(GetSerializationStat()); return result; } - TSimpleSerializationStat GetSerializationStat() const { - return TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); + NArrow::NSplitter::TSimpleSerializationStat GetSerializationStat() const { + return NArrow::NSplitter::TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); } const TChunkMeta& GetMeta() const { @@ -163,18 +161,17 @@ class TColumnRecord { } TString DebugString() const { - return TStringBuilder() - << "column_id:" << ColumnId << ";" - << "chunk_idx:" << Chunk << ";" - << "blob_range:" << BlobRange.ToString() << ";" - ; + return TStringBuilder() << "column_id:" << ColumnId << ";" + << "chunk_idx:" << Chunk << ";" + << "blob_range:" << BlobRange.ToString() << ";"; } - TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const TColumnChunkLoadContext& loadContext, const TSimpleColumnInfo& columnInfo); - friend IOutputStream& operator << (IOutputStream& out, const TColumnRecord& rec) { + friend IOutputStream& operator<<(IOutputStream& out, const TColumnRecord& rec) { out << '{'; if (rec.Chunk) { out << 'n' << rec.Chunk; @@ -186,49 +183,4 @@ class TColumnRecord { } }; -class TSimpleOrderedColumnChunk: public IPortionColumnChunk { -private: - using TBase = 
IPortionColumnChunk; - const TColumnRecord ColumnRecord; - YDB_READONLY_DEF(TString, Data); -protected: - virtual TString DoDebugString() const override { - TStringBuilder sb; - sb << "column_id=" << GetColumnId() << ";data_size=" << Data.size() << ";"; - if (GetChunkIdxOptional()) { - sb << "chunk=" << GetChunkIdxVerified() << ";"; - } else { - sb << "chunk=NO_INITIALIZED;"; - } - return sb; - } - - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return ColumnRecord.GetMeta().GetNumRows(); - } - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - Y_ABORT_UNLESS(false); - return {}; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - return ColumnRecord.GetMeta(); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } -public: - TSimpleOrderedColumnChunk(const TColumnRecord& cRecord, const TString& data) - : TBase(cRecord.ColumnId, cRecord.Chunk) - , ColumnRecord(cRecord) - , Data(data) { - } -}; - -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/common.h b/ydb/core/tx/columnshard/engines/portions/common.h index 1231a1e9f5f3..3702887ccc81 100644 --- a/ydb/core/tx/columnshard/engines/portions/common.h +++ b/ydb/core/tx/columnshard/engines/portions/common.h @@ -1,7 +1,9 @@ #pragma once #include +#include namespace NKikimr::NOlap { +using TColumnSaver = NArrow::NAccessor::TColumnSaver; class TChunkAddress { private: diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index 4b8cfdf97a01..39cd0fe983dc 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ 
b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -57,28 +57,16 @@ void TPortionInfoConstructor::LoadRecord(const TIndexInfo& indexInfo, const TCol } void TPortionInfoConstructor::LoadIndex(const TIndexChunkLoadContext& loadContext) { - const auto linkBlobId = RegisterBlobId(loadContext.GetBlobRange().GetBlobId()); - AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + if (loadContext.GetBlobRange()) { + const TBlobRangeLink16::TLinkId linkBlobId = RegisterBlobId(loadContext.GetBlobRange()->GetBlobId()); + AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + } else { + AddIndex(loadContext.BuildIndexChunk()); + } } const NKikimr::NOlap::TColumnRecord& TPortionInfoConstructor::AppendOneChunkColumn(TColumnRecord&& record) { Y_ABORT_UNLESS(record.ColumnId); - std::optional maxChunk; - for (auto&& i : Records) { - if (i.ColumnId == record.ColumnId) { - if (!maxChunk) { - maxChunk = i.Chunk; - } else { - Y_ABORT_UNLESS(*maxChunk + 1 == i.Chunk); - maxChunk = i.Chunk; - } - } - } - if (maxChunk) { - AFL_VERIFY(*maxChunk + 1 == record.Chunk)("max", *maxChunk)("record", record.Chunk); - } else { - AFL_VERIFY(0 == record.Chunk)("record", record.Chunk); - } Records.emplace_back(std::move(record)); return Records.back(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index c29a7afc2c7e..4146c80fbc19 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -276,7 +276,9 @@ class TPortionInfoConstructor { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (i.HasBlobRange()) { + blobIdxs.emplace(i.GetBlobRangeVerified().GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp 
b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp index 378871ccf2aa..fa21c6feeeb6 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp @@ -29,9 +29,6 @@ TPortionMetaConstructor::TPortionMetaConstructor(const TPortionMeta& meta) { RecordSnapshotMax = meta.RecordSnapshotMax; DeletionsCount = meta.GetDeletionsCount(); TierName = meta.GetTierNameOptional(); - if (!meta.StatisticsStorage.IsEmpty()) { - StatisticsStorage = meta.StatisticsStorage; - } if (meta.Produced != NPortion::EProduced::UNSPECIFIED) { Produced = meta.Produced; } @@ -49,28 +46,14 @@ TPortionMeta TPortionMetaConstructor::Build() { result.DeletionsCount = *DeletionsCount; AFL_VERIFY(Produced); result.Produced = *Produced; - if (StatisticsStorage) { - result.StatisticsStorage = *StatisticsStorage; - } return result; } bool TPortionMetaConstructor::LoadMetadata(const NKikimrTxColumnShard::TIndexPortionMeta& portionMeta, const TIndexInfo& indexInfo) { if (!!Produced) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); return true; } - if (portionMeta.HasStatisticsStorage()) { - auto parsed = NStatistics::TPortionStorage::BuildFromProto(portionMeta.GetStatisticsStorage()); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", parsed.GetErrorMessage()); - return false; - } - StatisticsStorage = parsed.DetachResult(); - if (StatisticsStorage->IsEmpty()) { - StatisticsStorage.reset(); - } - } if (portionMeta.GetTierName()) { TierName = portionMeta.GetTierName(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h index ccc854886ec7..87b808a282fd 100644 --- 
a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace NKikimr::NOlap { class TPortionInfoConstructor; @@ -13,7 +12,6 @@ class TPortionMetaConstructor { private: std::optional FirstAndLastPK; std::optional TierName; - std::optional StatisticsStorage; std::optional RecordSnapshotMin; std::optional RecordSnapshotMax; std::optional Produced; @@ -31,15 +29,6 @@ class TPortionMetaConstructor { SetTierName(tierName); } - void SetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - AFL_VERIFY(!StatisticsStorage); - StatisticsStorage = std::move(storage); - } - - void ResetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - StatisticsStorage = std::move(storage); - } - void UpdateRecordsMeta(const NPortion::EProduced prod) { Produced = prod; } diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp index 4aeaa20dd20e..990de1b65d78 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp @@ -1,4 +1,5 @@ #include "index_chunk.h" + #include namespace NKikimr::NOlap { @@ -13,16 +14,39 @@ NKikimr::TConclusionStatus TIndexChunk::DeserializeFromProto(const NKikimrColumn RecordsCount = proto.GetMeta().GetRecordsCount(); RawBytes = proto.GetMeta().GetRawBytes(); } - { + if (proto.HasBlobRange()) { auto parsed = TBlobRangeLink16::BuildFromProto(proto.GetBlobRange()); if (!parsed) { return parsed; } - BlobRange = parsed.DetachResult(); + Data = parsed.DetachResult(); + } else if (proto.HasBlobData()) { + Data = proto.GetBlobData(); + } else { + return TConclusionStatus::Fail("incorrect blob info - neither BlobData nor BlobRange"); } return TConclusionStatus::Success(); } +namespace { +class TBlobInfoSerializer { +private: + NKikimrColumnShardDataSharingProto::TIndexChunk& Proto; + 
+public: + TBlobInfoSerializer(NKikimrColumnShardDataSharingProto::TIndexChunk& proto) + : Proto(proto) { + } + + void operator()(const TBlobRangeLink16& link) { + *Proto.MutableBlobRange() = link.SerializeToProto(); + } + void operator()(const TString& data) { + *Proto.MutableBlobData() = data; + } +}; +} // namespace + NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() const { NKikimrColumnShardDataSharingProto::TIndexChunk result; result.SetIndexId(IndexId); @@ -32,8 +56,26 @@ NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() meta->SetRecordsCount(RecordsCount); meta->SetRawBytes(RawBytes); } - *result.MutableBlobRange() = BlobRange.SerializeToProto(); + std::visit(TBlobInfoSerializer(result), Data); return result; } +namespace { +class TDataSizeExtractor { +public: + TDataSizeExtractor() = default; + + ui64 operator()(const TBlobRangeLink16& link) { + return link.GetSize(); + } + ui64 operator()(const TString& data) { + return data.size(); + } +}; +} // namespace + +ui64 TIndexChunk::GetDataSize() const { + return std::visit(TDataSizeExtractor(), Data); } + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.h b/ydb/core/tx/columnshard/engines/portions/index_chunk.h index 6a71704318ca..65e0855d5eef 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.h +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.h @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -30,11 +29,41 @@ class TIndexChunk { YDB_READONLY(ui32, ChunkIdx, 0); YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); + std::variant Data; TIndexChunk() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto); public: + ui64 GetDataSize() const; + + bool HasBlobRange() const { + return std::holds_alternative(Data); + } + + const TBlobRangeLink16* 
GetBlobRangeOptional() const { + return std::get_if(&Data); + } + + const TBlobRangeLink16& GetBlobRangeVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + + bool HasBlobData() const { + return std::holds_alternative(Data); + } + + const TString* GetBlobDataOptional() const { + return std::get_if(&Data); + } + + const TString& GetBlobDataVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + TChunkAddress GetAddress() const { return TChunkAddress(IndexId, ChunkIdx); } @@ -48,13 +77,22 @@ class TIndexChunk { , ChunkIdx(chunkIdx) , RecordsCount(recordsCount) , RawBytes(rawBytes) - , BlobRange(blobRange) { + , Data(blobRange) { + + } + TIndexChunk(const ui32 indexId, const ui32 chunkIdx, const ui32 recordsCount, const ui64 rawBytes, const TString& blobData) + : IndexId(indexId) + , ChunkIdx(chunkIdx) + , RecordsCount(recordsCount) + , RawBytes(rawBytes) + , Data(blobData) { } void RegisterBlobIdx(const TBlobRangeLink16::TLinkId blobLinkId) { -// AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); - BlobRange.BlobIdx = blobLinkId; + auto* result = std::get_if(&Data); + AFL_VERIFY(result); + result->BlobIdx = blobLinkId; } static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto) { diff --git a/ydb/core/tx/columnshard/engines/portions/meta.cpp b/ydb/core/tx/columnshard/engines/portions/meta.cpp index b3b2a92ef9e2..9d7e374ec8f1 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/meta.cpp @@ -11,9 +11,6 @@ namespace NKikimr::NOlap { NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexPortionMeta portionMeta; portionMeta.SetTierName(TierName); - if (!StatisticsStorage.IsEmpty()) { - *portionMeta.MutableStatisticsStorage() = 
StatisticsStorage.SerializeToProto(); - } portionMeta.SetDeletionsCount(DeletionsCount); switch (Produced) { case TPortionMeta::EProduced::UNSPECIFIED: @@ -36,7 +33,7 @@ NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { break; } - portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializeToStringDataOnlyNoCompression()); + portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializePayloadToString()); RecordSnapshotMin.SerializeToProto(*portionMeta.MutableRecordSnapshotMin()); RecordSnapshotMax.SerializeToProto(*portionMeta.MutableRecordSnapshotMax()); diff --git a/ydb/core/tx/columnshard/engines/portions/meta.h b/ydb/core/tx/columnshard/engines/portions/meta.h index 5facbe8d5ae6..ad57ef1325c3 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/portions/meta.h @@ -1,9 +1,8 @@ #pragma once #include #include -#include #include -#include +#include #include #include #include @@ -16,7 +15,6 @@ struct TPortionMeta { private: NArrow::TFirstLastSpecialKeys ReplaceKeyEdges; // first and last PK rows YDB_READONLY_DEF(TString, TierName); - YDB_READONLY_DEF(NStatistics::TPortionStorage, StatisticsStorage); YDB_READONLY(ui32, DeletionsCount, 0); friend class TPortionMetaConstructor; TPortionMeta(NArrow::TFirstLastSpecialKeys& pk, const TSnapshot& min, const TSnapshot& max) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 0bbefe1c5550..6652bf9c4c5f 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -7,9 +7,11 @@ #include #include #include +#include +#include #include #include -#include +#include #include @@ -30,38 +32,57 @@ std::shared_ptr TPortionInfo::MaxValue(ui32 columnId) const { return result; } -ui64 TPortionInfo::GetColumnRawBytes(const std::vector& columnIds, const bool validation) const { - return 
GetColumnRawBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetColumnRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetMeta().GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetColumnRawBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetBlobRange().GetSize(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; +} + +ui64 TPortionInfo::GetColumnRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetMeta().GetRawBytes(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetBlobRange().GetSize(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::vector& columnIds, const bool validation) const { - return GetColumnBlobBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetIndexRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Indexes, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetIndexRawBytes(const 
std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TIndexChunk& r) { sum += r.GetRawBytes(); }; - AggregateIndexChunksData(aggr, Indexes, entityIds, validation); + AggregateIndexChunksData(aggr, Indexes, nullptr, validation); return sum; } @@ -312,7 +333,10 @@ THashMap TPortionInfo::DecodeBlobAddress continue; } for (auto&& record : Indexes) { - if (RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (!record.HasBlobRange()) { + continue; + } + if (RestoreBlobRange(record.GetBlobRangeVerified()) == b.first) { if (columnId != record.GetIndexId()) { columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); } @@ -355,7 +379,9 @@ void TPortionInfo::FillBlobRangesByStorage(THashMapemplace(i.GetBlobRange().GetBlobIdxVerified()).second) { - auto blobId = GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - AFL_VERIFY(currentHashResult); - AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); - lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + if (auto bRange = i.GetBlobRangeOptional()) { + if (lastBlobIdx != bRange->GetBlobIdxVerified() && currentHashLocal->emplace(bRange->GetBlobIdxVerified()).second) { + auto blobId = GetBlobId(bRange->GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = bRange->GetBlobIdxVerified(); + } } } } @@ -414,38 +442,30 @@ void TPortionInfo::FillBlobIdsByStorage(THashMapGetIndexInfo()); } -THashMap>>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { - THashMap>>> result; +THashMap>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { + THashMap>> result; for (auto&& c : GetRecords()) { const TString& storageId = 
GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId()))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); + auto chunk = std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); + chunk->SetChunkIdx(c.GetChunkIdx()); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } for (auto&& c : GetIndexes()) { const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); - } - return result; -} + const TString blobData = [&]() -> TString { + if (auto bRange = c.GetBlobRangeOptional()) { + return blobs.Extract(storageId, RestoreBlobRange(*bRange)); + } else if (auto data = c.GetBlobDataOptional()) { + return *data; + } else { + AFL_VERIFY(false); + Y_UNREACHABLE(); + } + }(); + auto chunk = std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobData); + chunk->SetChunkIdx(c.GetChunkIdx()); -THashMap>> TPortionInfo::GetEntityChunks(const TIndexInfo& indexInfo) const { - THashMap>> result; - for (auto&& c : GetRecords()) { - const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), 
c.GetMeta().GetRawBytes(), c.GetBlobRange())); - } - for (auto&& c : GetIndexes()) { - const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange())); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } return result; } @@ -496,7 +516,9 @@ void TPortionInfo::FullValidation() const { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (auto bRange = i.GetBlobRangeOptional()) { + blobIdxs.emplace(bRange->GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); @@ -589,11 +611,10 @@ template TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) { std::vector columns; - auto arrowResultSchema = resultSchema.GetSchema(); - columns.reserve(arrowResultSchema->num_fields()); + columns.reserve(resultSchema.GetColumnIds().size()); const ui32 rowsCount = portion.GetRecordsCount(); - for (auto&& i : arrowResultSchema->fields()) { - columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i->name()), resultSchema.GetColumnLoaderOptional(i->name())); + for (auto&& i : resultSchema.GetColumnIds()) { + columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); } { int skipColumnId = -1; @@ -625,35 +646,11 @@ TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& port preparedColumns.emplace_back(c.Compile()); } - return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), arrowResultSchema, rowsCount); + return 
TPortionInfo::TPreparedBatchData(std::move(preparedColumns), rowsCount); } } -namespace { -class TChunkAccessor { -private: - const std::vector& Chunks; - const std::shared_ptr Loader; -public: - TChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader) - : Chunks(chunks) - , Loader(loader) - { - - } - ui64 GetChunksCount() const { - return Chunks.size(); - } - ui64 GetChunkLength(const ui32 idx) const { - return Chunks[idx].GetRecordsCount(); - } - std::shared_ptr GetArray(const ui32 idx) const { - return Chunks[idx].GetArrayVerified(Loader); - } -}; -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); @@ -663,12 +660,8 @@ ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoC return CurrentSchema; } -NArrow::NAccessor::IChunkedArray::TCurrentChunkAddress TDeserializeChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Chunks, Loader); - return SelectChunk(chunkCurrent, position, accessor); -} - -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble( + const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); } @@ -683,100 +676,74 @@ bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) return true; } -std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector chunks; - chunks.reserve(Blobs.size()); - ui64 recordsCount = 
0; + NArrow::NAccessor::TCompositeChunkedArray::TBuilder builder(GetField()->type()); for (auto& blob : Blobs) { - chunks.push_back(blob.BuildDeserializeChunk(Loader)); - recordsCount += blob.GetExpectedRowsCountVerified(); + auto chunkedArray = blob.BuildRecordBatch(*Loader); + builder.AddChunk(chunkedArray); } - - return std::make_shared(recordsCount, Loader, std::move(chunks)); + return builder.Finish(); } -std::shared_ptr TPortionInfo::TPreparedColumn::Assemble() const { +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector> batches; - batches.reserve(Blobs.size()); + std::vector chunks; + chunks.reserve(Blobs.size()); + ui64 recordsCount = 0; for (auto& blob : Blobs) { - batches.push_back(blob.BuildRecordBatch(*Loader)); - Y_ABORT_UNLESS(batches.back()); + chunks.push_back(blob.BuildDeserializeChunk(Loader)); + if (!!blob.GetData()) { + recordsCount += blob.GetExpectedRowsCountVerified(); + } else { + recordsCount += blob.GetDefaultRowsCount(); + } } - auto res = arrow::Table::FromRecordBatches(batches); - Y_VERIFY_S(res.ok(), res.status().message()); - return (*res)->column(0); + return std::make_shared(recordsCount, Loader, std::move(chunks)); } -TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk(const std::shared_ptr& loader) const { +NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk( + const std::shared_ptr& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader->GetExpectedSchema()->num_fields() == 1); - auto col = NArrow::TThreadSimpleArraysCache::Get(loader->GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount); - return TDeserializeChunkedArray::TChunk(col); + auto col = std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader->GetField()->type(), DefaultValue, DefaultRowsCount)); + return 
NArrow::NAccessor::TDeserializeChunkedArray::TChunk(col); } else { AFL_VERIFY(ExpectedRowsCount); - return TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); } } -std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { +std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader.GetExpectedSchema()->num_fields() == 1); - return arrow::RecordBatch::Make(loader.GetExpectedSchema(), DefaultRowsCount, - { NArrow::TThreadSimpleArraysCache::Get(loader.GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount) }); + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); } else { - auto result = loader.Apply(Data); - if (!result.ok()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot unpack batch")("error", result.status().ToString())("loader", loader.DebugString()); - return nullptr; - } - if (ExpectedRowsCount) { - AFL_VERIFY((*result)->num_rows() == ExpectedRowsCount)("real", (*result)->num_rows())("expected", ExpectedRowsCount); - } - return *result; + AFL_VERIFY(ExpectedRowsCount); + return loader.ApplyVerified(Data, *ExpectedRowsCount); } } -std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleForSeqAccess() const { +std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleToGeneralContainer( + const std::set& sequentialColumnIds) const { std::vector> columns; std::vector> fields; for (auto&& i : Columns) { - columns.emplace_back(i.AssembleForSeqAccess()); - fields.emplace_back(i.GetField()); - } - - return std::make_shared(std::make_shared(fields), std::move(columns)); -} - -std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleTable(const TAssembleOptions& options) const { - std::vector> columns; - 
std::vector> fields; - for (auto&& i : Columns) { - if (!options.IsAcceptedColumn(i.GetColumnId())) { - continue; - } - std::shared_ptr scalar; - if (options.IsConstantColumn(i.GetColumnId(), scalar)) { - auto type = i.GetField()->type(); - std::shared_ptr arr = NArrow::TThreadSimpleArraysCache::Get(type, scalar, RowsCount); - columns.emplace_back(std::make_shared(arr)); + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("column", i.GetField()->ToString())("id", i.GetColumnId()); + if (sequentialColumnIds.contains(i.GetColumnId())) { + columns.emplace_back(i.AssembleForSeqAccess()); } else { - columns.emplace_back(i.Assemble()); + columns.emplace_back(i.AssembleAccessor()); } fields.emplace_back(i.GetField()); } - return arrow::Table::Make(std::make_shared(fields), columns); -} - -std::shared_ptr TPortionInfo::TPreparedBatchData::Assemble(const TAssembleOptions& options) const { - return NArrow::ToBatch(AssembleTable(options), true); + return std::make_shared(fields, std::move(columns)); } } diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 25c32c264684..6fa105745b05 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -3,9 +3,11 @@ #include "index_chunk.h" #include "meta.h" +#include #include -#include +#include #include +#include #include #include #include @@ -28,57 +30,6 @@ struct TIndexInfo; class TVersionedIndex; class IDbWrapper; -class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { -private: - using TBase = NArrow::NAccessor::IChunkedArray; -public: - class TChunk { - private: - YDB_READONLY(ui32, RecordsCount, 0); - std::shared_ptr PredefinedArray; - const TString Data; - public: - TChunk(const std::shared_ptr& predefinedArray) - : PredefinedArray(predefinedArray) { - AFL_VERIFY(PredefinedArray); - RecordsCount = PredefinedArray->length(); - } - - 
TChunk(const ui32 recordsCount, const TString& data) - : RecordsCount(recordsCount) - , Data(data) { - - } - - std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { - if (PredefinedArray) { - return PredefinedArray; - } - auto result = loader->ApplyVerified(Data); - AFL_VERIFY(result); - AFL_VERIFY(result->num_columns() == 1); - AFL_VERIFY(result->num_rows() == RecordsCount)("length", result->num_rows())("records_count", RecordsCount); - return result->column(0); - } - }; - - std::shared_ptr Loader; - std::vector Chunks; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - AFL_VERIFY(false); - return nullptr; - } -public: - TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) - : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) - , Loader(loader) - , Chunks(std::move(chunks)) { - AFL_VERIFY(Loader); - } -}; - class TEntityChunk { private: TChunkAddress Address; @@ -147,7 +98,7 @@ class TPortionInfo { } template - static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::optional>& columnIds, const bool validation) { + static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { if (columnIds) { auto itColumn = columnIds->begin(); auto itRecord = chunks.begin(); @@ -197,6 +148,16 @@ class TPortionInfo { SetRemoveSnapshot(TSnapshot(planStep, txId)); } + std::vector GetIndexInplaceDataVerified(const ui32 indexId) const { + std::vector result; + for (auto&& i : Indexes) { + if (i.GetEntityId() == indexId) { + result.emplace_back(i.GetBlobDataVerified()); + } + } + return result; + } + void InitRuntimeFeature(const ERuntimeFeature feature, const bool activity) { if (activity) { 
AddRuntimeFeature(feature); @@ -239,8 +200,7 @@ class TPortionInfo { void ReorderChunks(); - THashMap>>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; - THashMap>> GetEntityChunks(const TIndexInfo & info) const; + THashMap>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); @@ -359,8 +319,8 @@ class TPortionInfo { return result; } - TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { - TSerializationStats result; + NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { + NArrow::NSplitter::TSerializationStats result; for (auto&& i : Records) { if (schema.GetFieldByColumnIdOptional(i.ColumnId)) { result.AddStat(i.GetSerializationStat(schema.GetFieldByColumnIdVerified(i.ColumnId)->name())); @@ -378,36 +338,29 @@ class TPortionInfo { } const TColumnRecord* GetRecordPointer(const TChunkAddress& address) const { - for (auto&& i : Records) { - if (i.GetAddress() == address) { - return &i; - } + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { + return &*it; } return nullptr; } - std::optional GetEntityRecord(const TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange()); - } - } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange()); - } - } - return {}; - } - bool HasEntityAddress(const 
TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { return true; } } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Indexes.begin(), Indexes.end(), address, [](const TIndexChunk& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Indexes.end() && it->GetAddress() == address) { return true; } } @@ -498,10 +451,7 @@ class TPortionInfo { return false; } - bool visible = (Meta.RecordSnapshotMin <= snapshot); - if (visible && RemoveSnapshot.Valid()) { - visible = snapshot < RemoveSnapshot; - } + const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)("snapshot", snapshot.DebugString()); return visible; @@ -587,20 +537,21 @@ class TPortionInfo { return result; } - ui64 GetIndexRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetIndexRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetIndexRawBytes(const bool validation = true) const; ui64 GetIndexBlobBytes() const noexcept { ui64 sum = 0; for (const auto& rec : Indexes) { - sum += rec.GetBlobRange().Size; + sum += rec.GetDataSize(); } return sum; } - ui64 GetColumnRawBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnRawBytes(const bool validation = true) const; - ui64 
GetColumnBlobBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnBlobBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnBlobBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnBlobBytes(const bool validation = true) const; ui64 GetTotalBlobBytes() const noexcept { return GetIndexBlobBytes() + GetColumnBlobBytes(); @@ -658,8 +609,8 @@ class TPortionInfo { return DefaultRowsCount && !Data; } - std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; - TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; + std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; + NArrow::NAccessor::TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; }; class TPreparedColumn { @@ -672,28 +623,26 @@ class TPortionInfo { } const std::string& GetName() const { - return Loader->GetExpectedSchema()->field(0)->name(); + return Loader->GetField()->name(); } std::shared_ptr GetField() const { - return Loader->GetExpectedSchema()->field(0); + return Loader->GetField(); } TPreparedColumn(std::vector&& blobs, const std::shared_ptr& loader) : Loader(loader) , Blobs(std::move(blobs)) { - Y_ABORT_UNLESS(Loader); - Y_ABORT_UNLESS(Loader->GetExpectedSchema()->num_fields() == 1); + AFL_VERIFY(Loader); } - std::shared_ptr Assemble() const; - std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleAccessor() const; }; class TPreparedBatchData { private: std::vector Columns; - std::shared_ptr Schema; size_t RowsCount = 0; public: struct TAssembleOptions { @@ -734,10 +683,6 @@ class TPortionInfo { return nullptr; } - std::vector GetSchemaColumnNames() const { - return Schema->field_names(); - } - size_t GetColumnsCount() const { return Columns.size(); } @@ -746,15 +691,12 @@ class TPortionInfo { return RowsCount; } - 
TPreparedBatchData(std::vector&& columns, std::shared_ptr schema, const size_t rowsCount) + TPreparedBatchData(std::vector&& columns, const size_t rowsCount) : Columns(std::move(columns)) - , Schema(schema) , RowsCount(rowsCount) { } - std::shared_ptr Assemble(const TAssembleOptions& options = {}) const; - std::shared_ptr AssembleTable(const TAssembleOptions& options = {}) const; - std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleToGeneralContainer(const std::set& sequentialColumnIds) const; }; class TColumnAssemblingInfo { @@ -791,7 +733,7 @@ class TPortionInfo { TPreparedColumn Compile() { if (BlobsInfo.empty()) { - BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader->GetDefaultValue())); + BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); return TPreparedColumn(std::move(BlobsInfo), ResultLoader); } else { AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); @@ -804,13 +746,6 @@ class TPortionInfo { TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; - std::shared_ptr AssembleInBatch(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& data) const { - auto batch = PrepareForAssemble(dataSchema, resultSchema, data).Assemble(); - Y_ABORT_UNLESS(batch->Validate().ok()); - return batch; - } - friend IOutputStream& operator << (IOutputStream& out, const TPortionInfo& info) { out << info.DebugString(); return out; diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index ca5a1505d620..ae85ef59842c 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ 
b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -13,44 +13,22 @@ void TReadPortionInfoWithBlobs::RestoreChunk(const std::shared_ptr TReadPortionInfoWithBlobs::GetBatch(const ISnapshotSchema::TPtr& data, const ISnapshotSchema& result, const std::set& columnNames) const { - Y_ABORT_UNLESS(data); - if (columnNames.empty()) { - if (!CachedBatch) { - THashMap blobs; - for (auto&& i : PortionInfo.Records) { - blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); - } - CachedBatch = PortionInfo.AssembleInBatch(*data, result, blobs); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(*CachedBatch, result.GetIndexInfo().GetReplaceKey())); - } - return *CachedBatch; - } else if (CachedBatch) { - std::vector columnNamesString; - for (auto&& i : columnNames) { - columnNamesString.emplace_back(i.data(), i.size()); - } - return NArrow::TColumnOperator().VerifyIfAbsent().Extract(*CachedBatch, columnNamesString); - } else { - auto filteredSchema = std::make_shared(data, columnNames); - THashMap blobs; - for (auto&& i : PortionInfo.Records) { - blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); - } - return PortionInfo.AssembleInBatch(*data, *filteredSchema, blobs); +std::shared_ptr TReadPortionInfoWithBlobs::RestoreBatch( + const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const { + THashMap blobs; + for (auto&& i : PortionInfo.Records) { + blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); + Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } + return PortionInfo.PrepareForAssemble(data, resultSchema, blobs).AssembleToGeneralContainer(seqColumns); } NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, 
const TIndexInfo& indexInfo) { TReadPortionInfoWithBlobs result(portion); - THashMap>>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); - for (auto&& [storageId, recordsByBlob] : records) { - for (auto&& i : recordsByBlob) { - for (auto&& d : i.second) { - result.RestoreChunk(d); - } + THashMap>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + for (auto&& [storageId, chunksByAddress] : records) { + for (auto&& [_, chunk] : chunksByAddress) { + result.RestoreChunk(chunk); } } return result; @@ -86,26 +64,21 @@ bool TReadPortionInfoWithBlobs::ExtractColumnChunks(const ui32 entityId, std::ve if (records.empty()) { return false; } - std::map> chunksMap; + std::vector> chunksLocal; for (auto it = Chunks.begin(); it != Chunks.end();) { if (it->first.GetEntityId() == entityId) { - chunksMap.emplace(it->first, std::move(it->second)); + AFL_VERIFY(chunksLocal.empty() || chunksLocal.back()->GetChunkAddressVerified() < it->second->GetChunkAddressVerified()); + chunksLocal.emplace_back(std::move(it->second)); it = Chunks.erase(it); } else { ++it; } } - std::vector> chunksLocal; - for (auto&& i : chunksMap) { - Y_ABORT_UNLESS(i.first.GetColumnId() == entityId); - Y_ABORT_UNLESS(i.first.GetChunk() == chunksLocal.size()); - chunksLocal.emplace_back(i.second); - } std::swap(chunksLocal, chunks); return true; } -std::optional TReadPortionInfoWithBlobs::SyncPortion(TReadPortionInfoWithBlobs&& source, +std::optional TReadPortionInfoWithBlobs::SyncPortion(TReadPortionInfoWithBlobs&& source, const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, std::shared_ptr counters) { if (from->GetVersion() == to->GetVersion() && targetTier == source.GetPortionInfo().GetTierNameDef(IStoragesManager::DefaultStorageId)) { @@ -131,40 +104,27 @@ std::optional TReadPortionInfoWithBlobs::SyncPortion std::vector> newChunks; if (it != columnChunks.end()) { newChunks = 
to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); - } else { - newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i)); - } - AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); - } - - for (auto&& i : to->GetIndexInfo().GetIndexes()) { - if (from->GetIndexInfo().HasIndexId(i.first)) { - continue; + AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } - to->GetIndexInfo().AppendIndex(entityChunksNew, i.first); } - auto schemaTo = std::make_shared(to, std::make_shared()); - TGeneralSerializedSlice slice(entityChunksNew, schemaTo, counters); - const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); TPortionInfoConstructor constructor(source.PortionInfo, false, true); constructor.SetMinSnapshotDeprecated(to->GetSnapshot()); constructor.SetSchemaVersion(to->GetVersion()); constructor.MutableMeta().ResetTierName(targetTier); - NStatistics::TPortionStorage storage; - for (auto&& i : to->GetIndexInfo().GetStatisticsByName()) { - auto it = from->GetIndexInfo().GetStatisticsByName().find(i.first); - if (it != from->GetIndexInfo().GetStatisticsByName().end()) { - i.second->CopyData(it->second.GetCursorVerified(), source.PortionInfo.GetMeta().GetStatisticsStorage(), storage); - } else { - i.second->FillStatisticsData(entityChunksNew, storage, to->GetIndexInfo()); - } + TIndexInfo::TSecondaryData secondaryData; + secondaryData.MutableExternalData() = entityChunksNew; + for (auto&& i : to->GetIndexInfo().GetIndexes()) { + to->GetIndexInfo().AppendIndex(entityChunksNew, i.first, storages, secondaryData).Validate(); } - constructor.MutableMeta().ResetStatisticsStorage(std::move(storage)); - TWritePortionInfoWithBlobs result = TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), std::move(constructor), storages); - return result; + const NSplitter::TEntityGroups groups = 
to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); + auto schemaTo = std::make_shared(to, std::make_shared()); + TGeneralSerializedSlice slice(secondaryData.GetExternalData(), schemaTo, counters); + + return TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + slice.GroupChunksByBlobs(groups), secondaryData.GetSecondaryInplaceData(), std::move(constructor), storages); } const TString& TReadPortionInfoWithBlobs::GetBlobByAddressVerified(const ui32 columnId, const ui32 chunkId) const { diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h index 6d688db66074..a9e24eb3c165 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h @@ -12,7 +12,7 @@ namespace NKikimr::NOlap { class TVersionedIndex; -class TWritePortionInfoWithBlobs; +class TWritePortionInfoWithBlobsResult; class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { private: @@ -21,7 +21,6 @@ class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { void RestoreChunk(const std::shared_ptr& chunk); TPortionInfo PortionInfo; - mutable std::optional> CachedBatch; explicit TReadPortionInfoWithBlobs(TPortionInfo&& portionInfo) : PortionInfo(std::move(portionInfo)) { @@ -39,8 +38,8 @@ class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { static TReadPortionInfoWithBlobs RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo); - std::shared_ptr GetBatch(const ISnapshotSchema::TPtr& data, const ISnapshotSchema& result, const std::set& columnNames = {}) const; - static std::optional SyncPortion(TReadPortionInfoWithBlobs&& source, + std::shared_ptr RestoreBatch(const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const; + static std::optional SyncPortion(TReadPortionInfoWithBlobs&& source, const 
ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, std::shared_ptr counters); diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp index 4fbc7cb3305c..3f580531b749 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp @@ -3,9 +3,9 @@ namespace NKikimr::NOlap { -void TWritePortionInfoWithBlobs::TBlobInfo::AddChunk(TWritePortionInfoWithBlobs& owner, const std::shared_ptr& chunk) { +void TWritePortionInfoWithBlobsConstructor::TBlobInfo::AddChunk(TWritePortionInfoWithBlobsConstructor& owner, const std::shared_ptr& chunk) { AFL_VERIFY(chunk); - Y_ABORT_UNLESS(!ResultBlob); + Y_ABORT_UNLESS(!Finished); const TString& data = chunk->GetData(); TBlobRangeLink16 bRange(Size, data.size()); @@ -17,24 +17,26 @@ void TWritePortionInfoWithBlobs::TBlobInfo::AddChunk(TWritePortionInfoWithBlobs& chunk->AddIntoPortionBeforeBlob(bRange, owner.GetPortionConstructor()); } -void TWritePortionInfoWithBlobs::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobs& owner, const TUnifiedBlobId& blobId) { +void TWritePortionInfoWithBlobsResult::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const { const TBlobRangeLink16::TLinkId idx = owner.GetPortionConstructor().RegisterBlobId(blobId); for (auto&& i : Chunks) { - owner.GetPortionConstructor().RegisterBlobIdx(i.first, idx); + owner.GetPortionConstructor().RegisterBlobIdx(i, idx); } } -TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, +TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators) { TPortionInfoConstructor 
constructor(granule); constructor.SetMinSnapshotDeprecated(snapshot); constructor.SetSchemaVersion(schemaVersion); - return BuildByBlobs(std::move(chunks), std::move(constructor), operators); + return BuildByBlobs(std::move(chunks), inplaceChunks, std::move(constructor), operators); } -TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { - TWritePortionInfoWithBlobs result(std::move(constructor)); +TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + std::vector&& chunks, const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { + TWritePortionInfoWithBlobsConstructor result(std::move(constructor)); for (auto&& blob : chunks) { auto storage = operators->GetOperatorVerified(blob.GetGroupName()); auto blobInfo = result.StartBlob(storage); @@ -42,10 +44,15 @@ TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector< blobInfo.AddChunk(chunk); } } + for (auto&& [_, i] : inplaceChunks) { + result.GetPortionConstructor().AddIndex( + TIndexChunk(i->GetEntityId(), i->GetChunkIdxVerified(), i->GetRecordsCountVerified(), i->GetRawBytesVerified(), i->GetData())); + } + return result; } -std::vector> TWritePortionInfoWithBlobs::GetEntityChunks(const ui32 entityId) const { +std::vector> TWritePortionInfoWithBlobsConstructor::GetEntityChunks(const ui32 entityId) const { std::map> sortedChunks; for (auto&& b : GetBlobs()) { for (auto&& i : b.GetChunks()) { @@ -62,16 +69,23 @@ std::vector> TWritePortionInfoWithBlobs::GetE return result; } -void TWritePortionInfoWithBlobs::FillStatistics(const TIndexInfo& index) { - NStatistics::TPortionStorage storage; - for (auto&& i : index.GetStatisticsByName()) { - THashMap>> data; - for (auto&& entityId : i.second->GetEntityIds()) { - data.emplace(entityId, GetEntityChunks(entityId)); +TString 
TWritePortionInfoWithBlobsResult::GetBlobByRangeVerified(const ui32 entityId, const ui32 chunkIdx) const { + AFL_VERIFY(!!PortionConstructor); + for (auto&& rec : PortionConstructor->GetRecords()) { + if (rec.GetEntityId() != entityId || rec.GetChunkIdx() != chunkIdx) { + continue; + } + for (auto&& i : Blobs) { + for (auto&& c : i.GetChunks()) { + if (c == TChunkAddress(entityId, chunkIdx)) { + return i.GetResultBlob().substr(rec.BlobRange.Offset, rec.BlobRange.Size); + } + } } - i.second->FillStatisticsData(data, storage, index); + AFL_VERIFY(false); } - GetPortionConstructor().MutableMeta().SetStatisticsStorage(std::move(storage)); + AFL_VERIFY(false); + return ""; } } diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h index 39c3bb885fe4..dde424fd63b8 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h @@ -9,7 +9,9 @@ namespace NKikimr::NOlap { -class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { +class TWritePortionInfoWithBlobsResult; + +class TWritePortionInfoWithBlobsConstructor: public TBasePortionInfoWithBlobs { public: class TBlobInfo { private: @@ -18,9 +20,8 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { YDB_READONLY_DEF(TBlobChunks, Chunks); YDB_READONLY_DEF(std::shared_ptr, Operator); std::vector> ChunksOrdered; - mutable std::optional ResultBlob; - void AddChunk(TWritePortionInfoWithBlobs& owner, const std::shared_ptr& chunk); - + bool Finished = false; + void AddChunk(TWritePortionInfoWithBlobsConstructor& owner, const std::shared_ptr& chunk); public: TBlobInfo(const std::shared_ptr& bOperator) : Operator(bOperator) @@ -31,9 +32,9 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { class TBuilder { private: TBlobInfo* OwnerBlob; - TWritePortionInfoWithBlobs* OwnerPortion; + TWritePortionInfoWithBlobsConstructor* 
OwnerPortion; public: - TBuilder(TBlobInfo& blob, TWritePortionInfoWithBlobs& portion) + TBuilder(TBlobInfo& blob, TWritePortionInfoWithBlobsConstructor& portion) : OwnerBlob(&blob) , OwnerPortion(&portion) { } @@ -46,26 +47,32 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { } }; - const TString& GetBlob() const { - if (!ResultBlob) { - TString result; - result.reserve(Size); - for (auto&& i : ChunksOrdered) { - result.append(i->GetData()); - } - ResultBlob = std::move(result); + std::vector ExtractChunks() { + std::vector result; + result.reserve(Chunks.size()); + for (auto&& i : Chunks) { + result.emplace_back(i.first); } - return *ResultBlob; + return result; } - void RegisterBlobId(TWritePortionInfoWithBlobs& owner, const TUnifiedBlobId& blobId); + TString ExtractBlob() { + AFL_VERIFY(!Finished); + Finished = true; + TString result; + result.reserve(Size); + for (auto&& i : ChunksOrdered) { + result.append(i->GetData()); + } + ChunksOrdered.clear(); + return result; + } }; private: std::optional PortionConstructor; - std::optional PortionResult; YDB_READONLY_DEF(std::vector, Blobs); - explicit TWritePortionInfoWithBlobs(TPortionInfoConstructor&& portionConstructor) + explicit TWritePortionInfoWithBlobsConstructor(TPortionInfoConstructor&& portionConstructor) : PortionConstructor(std::move(portionConstructor)) { } @@ -73,46 +80,75 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { Blobs.emplace_back(TBlobInfo(bOperator)); return TBlobInfo::TBuilder(Blobs.back(), *this); } - + friend class TWritePortionInfoWithBlobsResult; public: std::vector> GetEntityChunks(const ui32 entityId) const; - void FillStatistics(const TIndexInfo& index); - - static TWritePortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, + static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& 
operators); - static TWritePortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, + static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators); - const TString& GetBlobByRangeVerified(const ui32 columnId, const ui32 chunkId) const { - for (auto&& b : Blobs) { - auto it = b.GetChunks().find(TChunkAddress(columnId, chunkId)); - if (it == b.GetChunks().end()) { - continue; - } else { - return it->second->GetData(); - } - } - Y_ABORT_UNLESS(false); + std::vector& GetBlobs() { + return Blobs; } - ui64 GetBlobFullSizeVerified(const ui32 columnId, const ui32 chunkId) const { - for (auto&& b : Blobs) { - auto it = b.GetChunks().find(TChunkAddress(columnId, chunkId)); - if (it == b.GetChunks().end()) { - continue; - } else { - return b.GetSize(); - } - } - Y_ABORT_UNLESS(false); + TString DebugString() const { + return TStringBuilder() << "blobs_count=" << Blobs.size() << ";"; } - std::vector& GetBlobs() { - return Blobs; + TPortionInfoConstructor& GetPortionConstructor() { + AFL_VERIFY(!!PortionConstructor); + return *PortionConstructor; + } + +}; + +class TWritePortionInfoWithBlobsResult { +public: + class TBlobInfo { + private: + using TBlobChunks = std::vector; + YDB_READONLY_DEF(TBlobChunks, Chunks); + const TString ResultBlob; + YDB_READONLY_DEF(std::shared_ptr, Operator); + + public: + ui64 GetSize() const { + return ResultBlob.size(); + } + + TBlobInfo(const TString& blobData, TBlobChunks&& chunks, const std::shared_ptr& stOperator) + : Chunks(std::move(chunks)) + , ResultBlob(blobData) + , Operator(stOperator) + { + + } + + const TString& GetResultBlob() const { + return ResultBlob; + } + + void RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const; + }; +private: + std::optional PortionConstructor; + std::optional PortionResult; + YDB_READONLY_DEF(std::vector, Blobs); +public: + 
TWritePortionInfoWithBlobsResult(TWritePortionInfoWithBlobsConstructor&& constructor) + : PortionConstructor(std::move(constructor.PortionConstructor)) { + for (auto&& i : constructor.Blobs) { + Blobs.emplace_back(i.ExtractBlob(), i.ExtractChunks(), i.GetOperator()); + } } + TString GetBlobByRangeVerified(const ui32 entityId, const ui32 chunkIdx) const; + TString DebugString() const { return TStringBuilder() << "blobs_count=" << Blobs.size() << ";"; } @@ -135,7 +171,6 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { AFL_VERIFY(!PortionResult); return *PortionConstructor; } - }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/container.cpp b/ydb/core/tx/columnshard/engines/predicate/container.cpp index 0f27c3538293..8afcc1895e4d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/container.cpp @@ -115,51 +115,53 @@ bool TPredicateContainer::CrossRanges(const TPredicateContainer& ext) { } } -std::optional TPredicateContainer::BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'from' predicate"); - return {}; + return TConclusionStatus::Fail("not good 'from' predicate"); } if (!object->IsFrom()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'from' predicate not is from"); - return {}; + return TConclusionStatus::Fail("'from' predicate not is from"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < 
(int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; } } - Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); + AFL_VERIFY(countSortingFields == object->Batch->num_columns())("count", countSortingFields)("object", object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, pkSchema ? ExtractKey(*object, pkSchema) : nullptr); } } -std::optional TPredicateContainer::BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'to' predicate"); - return {}; + return TConclusionStatus::Fail("not good 'to' predicate"); } if (!object->IsTo()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'to' predicate not is to"); - return {}; + return TConclusionStatus::Fail("'to' predicate not is to"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < (int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; @@ -167,7 +169,7 @@ std::optional TPredicateContainer::BuildPre } Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, 
pkSchema ? TPredicateContainer::ExtractKey(*object, pkSchema) : nullptr); } } diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h index 5cff2cbefea5..7d969cf9a759 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.h +++ b/ydb/core/tx/columnshard/engines/predicate/container.h @@ -1,9 +1,13 @@ #pragma once #include "predicate.h" + #include -#include +#include + #include + #include + #include namespace NKikimr::NOlap { @@ -15,10 +19,12 @@ class TPredicateContainer { std::shared_ptr Object; NArrow::ECompareType CompareType; mutable std::optional> ColumnNames; + std::shared_ptr ReplaceKey; - TPredicateContainer(std::shared_ptr object) + TPredicateContainer(std::shared_ptr object, const std::shared_ptr& replaceKey) : Object(object) - , CompareType(Object->GetCompareType()) { + , CompareType(Object->GetCompareType()) + , ReplaceKey(replaceKey) { } TPredicateContainer(const NArrow::ECompareType compareType) @@ -27,15 +33,37 @@ class TPredicateContainer { static std::partial_ordering ComparePredicatesSamePrefix(const NOlap::TPredicate& l, const NOlap::TPredicate& r); + static std::shared_ptr ExtractKey(const NOlap::TPredicate& predicate, const std::shared_ptr& key) { + AFL_VERIFY(predicate.Batch); + const auto& batchFields = predicate.Batch->schema()->fields(); + const auto& keyFields = key->fields(); + size_t minSize = std::min(batchFields.size(), keyFields.size()); + for (size_t i = 0; i < minSize; ++i) { + Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); + } + if (batchFields.size() <= keyFields.size()) { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, predicate.Batch->schema(), 0)); + } else { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, key, 0)); + } + } + public: + NArrow::ECompareType GetCompareType() const { + return CompareType; + } + + const std::shared_ptr& GetReplaceKey() const { + return 
ReplaceKey; + } bool IsEmpty() const { return !Object; } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { if (!Object) { return defaultValue; } else { @@ -59,13 +87,15 @@ class TPredicateContainer { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } - static std::optional BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema); static TPredicateContainer BuildNullPredicateTo() { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } - static std::optional BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema); NKikimr::NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const { if (!Object) { @@ -73,23 +103,6 @@ class TPredicateContainer { } return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType); } - - std::optional ExtractKey(const std::shared_ptr& key) const { - if (Object) { - const auto& batchFields = Object->Batch->schema()->fields(); - const auto& keyFields = key->fields(); - size_t minSize = std::min(batchFields.size(), keyFields.size()); - for (size_t i = 0; i < minSize; ++i) { - Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); - } - if (batchFields.size() <= keyFields.size()) { - return NArrow::TReplaceKey::FromBatch(Object->Batch, Object->Batch->schema(), 0); - } else { - return NArrow::TReplaceKey::FromBatch(Object->Batch, key, 0); - } - } - return {}; - } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.cpp b/ydb/core/tx/columnshard/engines/predicate/filter.cpp index 10d66a832c1a..085eaa798f07 100644 --- 
a/ydb/core/tx/columnshard/engines/predicate/filter.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/filter.cpp @@ -1,4 +1,7 @@ #include "filter.h" + +#include + #include namespace NKikimr::NOlap { @@ -14,43 +17,50 @@ NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(const arrow::Datum& return result; } -bool TPKRangesFilter::Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo) { +TConclusionStatus TPKRangesFilter::Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema) { if ((!f || f->Empty()) && (!t || t->Empty())) { - return true; + return TConclusionStatus::Success(); + } + auto fromContainerConclusion = TPredicateContainer::BuildPredicateFrom(f, pkSchema); + if (fromContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from container")( + "from", fromContainerConclusion.GetErrorMessage()); + return fromContainerConclusion; } - auto fromContainer = TPredicateContainer::BuildPredicateFrom(f, indexInfo); - auto toContainer = TPredicateContainer::BuildPredicateTo(t, indexInfo); - if (!fromContainer || !toContainer) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from/to containers")("from", !!fromContainer)("to", !!toContainer); - return false; + auto toContainerConclusion = TPredicateContainer::BuildPredicateTo(t, pkSchema); + if (toContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect to container")( + "from", toContainerConclusion.GetErrorMessage()); + return toContainerConclusion; } if (SortedRanges.size() && !FakeRanges) { if (ReverseFlag) { - if (fromContainer->CrossRanges(SortedRanges.front().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.front().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted 
sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } else { - if (fromContainer->CrossRanges(SortedRanges.back().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.back().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } } - auto pkRangeFilter = TPKRangeFilter::Build(std::move(*fromContainer), std::move(*toContainer)); - if (!pkRangeFilter) { - return false; + auto pkRangeFilterConclusion = TPKRangeFilter::Build(fromContainerConclusion.DetachResult(), toContainerConclusion.DetachResult()); + if (pkRangeFilterConclusion.IsFail()) { + return pkRangeFilterConclusion; } if (FakeRanges) { FakeRanges = false; SortedRanges.clear(); } if (ReverseFlag) { - SortedRanges.emplace_front(std::move(*pkRangeFilter)); + SortedRanges.emplace_front(pkRangeFilterConclusion.DetachResult()); } else { - SortedRanges.emplace_back(std::move(*pkRangeFilter)); + SortedRanges.emplace_back(pkRangeFilterConclusion.DetachResult()); } - return true; + return TConclusionStatus::Success(); } TString TPKRangesFilter::DebugString() const { @@ -75,30 +85,134 @@ std::set TPKRangesFilter::GetColumnIds(const TIndexInfo& indexInfo) const return result; } -bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInUsage(info, indexInfo)) { + if (i.IsPortionInUsage(info)) { return true; } } return SortedRanges.empty(); } -bool TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::CheckPoint(const NArrow::TReplaceKey& point) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInPartialUsage(start, end, 
indexInfo)) { + if (i.CheckPoint(point)) { return true; } } - return false; + return SortedRanges.empty(); +} + +TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + for (auto&& i : SortedRanges) { + switch (i.IsPortionInPartialUsage(start, end)) { + case TPKRangeFilter::EUsageClass::FullUsage: + return TPKRangeFilter::EUsageClass::FullUsage; + case TPKRangeFilter::EUsageClass::PartialUsage: + return TPKRangeFilter::EUsageClass::PartialUsage; + case TPKRangeFilter::EUsageClass::DontUsage: + break; + } + } + return TPKRangeFilter::EUsageClass::DontUsage; } TPKRangesFilter::TPKRangesFilter(const bool reverse) - : ReverseFlag(reverse) -{ + : ReverseFlag(reverse) { auto range = TPKRangeFilter::Build(TPredicateContainer::BuildNullPredicateFrom(), TPredicateContainer::BuildNullPredicateTo()); Y_ABORT_UNLESS(range); SortedRanges.emplace_back(*range); } +std::shared_ptr TPKRangesFilter::SerializeToRecordBatch(const std::shared_ptr& pkSchema) const { + auto fullSchema = NArrow::TStatusValidator::GetValid( + pkSchema->AddField(pkSchema->num_fields(), std::make_shared(".ydb_operation_type", arrow::uint32()))); + auto builders = NArrow::MakeBuilders(fullSchema, SortedRanges.size() * 2); + for (auto&& i : SortedRanges) { + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateFrom().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], i.GetPredicateFrom().GetReplaceKey()->Column(idx), i.GetPredicateFrom().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateFrom().GetCompareType()); + + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateTo().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], 
i.GetPredicateTo().GetReplaceKey()->Column(idx), i.GetPredicateTo().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateTo().GetCompareType()); + } + return arrow::RecordBatch::Make(fullSchema, SortedRanges.size() * 2, NArrow::Finish(std::move(builders))); +} + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchLines( + const std::shared_ptr& batch, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + for (ui32 i = 0; i < batch->num_rows(); ++i) { + auto batchRow = batch->Slice(i, 1); + auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + result->Add(pFrom, pTo, batch->schema()).Validate(); + } + return result; } + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + auto pkBatch = NArrow::TColumnOperator().Adapt(batch, pkSchema).DetachResult(); + auto c = batch->GetColumnByName(".ydb_operation_type"); + AFL_VERIFY(c); + AFL_VERIFY(c->type_id() == arrow::Type::UINT32); + auto cUi32 = static_pointer_cast(c); + for (ui32 i = 0; i < batch->num_rows();) { + std::shared_ptr pFrom; + std::shared_ptr pTo; + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::GreaterEqual || op == NKernels::EOperation::Greater) { + pFrom = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + } else { + AFL_VERIFY(false); + } + if (op != NKernels::EOperation::Equal) { + ++i; + } + } + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation 
op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::LessEqual || op == NKernels::EOperation::Less) { + pTo = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + } else { + AFL_VERIFY(false); + } + } + result->Add(pFrom, pTo, pkSchema).Validate(); + } + return result; +} + +std::shared_ptr TPKRangesFilter::BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse) { + auto batch = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TNativeSerializer().Deserialize(data)); + return BuildFromRecordBatchFull(batch, pkSchema, reverse); +} + +TString TPKRangesFilter::SerializeToString(const std::shared_ptr& pkSchema) const { + return NArrow::NSerialization::TNativeSerializer().SerializeFull(SerializeToRecordBatch(pkSchema)); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.h b/ydb/core/tx/columnshard/engines/predicate/filter.h index 20e7a039b273..bbc70b5ff584 100644 --- a/ydb/core/tx/columnshard/engines/predicate/filter.h +++ b/ydb/core/tx/columnshard/engines/predicate/filter.h @@ -9,9 +9,15 @@ class TPKRangesFilter { bool FakeRanges = true; std::deque SortedRanges; bool ReverseFlag = false; + public: TPKRangesFilter(const bool reverse); + [[nodiscard]] TConclusionStatus Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema); + std::shared_ptr SerializeToRecordBatch(const std::shared_ptr& pkSchema) const; + TString SerializeToString(const std::shared_ptr& pkSchema) const; + bool IsEmpty() const { return SortedRanges.empty() || FakeRanges; } @@ -37,13 +43,12 @@ class TPKRangesFilter { return SortedRanges.end(); } - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + bool 
IsPortionInUsage(const TPortionInfo& info) const; + TPKRangeFilter::EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - [[nodiscard]] bool Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo); - std::set GetColumnNames() const { std::set result; for (auto&& i : SortedRanges) { @@ -57,6 +62,30 @@ class TPKRangesFilter { TString DebugString() const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; + + static std::shared_ptr BuildFromRecordBatchLines(const std::shared_ptr& batch, const bool reverse); + + static std::shared_ptr BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse); + static std::shared_ptr BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse); + + template + static TConclusion BuildFromProto(const TProto& proto, const bool reverse, const std::vector& ydbPk) { + TPKRangesFilter result(reverse); + for (auto& protoRange : proto.GetRanges()) { + TSerializedTableRange range(protoRange); + auto fromPredicate = std::make_shared(); + auto toPredicate = std::make_shared(); + TSerializedTableRange serializedRange(protoRange); + std::tie(*fromPredicate, *toPredicate) = TPredicate::DeserializePredicatesRange(serializedRange, ydbPk); + auto status = result.Add(fromPredicate, toPredicate, NArrow::TStatusValidator::GetValid(NArrow::MakeArrowSchema(ydbPk))); + if (status.IsFail()) { + return status; + } + } + return result; + } }; } diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp index 7a4ee282f0cc..a6831ca2ad50 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp @@ -1,20 +1,21 @@ #include "predicate.h" 
+#include #include -#include +#include + +#include namespace NKikimr::NOlap { TPredicate::TPredicate(EOperation op, std::shared_ptr batch) noexcept : Operation(op) - , Batch(std::move(batch)) -{ + , Batch(std::move(batch)) { Y_ABORT_UNLESS(IsFrom() || IsTo()); } TPredicate::TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema) - : Operation(op) -{ + : Operation(op) { Y_ABORT_UNLESS(IsFrom() || IsTo()); if (!serializedBatch.empty()) { Batch = NArrow::DeserializeBatch(serializedBatch, schema); @@ -31,7 +32,112 @@ std::vector TPredicate::ColumnNames() const { return out; } -IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { +std::vector ExtractTypes(const std::vector>& columns) { + std::vector types; + types.reserve(columns.size()); + for (auto& [name, type] : columns) { + types.push_back(type); + } + return types; +} + +TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { + Y_ABORT_UNLESS(cells.size() == columns.size()); + if (cells.empty()) { + return {}; + } + + std::vector types = ExtractTypes(columns); + + NArrow::TArrowBatchBuilder batchBuilder; + batchBuilder.Reserve(1); + auto startStatus = batchBuilder.Start(columns); + Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); + + batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); + + auto batch = batchBuilder.FlushBatch(false); + Y_ABORT_UNLESS(batch); + Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); + Y_ABORT_UNLESS(batch->num_rows() == 1); + return NArrow::SerializeBatchNoCompression(batch); +} + +std::pair TPredicate::DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns) { + std::vector leftCells; + std::vector> leftColumns; + bool leftTrailingNull = false; + { + TConstArrayRef cells = range.From.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + 
leftCells.reserve(size); + leftColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + leftCells.push_back(cells[i]); + leftColumns.push_back(columns[i]); + leftTrailingNull = false; + } else { + leftTrailingNull = true; + } + } + } + + std::vector rightCells; + std::vector> rightColumns; + bool rightTrailingNull = false; + { + TConstArrayRef cells = range.To.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + rightCells.reserve(size); + rightColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + rightCells.push_back(cells[i]); + rightColumns.push_back(columns[i]); + rightTrailingNull = false; + } else { + rightTrailingNull = true; + } + } + } + + const bool fromInclusive = range.FromInclusive || leftTrailingNull; + const bool toInclusive = range.ToInclusive && !rightTrailingNull; + + TString leftBorder = FromCells(leftCells, leftColumns); + TString rightBorder = FromCells(rightCells, rightColumns); + auto leftSchema = NArrow::MakeArrowSchema(leftColumns); + Y_ASSERT(leftSchema.ok()); + auto rightSchema = NArrow::MakeArrowSchema(rightColumns); + Y_ASSERT(rightSchema.ok()); + return std::make_pair( + TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), + TPredicate(toInclusive ? 
NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); +} + +std::shared_ptr TPredicate::CutNulls(const std::shared_ptr& batch) { + AFL_VERIFY(batch->num_rows() == 1)("count", batch->num_rows()); + AFL_VERIFY(batch->num_columns()); + std::vector> colsNotNull; + std::vector> fieldsNotNull; + ui32 idx = 0; + for (auto&& i : batch->columns()) { + if (i->IsNull(0)) { + break; + } + colsNotNull.emplace_back(i); + fieldsNotNull.emplace_back(batch->schema()->field(idx)); + ++idx; + } + AFL_VERIFY(colsNotNull.size()); + return arrow::RecordBatch::Make(std::make_shared(fieldsNotNull), 1, colsNotNull); +} + +IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred) { out << NSsa::GetFunctionName(pred.Operation); for (i32 i = 0; i < pred.Batch->num_columns(); ++i) { @@ -61,4 +167,4 @@ IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { return out; } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.h b/ydb/core/tx/columnshard/engines/predicate/predicate.h index 0b856894e65f..8365971ea29e 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.h +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -10,7 +11,9 @@ struct TPredicate { private: using EOperation = NArrow::EOperation; EOperation Operation{ EOperation::Unspecified }; + public: + static std::shared_ptr CutNulls(const std::shared_ptr& batch); std::shared_ptr Batch; @@ -29,8 +32,8 @@ struct TPredicate { } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { auto column = Batch->column(colIndex); if (!column) { return defaultValue; @@ -42,10 +45,18 @@ struct TPredicate { } } - bool Empty() const noexcept { return 
Batch.get() == nullptr; } - bool Good() const { return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; } - bool IsFrom() const noexcept { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } - bool IsTo() const noexcept { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } + bool Empty() const noexcept { + return Batch.get() == nullptr; + } + bool Good() const { + return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; + } + bool IsFrom() const noexcept { + return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; + } + bool IsTo() const noexcept { + return Operation == EOperation::Less || Operation == EOperation::LessEqual; + } bool IsInclusive() const { return Operation == EOperation::GreaterEqual || Operation == EOperation::LessEqual; } @@ -56,13 +67,16 @@ struct TPredicate { return Empty() ? "()" : Batch->schema()->ToString(); } + static std::pair DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns); + constexpr TPredicate() noexcept = default; TPredicate(EOperation op, std::shared_ptr batch) noexcept; TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema); - friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred); + friend IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred); }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/range.cpp b/ydb/core/tx/columnshard/engines/predicate/range.cpp index 3a073fcb8fec..83c6602d242d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/range.cpp @@ -6,11 +6,11 @@ namespace NKikimr::NOlap { std::set TPKRangeFilter::GetColumnIds(const TIndexInfo& indexInfo) const { std::set result; for (auto&& i : PredicateFrom.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + 
result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } for (auto&& i : PredicateTo.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } return result; @@ -39,63 +39,84 @@ NKikimr::NArrow::TColumnFilter TPKRangeFilter::BuildFilter(const arrow::Datum& d return result.And(PredicateFrom.BuildFilter(data)); } -bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionEnd = info.IndexKeyEnd(); - const int commonSize = std::min(from->Size(), portionEnd.Size()); - if (std::is_gt(from->ComparePartNotNull(portionEnd, commonSize))) { - return false; - } - } +bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info) const { + return IsPortionInPartialUsage(info.IndexKeyStart(), info.IndexKeyEnd()) != TPKRangeFilter::EUsageClass::DontUsage; +} - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionStart = info.IndexKeyStart(); - const int commonSize = std::min(to->Size(), portionStart.Size()); - if (std::is_lt(to->ComparePartNotNull(portionStart, commonSize))) { - return false; +TPKRangeFilter::EUsageClass TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + { + std::partial_ordering equalityStartWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityStartWithFrom = start.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityEndWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityEndWithTo = end.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityStartWithFrom == 
std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityStartWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityEndWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityEndWithTo == std::partial_ordering::less); + if (startInternal && endInternal) { + return EUsageClass::FullUsage; } } + - return true; -} - -bool TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { - bool startUsage = false; - bool endUsage = false; - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(from->Size() <= start.Size()); - if (PredicateFrom.IsInclude()) { - startUsage = std::is_lt(start.ComparePartNotNull(*from, from->Size())); - } else { - startUsage = std::is_lteq(start.ComparePartNotNull(*from, from->Size())); + if (const auto& from = PredicateFrom.GetReplaceKey()) { + const std::partial_ordering equalityEndWithFrom = end.ComparePartNotNull(*from, from->Size()); + if (equalityEndWithFrom == std::partial_ordering::less) { + return EUsageClass::DontUsage; + } else if (equalityEndWithFrom == std::partial_ordering::equivalent) { + if (PredicateFrom.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - startUsage = true; } - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(to->Size() <= end.Size()); - if (PredicateTo.IsInclude()) { - endUsage = std::is_gt(end.ComparePartNotNull(*to, to->Size())); - } else { - endUsage = std::is_gteq(end.ComparePartNotNull(*to, to->Size())); + if (const auto& to = PredicateTo.GetReplaceKey()) { + const std::partial_ordering equalityStartWithTo = start.ComparePartNotNull(*to, to->Size()); + if (equalityStartWithTo == std::partial_ordering::greater) { + return EUsageClass::DontUsage; + } else if (equalityStartWithTo == std::partial_ordering::equivalent) { + if 
(PredicateTo.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - endUsage = true; } -// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())("to", PredicateTo.DebugString()) -// ("start_usage", startUsage)("end_usage", endUsage); +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())( +// "to", PredicateTo.DebugString()); - return endUsage || startUsage; + return EUsageClass::PartialUsage; } -std::optional TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { +TConclusion TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { if (!from.CrossRanges(to)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot_build_predicate_range")("error", "predicates from/to not intersected"); - return {}; + return TConclusionStatus::Fail("predicates from/to not intersected"); } return TPKRangeFilter(std::move(from), std::move(to)); } +bool TPKRangeFilter::CheckPoint(const NArrow::TReplaceKey& point) const { + std::partial_ordering equalityWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityWithFrom = point.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityWithTo = point.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityWithFrom == std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityWithTo == std::partial_ordering::less); + return startInternal && endInternal; +} + } diff --git 
a/ydb/core/tx/columnshard/engines/predicate/range.h b/ydb/core/tx/columnshard/engines/predicate/range.h index ff84f35408a3..6f9f264b7d70 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.h +++ b/ydb/core/tx/columnshard/engines/predicate/range.h @@ -28,20 +28,20 @@ class TPKRangeFilter { return PredicateTo; } - std::optional KeyFrom(const std::shared_ptr& key) const { - return PredicateFrom.ExtractKey(key); - } + static TConclusion Build(TPredicateContainer&& from, TPredicateContainer&& to); - std::optional KeyTo(const std::shared_ptr& key) const { - return PredicateTo.ExtractKey(key); - } + NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - static std::optional Build(TPredicateContainer&& from, TPredicateContainer&& to); + bool IsPortionInUsage(const TPortionInfo& info) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; - NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; + enum class EUsageClass { + DontUsage, + PartialUsage, + FullUsage + }; - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; TString DebugString() const; diff --git a/ydb/core/tx/columnshard/engines/protos/portion_info.proto b/ydb/core/tx/columnshard/engines/protos/portion_info.proto index 8e058f49d8e2..f7f38bb96ed7 100644 --- a/ydb/core/tx/columnshard/engines/protos/portion_info.proto +++ b/ydb/core/tx/columnshard/engines/protos/portion_info.proto @@ -1,5 +1,4 @@ -import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; -import "ydb/core/formats/arrow/protos/ssa.proto"; +import "ydb/library/formats/arrow/protos/ssa.proto"; package NKikimrTxColumnShard; @@ -19,7 +18,6 @@ message 
TIndexPortionMeta { optional bytes PrimaryKeyBorders = 6; // arrow::RecordBatch with first and last ReplaceKey rows optional TSnapshot RecordSnapshotMin = 7; optional TSnapshot RecordSnapshotMax = 8; - optional NKikimrColumnShardStatisticsProto.TPortionStorage StatisticsStorage = 9; optional uint32 DeletionsCount = 10; } diff --git a/ydb/core/tx/columnshard/engines/protos/ya.make b/ydb/core/tx/columnshard/engines/protos/ya.make index 67c3e138a8ac..5719eb76af10 100644 --- a/ydb/core/tx/columnshard/engines/protos/ya.make +++ b/ydb/core/tx/columnshard/engines/protos/ya.make @@ -5,8 +5,7 @@ SRCS( ) PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h index 2681626b6d4f..37ba57b89985 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h @@ -13,7 +13,7 @@ class TScanIteratorBase { public: virtual ~TScanIteratorBase() = default; - virtual void Apply(IDataTasksProcessor::ITask::TPtr /*processor*/) { + virtual void Apply(const std::shared_ptr& /*task*/) { } @@ -31,7 +31,7 @@ class TScanIteratorBase { return {}; } virtual bool Finished() const = 0; - virtual TConclusion> GetBatch() = 0; + virtual TConclusion> GetBatch() = 0; virtual void PrepareResults() { } diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h index fbcdab4d8622..3b1d545094ac 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h @@ -1,10 +1,12 @@ #pragma once #include "read_metadata.h" + #include -#include -#include #include +#include #include +#include + #include namespace NKikimr::NOlap::NReader { @@ -13,6 +15,7 @@ class 
TComputeShardingPolicy { private: YDB_READONLY(ui32, ShardsCount, 0); YDB_READONLY_DEF(std::vector, ColumnNames); + public: TString DebugString() const { return TStringBuilder() << "shards_count:" << ShardsCount << ";columns=" << JoinSeq(",", ColumnNames) << ";"; @@ -42,10 +45,12 @@ class TReadContext { const NColumnShard::TConcreteScanCounters Counters; TReadMetadataBase::TConstPtr ReadMetadata; NResourceBroker::NSubscribe::TTaskContext ResourcesTaskContext; + const ui64 ScanId; const TActorId ScanActorId; const TActorId ResourceSubscribeActorId; const TActorId ReadCoordinatorActorId; const TComputeShardingPolicy ComputeShardingPolicy; + public: template std::shared_ptr GetReadMetadataPtrVerifiedAs() const { @@ -74,6 +79,14 @@ class TReadContext { return ScanActorId; } + ui64 GetScanId() const { + return ScanId; + } + + bool HasLock() const { + return !!ReadMetadata->GetLockId(); + } + const TReadMetadataBase::TConstPtr& GetReadMetadata() const { return ReadMetadata; } @@ -86,17 +99,18 @@ class TReadContext { return ResourcesTaskContext; } - TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, const TReadMetadataBase::TConstPtr& readMetadata, - const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy) + TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, + const TReadMetadataBase::TConstPtr& readMetadata, const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, + const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy, const ui64 scanId) : StoragesManager(storagesManager) , Counters(counters) , ReadMetadata(readMetadata) , ResourcesTaskContext("CS::SCAN_READ", counters.ResourcesSubscriberCounters) + , ScanId(scanId) , ScanActorId(scanActorId) , ResourceSubscribeActorId(resourceSubscribeActorId) , 
ReadCoordinatorActorId(readCoordinatorActorId) - , ComputeShardingPolicy(computeShardingPolicy) - { + , ComputeShardingPolicy(computeShardingPolicy) { Y_ABORT_UNLESS(ReadMetadata); } }; @@ -109,8 +123,9 @@ class IDataReader { virtual TString DoDebugString(const bool verbose) const = 0; virtual void DoAbort() = 0; virtual bool DoIsFinished() const = 0; - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; virtual TConclusion DoReadNextInterval() = 0; + public: IDataReader(const std::shared_ptr& context); virtual ~IDataReader() = default; @@ -153,7 +168,7 @@ class IDataReader { return *result; } - std::vector ExtractReadyResults(const int64_t maxRowsInBatch) { + std::vector> ExtractReadyResults(const int64_t maxRowsInBatch) { return DoExtractReadyResults(maxRowsInBatch); } @@ -171,4 +186,4 @@ class IDataReader { } }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp index e6fc29578f1c..88416a4d214f 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp @@ -1,21 +1,20 @@ #include "read_metadata.h" + #include namespace NKikimr::NOlap::NReader { -TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index) +TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index) : InsertTable(insertTable) - , Index(index) -{} + , Index(index) { +} std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription) const { if (readDescription.ReadNothing) { return std::make_shared(); } - return Index->Select(readDescription.PathId, - readDescription.GetSnapshot(), - readDescription.PKRangesFilter); + 
AFL_VERIFY(readDescription.PKRangesFilter); + return Index->Select(readDescription.PathId, readDescription.GetSnapshot(), *readDescription.PKRangesFilter); } ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInfo& portion) const { @@ -24,8 +23,10 @@ ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInf return schema; } -std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const { - return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot(), pkSchema)); +std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, + const std::shared_ptr& pkSchema, const std::optional lockId, const TSnapshot& reqSnapshot) const { + AFL_VERIFY(readDescription.PKRangesFilter); + return std::move(InsertTable->Read(readDescription.PathId, lockId, reqSnapshot, pkSchema, &*readDescription.PKRangesFilter)); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index 7770674caa45..d87fcf02868e 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -1,12 +1,17 @@ #pragma once +#include +#include #include #include -#include -#include namespace NKikimr::NOlap { - class TPortionInfo; +class TPortionInfo; } + +namespace NKikimr::NKqp::NInternalImplementation { +struct TEvScanData; +} + namespace NKikimr::NOlap::NReader { class TScanIteratorBase; @@ -18,10 +23,10 @@ class TDataStorageAccessor { const std::unique_ptr& Index; public: - TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index); + TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index); std::shared_ptr Select(const TReadDescription& readDescription) const; - 
std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const; + std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema, + const std::optional lockId, const TSnapshot& reqSnapshot) const; }; // Holds all metadata that is needed to perform read/scan @@ -32,29 +37,61 @@ struct TReadMetadataBase { ASC /* "ascending" */, DESC /* "descending" */, }; + private: - const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches - std::optional PKRangesFilter; + const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches + std::shared_ptr PKRangesFilter; TProgramContainer Program; std::shared_ptr IndexVersionsPointer; TSnapshot RequestSnapshot; std::optional RequestShardingInfo; + virtual void DoOnReadFinished(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnReplyConstruction(const ui64 /*tabletId*/, NKqp::NInternalImplementation::TEvScanData& /*scanData*/) const { + } protected: std::shared_ptr ResultIndexSchema; + ui64 TxId = 0; + std::optional LockId; + +public: + using TConstPtr = std::shared_ptr; + + void OnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + DoOnReplyConstruction(tabletId, scanData); + } + + ui64 GetTxId() const { + return TxId; + } + + std::optional GetLockId() const { + return LockId; + } + + void OnReadFinished(NColumnShard::TColumnShard& owner) const { + DoOnReadFinished(owner); + } + + void OnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + DoOnBeforeStartReading(owner); + } + const TVersionedIndex& GetIndexVersions() const { AFL_VERIFY(IndexVersionsPointer); return *IndexVersionsPointer; } -public: - using TConstPtr = std::shared_ptr; const std::optional& GetRequestShardingInfo() const { return RequestShardingInfo; } - void SetPKRangesFilter(const 
TPKRangesFilter& value) { - Y_ABORT_UNLESS(IsSorted() && value.IsReverse() == IsDescSorted()); + void SetPKRangesFilter(const std::shared_ptr& value) { + AFL_VERIFY(value); + Y_ABORT_UNLESS(IsSorted() && value->IsReverse() == IsDescSorted()); Y_ABORT_UNLESS(!PKRangesFilter); PKRangesFilter = value; } @@ -64,6 +101,11 @@ struct TReadMetadataBase { return *PKRangesFilter; } + const std::shared_ptr& GetPKRangesFilterPtr() const { + Y_ABORT_UNLESS(!!PKRangesFilter); + return PKRangesFilter; + } + ISnapshotSchema::TPtr GetResultSchema() const { return ResultIndexSchema; } @@ -74,7 +116,7 @@ struct TReadMetadataBase { ISnapshotSchema::TPtr GetLoadSchemaVerified(const TPortionInfo& porition) const; - std::shared_ptr GetBlobSchema(const ui64 version) const { + const std::shared_ptr& GetBlobSchema(const ui64 version) const { return GetIndexVersions().GetSchema(version)->GetIndexInfo().ArrowSchema(); } @@ -90,13 +132,13 @@ struct TReadMetadataBase { RequestShardingInfo = IndexVersionsPointer->GetShardingInfoOptional(pathId, RequestSnapshot); } - TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) + TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, + const std::shared_ptr& schema, const TSnapshot& requestSnapshot) : Sorting(sorting) , Program(ssaProgram) , IndexVersionsPointer(index) , RequestSnapshot(requestSnapshot) - , ResultIndexSchema(schema) - { + , ResultIndexSchema(schema) { } virtual ~TReadMetadataBase() = default; @@ -110,19 +152,25 @@ struct TReadMetadataBase { std::set GetProcessingColumnIds() const { std::set result; for (auto&& i : GetProgram().GetProcessingColumns()) { - result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnId(i)); + result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnIdVerified(i)); } return result; } - bool IsAscSorted() const { return Sorting == 
ESorting::ASC; } - bool IsDescSorted() const { return Sorting == ESorting::DESC; } - bool IsSorted() const { return IsAscSorted() || IsDescSorted(); } + bool IsAscSorted() const { + return Sorting == ESorting::ASC; + } + bool IsDescSorted() const { + return Sorting == ESorting::DESC; + } + bool IsSorted() const { + return IsAscSorted() || IsDescSorted(); + } virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const = 0; virtual std::vector GetKeyYqlSchema() const = 0; // TODO: can this only be done for base class? - friend IOutputStream& operator << (IOutputStream& out, const TReadMetadataBase& meta) { + friend IOutputStream& operator<<(IOutputStream& out, const TReadMetadataBase& meta) { meta.Dump(out); return out; } @@ -160,7 +208,6 @@ struct TReadMetadataBase { } return ResultIndexSchema->GetIndexInfo().GetIndexNameOptional(entityId); } - }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp index 8b655d899a55..00ab74fa9921 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp @@ -61,8 +61,7 @@ TColumnShardScan::TColumnShardScan(const TActorId& columnShardActorId, const TAc , Deadline(TInstant::Now() + (timeout ? 
timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) , ScanCountersPool(scanCountersPool) , Stats(NTracing::TTraceClient::GetLocalClient("SHARD", ::ToString(TabletId)/*, "SCAN_TXID:" + ::ToString(TxId)*/)) - , ComputeShardingPolicy(computeShardingPolicy) -{ + , ComputeShardingPolicy(computeShardingPolicy) { AFL_VERIFY(ReadMetadataRange); KeyYqlSchema = ReadMetadataRange->GetKeyYqlSchema(); } @@ -80,7 +79,7 @@ void TColumnShardScan::Bootstrap(const TActorContext& ctx) { ReadCoordinatorActorId = ctx.Register(new NBlobOperations::NRead::TReadCoordinatorActor(TabletId, SelfId())); std::shared_ptr context = std::make_shared(StoragesManager, ScanCountersPool, - ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); + ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy, ScanId); ScanIterator = ReadMetadataRange->StartScan(context); auto startResult = ScanIterator->Start(); StartInstant = TMonotonic::Now(); @@ -98,17 +97,18 @@ void TColumnShardScan::Bootstrap(const TActorContext& ctx) { } } -void TColumnShardScan::HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev) { +void TColumnShardScan::HandleScan(NColumnShard::TEvPrivate::TEvTaskProcessedResult::TPtr& ev) { --InFlightReads; auto g = Stats->MakeGuard("task_result"); - if (ev->Get()->GetErrorMessage()) { - ACFL_ERROR("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); - SendScanError("task_error:" + ev->Get()->GetErrorMessage()); + auto result = ev->Get()->ExtractResult(); + if (result.IsFail()) { + ACFL_ERROR("event", "TEvTaskProcessedResult")("error", result.GetErrorMessage()); + SendScanError("task_error:" + result.GetErrorMessage()); Finish(NColumnShard::TScanCounters::EStatusFinish::ConveyorInternalError); } else { ACFL_DEBUG("event", "TEvTaskProcessedResult"); - auto t = static_pointer_cast(ev->Get()->GetResult()); - 
Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(ev->Get()->GetResult())); + auto t = static_pointer_cast(result.GetResult()); + Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(result.GetResult())); if (!ScanIterator->Finished()) { ScanIterator->Apply(t); } @@ -208,7 +208,7 @@ bool TColumnShardScan::ProduceResults() noexcept { return false; } - std::optional resultOpt = resultConclusion.DetachResult(); + std::shared_ptr resultOpt = resultConclusion.DetachResult(); if (!resultOpt) { ACFL_DEBUG("stage", "no data is ready yet")("iterator", ScanIterator->DebugString()); return false; @@ -242,6 +242,7 @@ bool TColumnShardScan::ProduceResults() noexcept { Result->ArrowBatch = shardedBatch.GetRecordBatch(); Rows += batch->num_rows(); Bytes += NArrow::GetTableDataSize(Result->ArrowBatch); + ACFL_DEBUG("stage", "data_format")("batch_size", NArrow::GetTableDataSize(Result->ArrowBatch))("num_rows", numRows)("batch_columns", JoinSeq(",", batch->schema()->field_names())); } if (CurrentLastReadKey) { @@ -273,9 +274,9 @@ void TColumnShardScan::ContinueProcessing() { if (ChunksLimiter.HasMore()) { auto g = Stats->MakeGuard("Finish"); MakeResult(); + Finish(NColumnShard::TScanCounters::EStatusFinish::Success); SendResult(false, true); ScanIterator.reset(); - Finish(NColumnShard::TScanCounters::EStatusFinish::Success); } } else { while (true) { @@ -374,6 +375,7 @@ bool TColumnShardScan::SendResult(bool pageFault, bool lastBatch) { Y_ABORT_UNLESS(AckReceivedInstant); ScanCountersPool.AckWaitingInfo(TMonotonic::Now() - *AckReceivedInstant); } + ReadMetadataRange->OnReplyConstruction(TabletId, *Result); AckReceivedInstant.reset(); Send(ScanComputeActorId, Result.Release(), IEventHandle::FlagTrackDelivery); // TODO: FlagSubscribeOnSession ? 
@@ -401,7 +403,7 @@ void TColumnShardScan::Finish(const NColumnShard::TScanCounters::EStatusFinish s Send(ColumnShardActorId, new NColumnShard::TEvPrivate::TEvReadFinished(RequestCookie, TxId)); AFL_VERIFY(StartInstant); - ScanCountersPool.OnScanDuration(status, TMonotonic::Now() - *StartInstant); + ScanCountersPool.OnScanFinished(status, TMonotonic::Now() - *StartInstant); ReportStats(); PassAway(); } diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.h b/ydb/core/tx/columnshard/engines/reader/actor/actor.h index 33134f85bcab..a37074f70a5d 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.h +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.h @@ -1,17 +1,17 @@ #pragma once +#include +#include #include +#include +#include #include -#include #include -#include +#include #include #include -#include - -#include -#include #include +#include #include namespace NKikimr::NOlap::NReader { @@ -22,6 +22,7 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TActorId ReadCoordinatorActorId; const std::shared_ptr StoragesManager; std::optional StartInstant; + public: static constexpr auto ActorActivityType() { return NKikimrServices::TActivity::KQP_OLAP_SCAN; @@ -31,9 +32,8 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo virtual void PassAway() override; TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, - const std::shared_ptr& storagesManager, const TComputeShardingPolicy& computeShardingPolicy, - ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, - ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, + const std::shared_ptr& storagesManager, const TComputeShardingPolicy& computeShardingPolicy, ui32 scanId, ui64 txId, + ui32 scanGen, ui64 requestCookie, ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, NKikimrDataEvents::EDataFormat dataFormat, const NColumnShard::TScanCounters& 
scanCountersPool); void Bootstrap(const TActorContext& ctx); @@ -41,21 +41,20 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo private: STATEFN(StateScan) { auto g = Stats->MakeGuard("processing"); - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) - ); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) ("SelfId", SelfId())( + "TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen)); switch (ev->GetTypeRewrite()) { hFunc(NKqp::TEvKqpCompute::TEvScanDataAck, HandleScan); hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleScan); hFunc(TEvents::TEvUndelivered, HandleScan); hFunc(TEvents::TEvWakeup, HandleScan); - hFunc(NConveyor::TEvExecution::TEvTaskProcessedResult, HandleScan); + hFunc(NColumnShard::TEvPrivate::TEvTaskProcessedResult, HandleScan); default: AFL_VERIFY(false)("unexpected_event", ev->GetTypeName()); } } - void HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev); + void HandleScan(NColumnShard::TEvPrivate::TEvTaskProcessedResult::TPtr& ev); void HandleScan(NKqp::TEvKqpCompute::TEvScanDataAck::TPtr& ev); @@ -80,10 +79,10 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo class TScanStatsOwner: public NKqp::TEvKqpCompute::IShardScanStats { private: YDB_READONLY_DEF(TReadStats, Stats); + public: TScanStatsOwner(const TReadStats& stats) : Stats(stats) { - } virtual THashMap GetMetrics() const override { @@ -142,11 +141,11 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TDuration ReadingDurationMax; NMonitoring::THistogramPtr BlobDurationsCounter; NMonitoring::THistogramPtr ByteDurationsCounter; + public: TBlobStats(const NMonitoring::THistogramPtr blobDurationsCounter, const NMonitoring::THistogramPtr byteDurationsCounter) : BlobDurationsCounter(blobDurationsCounter) , 
ByteDurationsCounter(byteDurationsCounter) { - } void Received(const TBlobRange& br, const TDuration d) { ReadingDurationSum += d; @@ -181,4 +180,4 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TDuration LastReportedElapsedTime; }; -} \ No newline at end of file +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp index 26f14784a032..ac778b00a6c7 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp +++ b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp @@ -1,9 +1,23 @@ #include "conveyor_task.h" +#include +#include namespace NKikimr::NOlap::NReader { -bool IDataTasksProcessor::ITask::Apply(IDataReader& indexedDataRead) const { - return DoApply(indexedDataRead); +NKikimr::TConclusionStatus IDataTasksProcessor::ITask::DoExecute(const std::shared_ptr& taskPtr) { + auto result = DoExecuteImpl(); + if (result.IsFail()) { + NActors::TActivationContext::AsActorContext().Send(OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(result)); + } else { + NActors::TActivationContext::AsActorContext().Send( + OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(static_pointer_cast(taskPtr))); + } + return result; +} + +void IDataTasksProcessor::ITask::DoOnCannotExecute(const TString& reason) { + NActors::TActivationContext::AsActorContext().Send( + OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(TConclusionStatus::Fail(reason))); } } diff --git a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h index f29b19ecee59..0342577c2554 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h +++ b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h @@ -1,28 +1,46 @@ #pragma once + #include + #include +#include namespace NKikimr::NOlap::NReader { class IDataReader; 
+class IApplyAction { +protected: + virtual bool DoApply(IDataReader& indexedDataRead) const = 0; + +public: + bool Apply(IDataReader& indexedDataRead) const { + return DoApply(indexedDataRead); + } +}; + class IDataTasksProcessor { public: - class ITask: public NConveyor::ITask { + class ITask: public NConveyor::ITask, public IApplyAction { private: using TBase = NConveyor::ITask; - protected: - virtual bool DoApply(IDataReader& indexedDataRead) const = 0; - public: - ITask(const std::optional ownerId = {}) - : TBase(ownerId) { + const NActors::TActorId OwnerId; + virtual TConclusionStatus DoExecuteImpl() = 0; - } + protected: + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override final; + virtual void DoOnCannotExecute(const TString& reason) override; + public: using TPtr = std::shared_ptr; virtual ~ITask() = default; - bool Apply(IDataReader& indexedDataRead) const; + + ITask(const NActors::TActorId& ownerId) + : OwnerId(ownerId) + { + + } }; }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/common/description.h b/ydb/core/tx/columnshard/engines/reader/common/description.h index 704b4bd101a9..c180dcc8d067 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/description.h +++ b/ydb/core/tx/columnshard/engines/reader/common/description.h @@ -13,13 +13,15 @@ struct TReadDescription { TProgramContainer Program; public: // Table + ui64 TxId = 0; + std::optional LockId; ui64 PathId = 0; TString TableName; bool ReadNothing = false; // Less[OrEqual], Greater[OrEqual] or both // There's complex logic in NKikimr::TTableRange comparison that could be emulated only with separated compare // operations with potentially different columns. We have to remove columns to support -Inf (Null) and +Inf. 
- NOlap::TPKRangesFilter PKRangesFilter; + std::shared_ptr PKRangesFilter; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; // List of columns @@ -28,7 +30,7 @@ struct TReadDescription { TReadDescription(const TSnapshot& snapshot, const bool isReverse) : Snapshot(snapshot) - , PKRangesFilter(isReverse) { + , PKRangesFilter(std::make_shared(isReverse)) { } void SetProgram(TProgramContainer&& value) { diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.cpp b/ydb/core/tx/columnshard/engines/reader/common/result.cpp index 484165c67b54..e81e86bfc9d0 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.cpp +++ b/ydb/core/tx/columnshard/engines/reader/common/result.cpp @@ -4,19 +4,19 @@ namespace NKikimr::NOlap::NReader { class TCurrentBatch { private: - std::vector Results; + std::vector> Results; ui64 RecordsCount = 0; public: ui64 GetRecordsCount() const { return RecordsCount; } - void AddChunk(TPartialReadResult&& res) { - RecordsCount += res.GetRecordsCount(); + void AddChunk(std::shared_ptr&& res) { + RecordsCount += res->GetRecordsCount(); Results.emplace_back(std::move(res)); } - void FillResult(std::vector& result) const { + void FillResult(std::vector>& result) const { if (Results.empty()) { return; } @@ -26,11 +26,12 @@ class TCurrentBatch { } }; -std::vector TPartialReadResult::SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult) { +std::vector> TPartialReadResult::SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult) { std::vector resultBatches; TCurrentBatch currentBatch; for (auto&& i : resultsExt) { - AFL_VERIFY(i.GetRecordsCount()); + AFL_VERIFY(i->GetRecordsCount()); currentBatch.AddChunk(std::move(i)); if (currentBatch.GetRecordsCount() >= maxRecordsInResult) { resultBatches.emplace_back(std::move(currentBatch)); @@ -41,7 +42,7 @@ std::vector TPartialReadResult::SplitResults(std::vector result; + std::vector> result; for (auto&& i : resultBatches) { 
i.FillResult(result); } diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.h b/ydb/core/tx/columnshard/engines/reader/common/result.h index 2c3f698bf7d7..e3028b01b5ad 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.h +++ b/ydb/core/tx/columnshard/engines/reader/common/result.h @@ -1,16 +1,19 @@ #pragma once #include +#include #include #include +#include #include #include namespace NKikimr::NOlap::NReader { // Represents a batch of rows produced by ASC or DESC scan with applied filters and partial aggregation -class TPartialReadResult { +class TPartialReadResult: public TNonCopyable { private: - YDB_READONLY_DEF(std::vector>, ResourcesGuards); + YDB_READONLY_DEF(std::shared_ptr, ResourcesGuard); + YDB_READONLY_DEF(std::shared_ptr, GroupGuard); NArrow::TShardedRecordBatch ResultBatch; // This 1-row batch contains the last key that was read while producing the ResultBatch. @@ -32,12 +35,6 @@ class TPartialReadResult { return ResultBatch.GetRecordBatch(); } - const std::shared_ptr& GetResourcesGuardOnly() const { - AFL_VERIFY(ResourcesGuards.size() == 1); - AFL_VERIFY(!!ResourcesGuards.front()); - return ResourcesGuards.front(); - } - ui64 GetMemorySize() const { return ResultBatch.GetMemorySize(); } @@ -46,7 +43,8 @@ class TPartialReadResult { return ResultBatch.GetRecordsCount(); } - static std::vector SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult); + static std::vector> SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult); const NArrow::TShardedRecordBatch& GetShardedBatch() const { return ResultBatch; @@ -56,32 +54,23 @@ class TPartialReadResult { return LastReadKey; } - explicit TPartialReadResult( - const std::vector>& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : ResourcesGuards(resourcesGuards) + explicit TPartialReadResult(std::shared_ptr&& resourcesGuard, + std::shared_ptr&& gGuard, const 
NArrow::TShardedRecordBatch& batch, + std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : ResourcesGuard(std::move(resourcesGuard)) + , GroupGuard(std::move(gGuard)) , ResultBatch(batch) , LastReadKey(lastKey) - , NotFinishedIntervalIdx(notFinishedIntervalIdx) - { - for (auto&& i : ResourcesGuards) { - AFL_VERIFY(i); - } + , NotFinishedIntervalIdx(notFinishedIntervalIdx) { Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); Y_ABORT_UNLESS(LastReadKey); Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); } explicit TPartialReadResult( - const std::shared_ptr& resourcesGuards, const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(std::vector>({resourcesGuards}), batch, lastKey, notFinishedIntervalIdx) { - AFL_VERIFY(resourcesGuards); - } - - explicit TPartialReadResult(const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(std::vector>(), batch, lastKey, notFinishedIntervalIdx) { + : TPartialReadResult(nullptr, nullptr, batch, lastKey, notFinishedIntervalIdx) { } }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp index 87315949329a..ae28340c9932 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp @@ -1,11 +1,13 @@ #include "constructor.h" -#include "resolver.h" #include "read_metadata.h" +#include "resolver.h" + #include namespace NKikimr::NOlap::NReader::NPlain { -NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { +NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram( + const 
TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { AFL_VERIFY(vIndex); auto& indexInfo = vIndex->GetSchema(Snapshot)->GetIndexInfo(); TIndexColumnResolver columnResolver(indexInfo); @@ -17,26 +19,29 @@ std::vector TIndexScannerConstructor::GetPrimaryKeyScheme(const N return indexInfo.GetPrimaryKeyColumns(); } -NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { +NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata( + const NColumnShard::TColumnShard* self, const TReadDescription& read) const { auto& insertTable = self->InsertTable; auto& index = self->TablesManager.GetPrimaryIndex(); if (!insertTable || !index) { return std::shared_ptr(); } - if (read.GetSnapshot().GetPlanStep() < self->GetMinReadStep()) { - return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot()); + if (read.GetSnapshot().GetPlanInstant() < self->GetMinReadSnapshot().GetPlanInstant()) { + return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot() << ". CS min read snapshot: " + << self->GetMinReadSnapshot() << ". now: " << TInstant::Now()); } TDataStorageAccessor dataAccessor(insertTable, index); - auto readMetadata = std::make_shared(index->CopyVersionedIndexPtr(), read.GetSnapshot(), + AFL_VERIFY(read.PathId); + auto readMetadata = std::make_shared(read.PathId, index->CopyVersionedIndexPtr(), read.GetSnapshot(), IsReverse ? 
TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram()); - auto initResult = readMetadata->Init(read, dataAccessor); + auto initResult = readMetadata->Init(self, read, dataAccessor); if (!initResult) { return initResult; } - return dynamic_pointer_cast(readMetadata); + return static_pointer_cast(readMetadata); } -} \ No newline at end of file +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp index 076b69c7f49a..c24fbe0577a7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp @@ -1,6 +1,11 @@ #include "read_metadata.h" + +#include +#include #include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -8,16 +13,36 @@ std::unique_ptr TReadMetadata::StartScan(const std::shared_pt return std::make_unique(readContext, readContext->GetReadMetadataPtrVerifiedAs()); } -TConclusionStatus TReadMetadata::Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { +TConclusionStatus TReadMetadata::Init( + const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { SetPKRangesFilter(readDescription.PKRangesFilter); InitShardingInfo(readDescription.PathId); + TxId = readDescription.TxId; + LockId = readDescription.LockId; + if (LockId) { + owner->GetOperationsManager().RegisterLock(*LockId, owner->Generation()); + LockSharingInfo = owner->GetOperationsManager().GetLockVerified(*LockId).GetSharingInfo(); + } /// @note We could have column name changes between schema versions: /// Add '1:foo', Drop '1:foo', Add '2:foo'. Drop should hide '1:foo' from reads. 
/// It's expected that we have only one version on 'foo' in blob and could split them by schema {planStep:txId}. /// So '1:foo' would be omitted in blob records for the column in new snapshots. And '2:foo' - in old ones. /// It's not possible for blobs with several columns. There should be a special logic for them. - CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey()); + CommittedBlobs = + dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey(), LockId, GetRequestSnapshot()); + + if (LockId) { + for (auto&& i : CommittedBlobs) { + if (auto writeId = i.GetWriteIdOptional()) { + if (owner->HasLongTxWrites(*writeId)) { + } else { + auto op = owner->GetOperationsManager().GetOperationByInsertWriteIdVerified(*writeId); + AddWriteIdToCheck(*writeId, op->GetLockId()); + } + } + } + } SelectInfo = dataAccessor.Select(readDescription); StatsMode = readDescription.StatsMode; @@ -41,7 +66,7 @@ std::set TReadMetadata::GetPKColumnIds() const { std::set result; auto& indexInfo = ResultIndexSchema->GetIndexInfo(); for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { - Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); + Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnIdVerified(i.first)).second); } return result; } @@ -51,8 +76,57 @@ std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptr
field_names(), {}, IsDescSorted()); + return NArrow::NMerger::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, GetReplaceKey()->field_names(), {}, IsDescSorted()); +} + +void TReadMetadata::DoOnReadFinished(NColumnShard::TColumnShard& owner) const { + if (!GetLockId()) { + return; + } + const ui64 lock = *GetLockId(); + if (GetBrokenWithCommitted()) { + owner.GetOperationsManager().GetLockVerified(lock).SetBroken(); + } else { + NOlap::NTxInteractions::TTxConflicts conflicts; + for (auto&& i : GetConflictableLockIds()) { + conflicts.Add(i, lock); + } + auto writer = std::make_shared(PathId, conflicts); + owner.GetOperationsManager().AddEventForLock(owner, lock, writer); + } } +void TReadMetadata::DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + if (!LockId) { + return; + } + auto evWriter = std::make_shared( + PathId, GetResultSchema()->GetIndexInfo().GetPrimaryKey(), GetPKRangesFilterPtr(), GetConflictableLockIds()); + owner.GetOperationsManager().AddEventForLock(owner, *LockId, evWriter); } + +void TReadMetadata::DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + if (LockSharingInfo) { + NKikimrDataEvents::TLock lockInfo; + lockInfo.SetLockId(LockSharingInfo->GetLockId()); + lockInfo.SetGeneration(LockSharingInfo->GetGeneration()); + lockInfo.SetDataShard(tabletId); + lockInfo.SetCounter(LockSharingInfo->GetCounter()); + lockInfo.SetPathId(PathId); + lockInfo.SetHasWrites(LockSharingInfo->HasWrites()); + if (LockSharingInfo->IsBroken()) { + scanData.LocksInfo.BrokenLocks.emplace_back(std::move(lockInfo)); + } else { + scanData.LocksInfo.Locks.emplace_back(std::move(lockInfo)); + } + } +} + +bool TReadMetadata::IsMyUncommitted(const TInsertWriteId writeId) const { + AFL_VERIFY(LockSharingInfo); + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.GetLockId() == LockSharingInfo->GetLockId(); +} + +} // namespace 
NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h index 371a09d73102..5f5ad70db296 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -1,18 +1,104 @@ #pragma once #include #include -#include +#include #include #include +namespace NKikimr::NColumnShard { +class TLockSharingInfo; +} + namespace NKikimr::NOlap::NReader::NPlain { // Holds all metadata that is needed to perform read/scan struct TReadMetadata : public TReadMetadataBase { using TBase = TReadMetadataBase; + +private: + const ui64 PathId; + std::shared_ptr BrokenWithCommitted = std::make_shared(); + std::shared_ptr LockSharingInfo; + + class TWriteIdInfo { + private: + const ui64 LockId; + std::shared_ptr Conflicts; + + public: + TWriteIdInfo(const ui64 lockId, const std::shared_ptr& counter) + : LockId(lockId) + , Conflicts(counter) { + } + + ui64 GetLockId() const { + return LockId; + } + + void MarkAsConflictable() const { + Conflicts->Inc(); + } + + bool IsConflictable() const { + return Conflicts->Val(); + } + }; + + THashMap> LockConflictCounters; + THashMap ConflictedWriteIds; + + virtual void DoOnReadFinished(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const override; + public: using TConstPtr = std::shared_ptr; + bool GetBrokenWithCommitted() const { + return BrokenWithCommitted->Val(); + } + THashSet GetConflictableLockIds() const { + THashSet result; + for (auto&& i : ConflictedWriteIds) { + if (i.second.IsConflictable()) { + result.emplace(i.second.GetLockId()); + } + } + return result; + } + + bool 
IsLockConflictable(const ui64 lockId) const { + auto it = LockConflictCounters.find(lockId); + AFL_VERIFY(it != LockConflictCounters.end()); + return it->second->Val(); + } + + bool IsWriteConflictable(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.IsConflictable(); + } + + void AddWriteIdToCheck(const TInsertWriteId writeId, const ui64 lockId) { + auto it = LockConflictCounters.find(lockId); + if (it == LockConflictCounters.end()) { + it = LockConflictCounters.emplace(lockId, std::make_shared()).first; + } + AFL_VERIFY(ConflictedWriteIds.emplace(writeId, TWriteIdInfo(lockId, it->second)).second); + } + + [[nodiscard]] bool IsMyUncommitted(const TInsertWriteId writeId) const; + + void SetConflictedWriteId(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + it->second.MarkAsConflictable(); + } + + void SetBrokenWithCommitted() const { + BrokenWithCommitted->Inc(); + } + NArrow::NMerger::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; std::shared_ptr BuildReader(const std::shared_ptr& context) const; @@ -20,13 +106,18 @@ struct TReadMetadata : public TReadMetadataBase { return GetProgram().HasProcessingColumnIds(); } + ui64 GetPathId() const { + return PathId; + } + std::shared_ptr SelectInfo; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; std::vector CommittedBlobs; std::shared_ptr ReadStats; - TReadMetadata(const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) + TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) : TBase(info, sorting, ssaProgram, info->GetSchema(snapshot), snapshot) + , PathId(pathId) , ReadStats(std::make_shared()) { } @@ -35,7 +126,7 
@@ struct TReadMetadata : public TReadMetadataBase { return GetResultSchema()->GetIndexInfo().GetPrimaryKeyColumns(); } - TConclusionStatus Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); + TConclusionStatus Init(const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); std::vector GetColumnsOrder() const { auto schema = GetResultSchema(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h index c5a2998a54c1..3890edc6c361 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h @@ -20,10 +20,6 @@ class TIndexColumnResolver: public IColumnResolver { return IndexInfo.GetColumnName(id, required); } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return IndexInfo; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make index b91efa4346d8..1ab826414813 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make @@ -8,6 +8,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/kqp/compute_actor ) END() diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp index f100c8f89041..24ef9a452e4c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp +++ 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp @@ -11,7 +11,7 @@ TString TColumnsSet::DebugString() const { << ");"; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { +TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { if (external.IsEmpty() || IsEmpty()) { return *this; } @@ -30,7 +30,7 @@ NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColum return result; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { +TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { if (external.IsEmpty()) { return *this; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h index 08d7ac103d80..98e77f4971e9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h @@ -1,15 +1,24 @@ #pragma once -#include -#include #include +#include + +#include + #include namespace NKikimr::NOlap::NReader::NPlain { +enum class EStageFeaturesIndexes { + Filter = 0, + Fetching = 1, + Merge = 2 +}; + class TIndexesSet { private: YDB_READONLY_DEF(std::vector, IndexIds); YDB_READONLY_DEF(std::set, IndexIdsSet); + public: TIndexesSet(const std::set& indexIds) : IndexIds(indexIds.begin(), indexIds.end()) @@ -18,8 +27,8 @@ class TIndexesSet { } TIndexesSet(const ui32& indexId) - : IndexIds({indexId}) - , IndexIdsSet({indexId}) { + : IndexIds({ indexId }) + , IndexIdsSet({ indexId }) { } ui32 GetIndexesCount() const { @@ -31,78 +40,70 @@ class TIndexesSet { } }; -class TColumnsSet { -private: - YDB_READONLY_DEF(std::set, ColumnIds); - YDB_READONLY_DEF(std::set, ColumnNames); - std::vector ColumnNamesVector; - YDB_READONLY_DEF(std::shared_ptr, Schema); - ISnapshotSchema::TPtr 
FullReadSchema; - YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); - - void Rebuild(); +class TColumnsSetIds { +protected: + std::set ColumnIds; public: - TColumnsSet() = default; - bool IsEmpty() const { - return ColumnIds.empty(); + const std::set& GetColumnIds() const { + return ColumnIds; } - bool operator!() const { - return IsEmpty(); + TString DebugString() const { + return JoinSeq(",", ColumnIds); } - const std::vector& GetColumnNamesVector() const { - return ColumnNamesVector; + TColumnsSetIds(const std::set& ids) + : ColumnIds(ids) { } - - ui32 GetColumnsCount() const { - return ColumnIds.size(); + TColumnsSetIds() = default; + TColumnsSetIds(std::set&& ids) + : ColumnIds(std::move(ids)) { } - bool ColumnsOnly(const std::vector& fieldNames) const; - - TColumnsSet(const std::set& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds(const std::vector& ids) + : ColumnIds(ids.begin(), ids.end()) { } - TColumnsSet(const std::vector& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds.begin(), columnIds.end()) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds operator+(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + result.ColumnIds.insert(external.ColumnIds.begin(), external.ColumnIds.end()); + return result; } - const ISnapshotSchema& GetFilteredSchemaVerified() const { - AFL_VERIFY(FilteredSchema); - return *FilteredSchema; + TColumnsSetIds operator-(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + for (auto&& i : external.ColumnIds) { + result.ColumnIds.erase(i); + } + return result; + } + bool IsEmpty() const { + return ColumnIds.empty(); } - const std::shared_ptr& GetFilteredSchemaPtrVerified() 
const { - AFL_VERIFY(FilteredSchema); - return FilteredSchema; + bool operator!() const { + return IsEmpty(); + } + ui32 GetColumnsCount() const { + return ColumnIds.size(); } - bool Contains(const std::shared_ptr& columnsSet) const { + bool Contains(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return true; } return Contains(*columnsSet); } - bool IsEqual(const std::shared_ptr& columnsSet) const { + bool IsEqual(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return false; } return IsEqual(*columnsSet); } - bool Contains(const TColumnsSet& columnsSet) const { + bool Contains(const TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (!ColumnIds.contains(i)) { return false; @@ -111,7 +112,7 @@ class TColumnsSet { return true; } - bool Cross(const TColumnsSet& columnsSet) const { + bool Cross(const TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { return true; @@ -120,7 +121,7 @@ class TColumnsSet { return false; } - std::set Intersect(const TColumnsSet& columnsSet) const { + std::set Intersect(const TColumnsSetIds& columnsSet) const { std::set result; for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { @@ -130,7 +131,7 @@ class TColumnsSet { return result; } - bool IsEqual(const TColumnsSet& columnsSet) const { + bool IsEqual(const TColumnsSetIds& columnsSet) const { if (columnsSet.GetColumnIds().size() != ColumnIds.size()) { return false; } @@ -145,6 +146,56 @@ class TColumnsSet { } return true; } +}; + +class TColumnsSet: public TColumnsSetIds { +private: + using TBase = TColumnsSetIds; + YDB_READONLY_DEF(std::set, ColumnNames); + std::vector ColumnNamesVector; + YDB_READONLY_DEF(std::shared_ptr, Schema); + ISnapshotSchema::TPtr FullReadSchema; + YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); + + void Rebuild(); + +public: + TColumnsSet() = default; + const std::vector& GetColumnNamesVector() const { + return ColumnNamesVector; 
+ } + + bool ColumnsOnly(const std::vector& fieldNames) const; + + std::shared_ptr BuildSamePtr(const std::set& columnIds) const { + return std::make_shared(columnIds, FullReadSchema); + } + + TColumnsSet(const std::set& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + TColumnsSet(const std::vector& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + const ISnapshotSchema& GetFilteredSchemaVerified() const { + AFL_VERIFY(FilteredSchema); + return *FilteredSchema; + } + + const std::shared_ptr& GetFilteredSchemaPtrVerified() const { + AFL_VERIFY(FilteredSchema); + return FilteredSchema; + } TString DebugString() const; @@ -153,4 +204,4 @@ class TColumnsSet { TColumnsSet operator-(const TColumnsSet& external) const; }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp index 55810391516d..654315a1ab0b 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp @@ -1,5 +1,6 @@ #include "constructor.h" #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -14,7 +15,7 @@ bool TBlobsFetcherTask::DoOnError(const TString& storageId, const TBlobRange& ra AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("error_on_blob_reading", range.ToString())("scan_actor_id", Context->GetCommonContext()->GetScanActorId()) ("status", status.GetErrorMessage())("status_code", status.GetStatus())("storage_id", storageId); 
NActors::TActorContext::AsActorContext().Send(Context->GetCommonContext()->GetScanActorId(), - std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); + std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); return false; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp index ee6bc2eac7e8..0efd8bfbb9d2 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp @@ -1,20 +1,22 @@ #include "context.h" #include "source.h" +#include + namespace NKikimr::NOlap::NReader::NPlain { std::unique_ptr TSpecialReadContext::BuildMerger() const { - return std::make_unique(ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), - IIndexInfo::GetSnapshotColumnNames()); + return std::make_unique( + ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), IIndexInfo::GetSnapshotColumnNames()); } -ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources, const bool isExclusive) { +ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources) { ui64 result = 0; bool hasSequentialReadSources = false; for (auto&& i : sources) { auto fetchingPlan = GetColumnsFetchingPlan(i.second); AFL_VERIFY(i.second->GetIntervalsCount()); - const ui64 sourceMemory = std::max(1, fetchingPlan->PredictRawBytes(i.second) / i.second->GetIntervalsCount()); + const ui64 sourceMemory = std::max(1, i.second->GetResourceGuardsMemory() / i.second->GetIntervalsCount()); if (!i.second->IsSourceInMemory()) { hasSequentialReadSources = true; } @@ -23,17 +25,23 @@ ui64 TSpecialReadContext::GetMemoryForSources(const THashMapIsReverse()) { - result = 2 * result; // due to in time we will have data in original portion + data in merged(or 
reversed) interval - } } return result; } -std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) const { - const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || !source->IsSourceInMemory(); - const bool partialUsageByPK = ReadMetadata->GetPKRangesFilter().IsPortionInPartialUsage(source->GetStartReplaceKey(), source->GetFinishReplaceKey(), ReadMetadata->GetIndexInfo()); +std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) { + const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || + !source->IsSourceInMemory(); + const bool partialUsageByPK = [&]() { + switch (source->GetUsageClass()) { + case TPKRangeFilter::EUsageClass::PartialUsage: + return true; + case TPKRangeFilter::EUsageClass::DontUsage: + return true; + case TPKRangeFilter::EUsageClass::FullUsage: + return false; + } + }(); const bool useIndexes = (IndexChecker ? source->HasIndexes(IndexChecker->GetIndexIds()) : false); const bool isWholeExclusiveSource = source->GetExclusiveIntervalOnly() && source->IsSourceInMemory(); const bool hasDeletions = source->GetHasDeletions(); @@ -44,16 +52,18 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con needShardingFilter = true; } } - if (auto result = CacheFetchingScripts - [needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0] - [partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] - [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]) { -// AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("SS", needSnapshots)("PK", partialUsageByPK)("IDX", useIndexes)("SHARDING", needShardingFilter) -// ("EXCL", source->GetExclusiveIntervalOnly())("MEM", source->IsSourceInMemory())("result", result->DebugString()); - return result; + auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 
1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]; + if (!result) { + result = BuildColumnsFetchingPlan(needSnapshots, isWholeExclusiveSource, partialUsageByPK, useIndexes, needShardingFilter, hasDeletions); + CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0] = result; } - { - std::shared_ptr result = std::make_shared(); + AFL_VERIFY(result); + if (*result) { + return *result; + } else { + std::shared_ptr result = std::make_shared(*this); result->SetBranchName("FAKE"); result->AddStep(std::make_shared(source->GetRecordsCount())); return result; @@ -62,27 +72,42 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con class TColumnsAccumulator { private: - TColumnsSet FetchingReadyColumns; - TColumnsSet AssemblerReadyColumns; + TColumnsSetIds FetchingReadyColumns; + TColumnsSetIds AssemblerReadyColumns; + ISnapshotSchema::TPtr FullSchema; + std::shared_ptr GuaranteeNotOptional; public: - bool AddFetchingStep(TFetchingScript& script, const TColumnsSet& columns) { - auto actualColumns = columns - FetchingReadyColumns; - FetchingReadyColumns = FetchingReadyColumns + columns; + TColumnsAccumulator(const std::shared_ptr& guaranteeNotOptional, const ISnapshotSchema::TPtr& fullSchema) + : FullSchema(fullSchema) + , GuaranteeNotOptional(guaranteeNotOptional) { + } + + bool AddFetchingStep(TFetchingScript& script, const TColumnsSetIds& columns, const EStageFeaturesIndexes& stage) { + auto actualColumns = (TColumnsSetIds)columns - FetchingReadyColumns; + FetchingReadyColumns = FetchingReadyColumns + (TColumnsSetIds)columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); - script.AddStep(std::make_shared(actualSet)); + script.AddStep(std::make_shared(actualColumns, stage)); + script.AddStep(std::make_shared(actualColumns)); return true; } return false; } - bool 
AddAssembleStep(TFetchingScript& script, const TColumnsSet& columns, const TString& purposeId, const bool optional) { - auto actualColumns = columns - AssemblerReadyColumns; + bool AddAssembleStep(TFetchingScript& script, const TColumnsSetIds& columns, const TString& purposeId, const bool optional) { + auto actualColumns = (TColumnsSetIds)columns - AssemblerReadyColumns; AssemblerReadyColumns = AssemblerReadyColumns + columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); + auto actualSet = std::make_shared(actualColumns.GetColumnIds(), FullSchema); if (optional) { - script.AddStep(std::make_shared(actualSet, purposeId)); + const auto notOptionalColumnIds = GuaranteeNotOptional->Intersect(*actualSet); + if (notOptionalColumnIds.size()) { + std::shared_ptr cross = actualSet->BuildSamePtr(notOptionalColumnIds); + script.AddStep(std::make_shared(cross, purposeId)); + *actualSet = *actualSet - *cross; + } + if (!actualSet->IsEmpty()) { + script.AddStep(std::make_shared(actualSet, purposeId)); + } } else { script.AddStep(std::make_shared(actualSet, purposeId)); } @@ -92,25 +117,29 @@ class TColumnsAccumulator { } }; -std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, const bool partialUsageByPredicateExt, const bool useIndexes, - const bool needFilterSharding, const bool needFilterDeletion) const { - std::shared_ptr result = std::make_shared(); +std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, + const bool partialUsageByPredicateExt, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const { + std::shared_ptr result = std::make_shared(*this); const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); if (!!IndexChecker && useIndexes && exclusiveSource) { 
result->AddStep(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); result->AddStep(std::make_shared(IndexChecker)); } bool hasFilterSharding = false; - TColumnsAccumulator acc; + TColumnsAccumulator acc(MergeColumns, ReadMetadata->GetResultSchema()); if (needFilterSharding && !ShardingColumns->IsEmpty()) { hasFilterSharding = true; - acc.AddFetchingStep(*result, *ShardingColumns); - acc.AddAssembleStep(*result, *ShardingColumns, "SPEC_SHARDING", false); + TColumnsSetIds columnsFetch = *ShardingColumns; + if (!exclusiveSource) { + columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + } + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); + acc.AddAssembleStep(*result, columnsFetch, "SPEC_SHARDING", false); result->AddStep(std::make_shared()); } if (!EFColumns->GetColumnsCount() && !partialUsageByPredicate) { result->SetBranchName("simple"); - TColumnsSet columnsFetch = *FFColumns; + TColumnsSetIds columnsFetch = *FFColumns; if (needFilterDeletion) { columnsFetch = columnsFetch + *DeletionColumns; } @@ -118,21 +147,25 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *SpecColumns; } if (!exclusiveSource) { - columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + columnsFetch = columnsFetch + *MergeColumns; } else { if (columnsFetch.GetColumnsCount() == 1 && SpecColumns->Contains(columnsFetch) && !hasFilterSharding) { return nullptr; } } if (columnsFetch.GetColumnsCount() || hasFilterSharding || needFilterDeletion) { - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Fetching); + if (needSnapshots) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } + if (!exclusiveSource) { + acc.AddAssembleStep(*result, *MergeColumns, "LAST_PK", false); + } if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); 
result->AddStep(std::make_shared()); } - if (!exclusiveSource) { - acc.AddAssembleStep(*result, *PKColumns + *SpecColumns, "LAST_PK", false); - } acc.AddAssembleStep(*result, columnsFetch, "LAST", true); } else { return nullptr; @@ -151,29 +184,35 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); result->AddStep(std::make_shared()); } - if (needSnapshots || FFColumns->Cross(*SpecColumns)) { - acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); - result->AddStep(std::make_shared()); - } if (partialUsageByPredicate) { acc.AddAssembleStep(*result, *PredicateColumns, "PREDICATE", false); result->AddStep(std::make_shared()); } + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { + if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); + if (!i->IsFilterOnly()) { + break; + } + } + if (GetReadMetadata()->Limit) { + result->AddStep(std::make_shared(GetReadMetadata()->Limit, GetReadMetadata()->IsDescSorted())); } - acc.AddFetchingStep(*result, *FFColumns); + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } else { result->SetBranchName("merge"); @@ -182,29 +221,32 @@ std::shared_ptr 
TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *DeletionColumns; } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); - if (needFilterDeletion) { - acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); - result->AddStep(std::make_shared()); - } acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + acc.AddAssembleStep(*result, *PKColumns, "PK", false); if (needSnapshots) { result->AddStep(std::make_shared()); } - acc.AddAssembleStep(*result, *PKColumns, "PK", false); + if (needFilterDeletion) { + acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); + result->AddStep(std::make_shared()); + } if (partialUsageByPredicate) { result->AddStep(std::make_shared()); } for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { + if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); + if (!i->IsFilterOnly()) { + break; + } } - acc.AddFetchingStep(*result, *FFColumns); + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } return result; @@ -212,42 +254,72 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& commonContext) : CommonContext(commonContext) { + ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); Y_ABORT_UNLESS(ReadMetadata); Y_ABORT_UNLESS(ReadMetadata->SelectInfo); + double kffFilter = 0.45; + double kffFetching = 0.45; + double kffMerge = 0.10; + TString 
stagePrefix; + if (ReadMetadata->GetEarlyFilterColumnIds().size()) { + stagePrefix = "EF"; + kffFilter = 0.7; + kffFetching = 0.15; + kffMerge = 0.15; + } else { + stagePrefix = "FO"; + kffFilter = 0.1; + kffFetching = 0.75; + kffMerge = 0.15; + } + + std::vector> stages = { + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FILTER", kffFilter * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FETCHING", kffFetching * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures(stagePrefix + "::MERGE", kffMerge * TGlobalLimits::ScanMemoryLimit) + }; + ProcessMemoryGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildProcessGuard(CommonContext->GetReadMetadata()->GetTxId(), stages); + ProcessScopeGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildScopeGuard(CommonContext->GetReadMetadata()->GetTxId(), GetCommonContext()->GetScanId()); + auto readSchema = ReadMetadata->GetResultSchema(); - SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), ReadMetadata->GetIndexInfo(), readSchema); + SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), readSchema); IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); { auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); if (predicateColumns.size()) { - PredicateColumns = std::make_shared(predicateColumns, ReadMetadata->GetIndexInfo(), readSchema); + PredicateColumns = std::make_shared(predicateColumns, readSchema); } else { PredicateColumns = std::make_shared(); } } { - std::set columnIds = {NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX}; - DeletionColumns = std::make_shared(columnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + std::set columnIds = { NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX }; + DeletionColumns = 
std::make_shared(columnIds, ReadMetadata->GetResultSchema()); } if (!!ReadMetadata->GetRequestShardingInfo()) { - auto shardingColumnIds = ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); - ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + auto shardingColumnIds = + ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); + ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetResultSchema()); } else { ShardingColumns = std::make_shared(); } { auto efColumns = ReadMetadata->GetEarlyFilterColumnIds(); if (efColumns.size()) { - EFColumns = std::make_shared(efColumns, ReadMetadata->GetIndexInfo(), readSchema); + EFColumns = std::make_shared(efColumns, readSchema); } else { EFColumns = std::make_shared(); } } if (ReadMetadata->HasProcessingColumnIds()) { - FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), readSchema); if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { FFColumns = std::make_shared(*EFColumns + *SpecColumns); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); @@ -264,18 +336,35 @@ TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& co ProgramInputColumns = FFColumns; } - PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), readSchema); MergeColumns = std::make_shared(*PKColumns + *SpecColumns); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); +} + +TString TSpecialReadContext::DebugString() const { + TStringBuilder sb; + sb << "ef=" << EFColumns->DebugString() << ";" + << 
"sharding=" << ShardingColumns->DebugString() << ";" + << "pk=" << PKColumns->DebugString() << ";" + << "ff=" << FFColumns->DebugString() << ";" + << "program_input=" << ProgramInputColumns->DebugString() << ";"; + return sb; +} + +TString TSpecialReadContext::ProfileDebugString() const { + TStringBuilder sb; const auto GetBit = [](const ui32 val, const ui32 pos) -> ui32 { return (val & (1 << pos)) ? 1 : 0; }; - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); - for (ui32 i = 0; i < (1 << 7); ++i) { - CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)] - = BuildColumnsFetchingPlan(GetBit(i, 0), GetBit(i, 1), GetBit(i, 2), GetBit(i, 3), GetBit(i, 4), GetBit(i, 5)); + for (ui32 i = 0; i < (1 << 6); ++i) { + auto script = CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)]; + if (script && *script) { + sb << (*script)->DebugString() << ";"; + } } + return sb; } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h index 5a869c5fc78e..1ae41c039808 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h @@ -1,6 +1,7 @@ #pragma once #include "columns_set.h" #include "fetching.h" +#include #include #include #include @@ -13,6 +14,8 @@ class IDataSource; class TSpecialReadContext { private: YDB_READONLY_DEF(std::shared_ptr, CommonContext); + YDB_READONLY_DEF(std::shared_ptr, ProcessMemoryGuard); + YDB_READONLY_DEF(std::shared_ptr, ProcessScopeGuard); YDB_READONLY_DEF(std::shared_ptr, SpecColumns); YDB_READONLY_DEF(std::shared_ptr, MergeColumns); @@ -24,41 +27,58 @@ class TSpecialReadContext { YDB_READONLY_DEF(std::shared_ptr, FFColumns); YDB_READONLY_DEF(std::shared_ptr, 
ProgramInputColumns); + YDB_READONLY_DEF(std::shared_ptr, MergeStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FilterStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FetchingStageMemory); + + TAtomic AbortFlag = 0; NIndexes::TIndexCheckerContainer IndexChecker; TReadMetadata::TConstPtr ReadMetadata; std::shared_ptr EmptyColumns = std::make_shared(); std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshotsFilter, const bool exclusiveSource, const bool partialUsageByPredicate, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const; - std::array, 2>, 2>, 2>, 2>, 2>, 2> CacheFetchingScripts; + std::array>, 2>, 2>, 2>, 2>, 2>, 2> + CacheFetchingScripts; public: - static const inline ui64 DefaultRejectMemoryIntervalLimit = ((ui64)3) << 30; - static const inline ui64 DefaultReduceMemoryIntervalLimit = DefaultRejectMemoryIntervalLimit; - static const inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; - - const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(DefaultReduceMemoryIntervalLimit); - const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(DefaultRejectMemoryIntervalLimit); - const ui64 ReadSequentiallyBufferSize = DefaultReadSequentiallyBufferSize; + const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(); + const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(); + const ui64 ReadSequentiallyBufferSize = TGlobalLimits::DefaultReadSequentiallyBufferSize; - ui64 GetMemoryForSources(const THashMap>& sources, const bool isExclusive); + ui64 GetProcessMemoryControlId() const { + AFL_VERIFY(ProcessMemoryGuard); + return ProcessMemoryGuard->GetProcessId(); + } + ui64 GetMemoryForSources(const THashMap>& sources); + ui64 GetRequestedMemoryBytes() const 
{ + return MergeStageMemory->GetFullMemory() + FilterStageMemory->GetFullMemory() + FetchingStageMemory->GetFullMemory(); + } const TReadMetadata::TConstPtr& GetReadMetadata() const { return ReadMetadata; } - std::unique_ptr BuildMerger() const; + bool IsAborted() const { + return AtomicGet(AbortFlag); + } - TString DebugString() const { - return TStringBuilder() << "ef=" << EFColumns->DebugString() << ";" - << "sharding=" << ShardingColumns->DebugString() << ";" - << "pk=" << PKColumns->DebugString() << ";" - << "ff=" << FFColumns->DebugString() << ";" - << "program_input=" << ProgramInputColumns->DebugString(); + void Abort() { + AtomicSet(AbortFlag, 1); } + ~TSpecialReadContext() { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("profile", ProfileDebugString()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("fetching", DebugString()); + } + + std::unique_ptr BuildMerger() const; + + TString DebugString() const; + TString ProfileDebugString() const; + TSpecialReadContext(const std::shared_ptr& commonContext); - std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source) const; + std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source); }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp index 869269fd9d3a..bf38c466b75b 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp @@ -1,18 +1,21 @@ #include "fetched_data.h" -#include -#include -#include + +#include +#include +#include namespace NKikimr::NOlap { -void TFetchedData::SyncTableColumns(const std::vector>& fields) { +void TFetchedData::SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema) { for (auto&& i : fields) { if (Table->GetSchema()->GetFieldByName(i->name())) { continue; } - Table->AddField(i, std::make_shared( - 
NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows()))).Validate(); + Table + ->AddField(i, std::make_shared(NArrow::TThreadSimpleArraysCache::Get( + i->type(), schema.GetExternalDefaultValueVerified(i->name()), Table->num_rows()))) + .Validate(); } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h index 1bd31f77dc0e..b535c2bc4673 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h @@ -1,14 +1,17 @@ #pragma once -#include -#include #include #include +#include #include #include #include + #include #include +#include +#include + namespace NKikimr::NOlap { class TFetchedData { @@ -18,14 +21,20 @@ class TFetchedData { YDB_READONLY_DEF(std::shared_ptr, Table); YDB_READONLY_DEF(std::shared_ptr, Filter); YDB_READONLY(bool, UseFilter, false); + public: TFetchedData(const bool useFilter) - : UseFilter(useFilter) - { + : UseFilter(useFilter) { + } + ui32 GetFilteredCount(const ui32 recordsCount, const ui32 defLimit) const { + if (!Filter) { + return std::min(defLimit, recordsCount); + } + return Filter->GetFilteredCount().value_or(recordsCount); } - void SyncTableColumns(const std::vector>& fields); + void SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema); std::shared_ptr GetAppliedFilter() const { return UseFilter ? 
Filter : nullptr; @@ -60,6 +69,11 @@ class TFetchedData { return (Filter && Filter->IsTotalDenyFilter()) || (Table && !Table->num_rows()); } + void Clear() { + Filter = std::make_shared(NArrow::TColumnFilter::BuildDenyFilter()); + Table = nullptr; + } + void AddFilter(const std::shared_ptr& filter) { if (!filter) { return; @@ -67,6 +81,31 @@ class TFetchedData { return AddFilter(*filter); } + void CutFilter(const ui32 recordsCount, const ui32 limit, const bool reverse) { + auto filter = std::make_shared(NArrow::TColumnFilter::BuildAllowFilter()); + ui32 recordsCountImpl = Filter ? Filter->GetFilteredCount().value_or(recordsCount) : recordsCount; + if (recordsCountImpl < limit) { + return; + } + if (reverse) { + filter->Add(false, recordsCountImpl - limit); + filter->Add(true, limit); + } else { + filter->Add(true, limit); + filter->Add(false, recordsCountImpl - limit); + } + if (Filter) { + if (UseFilter) { + AddFilter(*filter); + } else { + AddFilter(Filter->CombineSequentialAnd(*filter)); + } + } else { + AddFilter(*filter); + } + + } + void AddFilter(const NArrow::TColumnFilter& filter) { if (UseFilter && Table) { AFL_VERIFY(filter.Apply(Table)); @@ -80,14 +119,10 @@ class TFetchedData { } } - void AddBatch(const std::shared_ptr& batch) { - return AddBatch(arrow::Table::Make(batch->schema(), batch->columns(), batch->num_rows())); - } - void AddBatch(const std::shared_ptr& table) { AFL_VERIFY(table); if (UseFilter) { - AddBatch(table->BuildTable()); + AddBatch(table->BuildTableVerified()); } else { if (!Table) { Table = table; @@ -110,13 +145,13 @@ class TFetchedData { AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); } } - }; class TFetchedResult { private: YDB_READONLY_DEF(std::shared_ptr, Batch); YDB_READONLY_DEF(std::shared_ptr, NotAppliedFilter); + public: TFetchedResult(std::unique_ptr&& data) : Batch(data->GetTable()) @@ -128,4 +163,4 @@ class TFetchedResult { } }; -} +} // namespace NKikimr::NOlap diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp index 995958f48449..2d0ec349aa6a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp @@ -1,7 +1,10 @@ #include "fetching.h" #include "source.h" + +#include #include -#include +#include +#include #include @@ -15,36 +18,46 @@ bool TStepAction::DoApply(IDataReader& /*owner*/) const { return true; } -bool TStepAction::DoExecute() { - if (Source->IsAborted()) { - return true; +TConclusionStatus TStepAction::DoExecuteImpl() { + if (Source->GetContext()->IsAborted()) { + return TConclusionStatus::Success(); } auto executeResult = Cursor.Execute(Source); if (!executeResult) { - SetErrorMessage(executeResult.GetErrorMessage()); - return false; + return executeResult; } if (*executeResult) { Source->Finalize(); FinishedFlag = true; } - return true; + return TConclusionStatus::Success(); } -TConclusion TColumnBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +TConclusion TColumnBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingColumns(source, step, Columns); } ui64 TColumnBlobsFetchingStep::DoPredictRawBytes(const std::shared_ptr& source) const { - const ui64 result = source->GetColumnRawBytes(Columns->GetColumnIds()); + ui64 result = source->GetColumnRawBytes(Columns.GetColumnIds()); + if (source->GetContext()->GetReadMetadata()->Limit && source->GetExclusiveIntervalOnly()) { + result = std::max(result * 1.0 * source->GetContext()->GetReadMetadata()->Limit / source->GetRecordsCount(), + source->GetColumnBlobBytes(Columns.GetColumnIds())); + } if (!result) { - return Columns->GetColumnIds().size() * source->GetRecordsCount() * sizeof(ui32); // null for all records 
for all columns in future will be + return Columns.GetColumnIds().size() * source->GetRecordsCount() * + sizeof(ui32); // null for all records for all columns in future will be } else { return result; } } -TConclusion TIndexBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +ui64 TColumnBlobsFetchingStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnBlobBytes(Columns.GetColumnIds()); +} + +TConclusion TIndexBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingIndexes(source, step, Indexes); } @@ -57,7 +70,12 @@ TConclusion TAssemblerStep::DoExecuteInplace(const std::shared_ptr TOptionalAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { +ui64 TAssemblerStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + +TConclusion TOptionalAssemblerStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { source->AssembleColumns(Columns); return true; } @@ -71,6 +89,10 @@ bool TOptionalAssemblerStep::DoInitSourceSeqColumnIds(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + TConclusion TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { AFL_VERIFY(source); AFL_VERIFY(Step); @@ -90,13 +112,20 @@ ui64 TFilterProgramStep::DoPredictRawBytes(const std::shared_ptr& s } TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { - auto filter = source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTable()); + auto filter = + 
source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } TConclusion TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { - auto filter = MakeSnapshotFilter(source->GetStageData().GetTable()->BuildTable(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + auto filter = MakeSnapshotFilter( + source->GetStageData().GetTable()->BuildTableVerified(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + if (filter.GetFilteredCount().value_or(source->GetRecordsCount()) != source->GetRecordsCount()) { + if (source->AddTxConflict()) { + return true; + } + } source->MutableStageData().AddFilter(filter); return true; } @@ -120,7 +149,8 @@ TConclusion TDeletionFilter::DoExecuteInplace(const std::shared_ptr TShardingFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { NYDBTest::TControllers::GetColumnShardController()->OnSelectShardingFilter(); - auto filter = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo()->GetFilter(source->GetStageData().GetTable()->BuildTable()); + const auto& shardingInfo = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo(); + auto filter = shardingInfo->GetFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } @@ -130,7 +160,8 @@ TConclusion TBuildFakeSpec::DoExecuteInplace(const std::shared_ptrfields()) { columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), NArrow::DefaultScalar(f->type()), Count)); } - source->MutableStageData().AddBatch(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns)); + source->MutableStageData().AddBatch( + 
std::make_shared(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns))); return true; } @@ -142,14 +173,21 @@ TConclusion TApplyIndexStep::DoExecuteInplace(const std::shared_ptr TFetchingScriptCursor::Execute(const std::shared_ptr& source) { AFL_VERIFY(source); NMiniKQL::TThrowingBindTerminator bind; + Script->OnExecute(); AFL_VERIFY(!Script->IsFinished(CurrentStepIdx)); while (!Script->IsFinished(CurrentStepIdx)) { if (source->GetStageData().IsEmpty()) { + source->OnEmptyStageData(); break; } auto step = Script->GetStep(CurrentStepIdx); - TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), + IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", step->DebugString())("scan_step_idx", CurrentStepIdx); + AFL_VERIFY(!CurrentStartInstant); + CurrentStartInstant = TMonotonic::Now(); + AFL_VERIFY(!CurrentStartDataSize); + CurrentStartDataSize = step->GetProcessingDataSize(source); const TConclusion resultStep = step->ExecuteInplace(source, *this); if (!resultStep) { return resultStep; @@ -157,9 +195,83 @@ TConclusion TFetchingScriptCursor::Execute(const std::shared_ptr&& guard, + const std::shared_ptr& /*allocation*/) { + auto data = Source.lock(); + if (!data || data->GetContext()->IsAborted()) { + guard->Release(); + return false; + } + data->RegisterAllocationGuard(std::move(guard)); + Step.Next(); + auto task = std::make_shared(data, std::move(Step), data->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return true; +} + +TAllocateMemoryStep::TFetchingStepAllocation::TFetchingStepAllocation( + const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step) + : 
TBase(mem) + , Source(source) + , Step(step) + , TasksGuard(source->GetContext()->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) { } + +TConclusion TAllocateMemoryStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + + auto allocation = std::make_shared(source, GetProcessingDataSize(source), step); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(source->GetContext()->GetProcessMemoryControlId(), + source->GetContext()->GetCommonContext()->GetScanId(), source->GetFirstIntervalId(), { allocation }, (ui32)StageIndex); + return false; +} + +ui64 TAllocateMemoryStep::GetProcessingDataSize(const std::shared_ptr& source) const { + ui64 size = source->GetColumnRawBytes(Columns.GetColumnIds()); + + if (source->GetStageData().GetUseFilter() && source->GetContext()->GetReadMetadata()->Limit) { + const ui32 filtered = source->GetStageData().GetFilteredCount(source->GetRecordsCount(), source->GetContext()->GetReadMetadata()->Limit); + if (filtered < source->GetRecordsCount()) { + size = std::max(size * 1.0 * filtered / source->GetRecordsCount(), source->GetColumnBlobBytes(Columns.GetColumnIds())); + } + } + return size; +} + +TString TFetchingScript::DebugString() const { + TStringBuilder sb; + TStringBuilder sbBranch; + for (auto&& i : Steps) { + if (i->GetSumDuration() > TDuration::MilliSeconds(10)) { + sbBranch << "{" << i->DebugString() << "};"; + } + } + if (!sbBranch) { + return ""; + } + sb << "{branch:" << BranchName << ";limit:" << Limit << ";"; + if (FinishInstant && StartInstant) { + sb << "duration:" << *FinishInstant - *StartInstant << ";"; + } + + sb << "steps_10Ms:[" << sbBranch << "]}"; + return sb; +} + +TFetchingScript::TFetchingScript(const TSpecialReadContext& context) + : Limit(context.GetReadMetadata()->Limit) { +} + +NKikimr::TConclusion TFilterCutLimit::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + 
source->MutableStageData().CutFilter(source->GetRecordsCount(), Limit, Reverse); + return true; +} + +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h index 5cddf4955c71..133aa4db3669 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h @@ -1,23 +1,38 @@ #pragma once #include "columns_set.h" -#include -#include -#include + +#include #include #include +#include +#include +#include + +#include namespace NKikimr::NOlap::NReader::NPlain { class IDataSource; class TFetchingScriptCursor; +class TSpecialReadContext; class IFetchingStep { private: YDB_READONLY_DEF(TString, Name); + YDB_READONLY(TDuration, SumDuration, TDuration::Zero()); + YDB_READONLY(ui64, SumSize, 0); + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const = 0; virtual TString DoDebugString() const { return ""; } + public: + void AddDuration(const TDuration d) { + SumDuration += d; + } + void AddDataSize(const ui64 size) { + SumSize += size; + } virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const { return 0; } @@ -31,15 +46,18 @@ class IFetchingStep { return DoExecuteInplace(source, step); } - IFetchingStep(const TString& name) - : Name(name) - { + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const { + return 0; + } + IFetchingStep(const TString& name) + : Name(name) { } TString DebugString() const { TStringBuilder sb; - sb << "name=" << Name << ";details={" << DoDebugString() << "};"; + sb << "name=" << Name << ";duration=" << SumDuration << ";" + << "size=" << 1e-9 * SumSize << ";details={" << DoDebugString() << "};"; return sb; } }; @@ -48,19 +66,30 @@ class TFetchingScript { private: YDB_ACCESSOR(TString, BranchName, 
"UNDEFINED"); std::vector> Steps; + std::optional StartInstant; + std::optional FinishInstant; + const ui32 Limit; + public: - TFetchingScript() = default; + TFetchingScript(const TSpecialReadContext& context); - TString DebugString() const { - TStringBuilder sb; - sb << "["; - for (auto&& i : Steps) { - sb << "{" << i->DebugString() << "};"; + void AddStepDataSize(const ui32 index, const ui64 size) { + GetStep(index)->AddDataSize(size); + } + + void AddStepDuration(const ui32 index, const TDuration d) { + FinishInstant = TMonotonic::Now(); + GetStep(index)->AddDuration(d); + } + + void OnExecute() { + if (!StartInstant) { + StartInstant = TMonotonic::Now(); } - sb << "]"; - return sb; } + TString DebugString() const; + const std::shared_ptr& GetStep(const ui32 index) const { AFL_VERIFY(index < Steps.size()); return Steps[index]; @@ -68,7 +97,7 @@ class TFetchingScript { ui64 PredictRawBytes(const std::shared_ptr& source) const { ui64 result = 0; - for (auto&& current: Steps) { + for (auto&& current : Steps) { result += current->DoPredictRawBytes(source); } return result; @@ -98,14 +127,23 @@ class TFetchingScript { class TFetchingScriptCursor { private: + std::optional CurrentStartInstant; + std::optional CurrentStartDataSize; ui32 CurrentStepIdx = 0; std::shared_ptr Script; + void FlushDuration() { + AFL_VERIFY(CurrentStartInstant); + AFL_VERIFY(CurrentStartDataSize); + Script->AddStepDuration(CurrentStepIdx, TMonotonic::Now() - *CurrentStartInstant); + Script->AddStepDataSize(CurrentStepIdx, *CurrentStartDataSize); + CurrentStartInstant.reset(); + CurrentStartDataSize.reset(); + } + public: TFetchingScriptCursor(const std::shared_ptr& script, const ui32 index) : CurrentStepIdx(index) - , Script(script) - { - + , Script(script) { } const TString& GetName() const { @@ -117,6 +155,7 @@ class TFetchingScriptCursor { } bool Next() { + FlushDuration(); return !Script->IsFinished(++CurrentStepIdx); } @@ -129,9 +168,11 @@ class TStepAction: public 
IDataTasksProcessor::ITask { std::shared_ptr Source; TFetchingScriptCursor Cursor; bool FinishedFlag = false; + protected: virtual bool DoApply(IDataReader& owner) const override; - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "STEP_ACTION"; @@ -140,9 +181,7 @@ class TStepAction: public IDataTasksProcessor::ITask { TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId) : TBase(ownerActorId) , Source(source) - , Cursor(std::move(cursor)) - { - + , Cursor(std::move(cursor)) { } }; @@ -150,16 +189,17 @@ class TBuildFakeSpec: public IFetchingStep { private: using TBase = IFetchingStep; const ui32 Count = 0; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { return TIndexInfo::GetSpecialColumnsRecordSize() * Count; } + public: TBuildFakeSpec(const ui32 count) : TBase("FAKE_SPEC") - , Count(count) - { + , Count(count) { AFL_VERIFY(Count); } }; @@ -168,33 +208,73 @@ class TApplyIndexStep: public IFetchingStep { private: using TBase = IFetchingStep; const NIndexes::TIndexCheckerContainer IndexChecker; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + public: TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) : TBase("APPLY_INDEX") - , IndexChecker(indexChecker) - { + , IndexChecker(indexChecker) { + } +}; + +class TAllocateMemoryStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + TColumnsSetIds Columns; + const EStageFeaturesIndexes StageIndex; + +protected: + class TFetchingStepAllocation: public NGroupedMemoryManager::IAllocation { + private: + using TBase = NGroupedMemoryManager::IAllocation; + std::weak_ptr 
Source; + TFetchingScriptCursor Step; + NColumnShard::TCounterGuard TasksGuard; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + + public: + TFetchingStepAllocation(const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step); + }; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns.DebugString() << ";stage=" << StageIndex << ";"; + } + +public: + TAllocateMemoryStep(const TColumnsSetIds& columns, const EStageFeaturesIndexes stageIndex) + : TBase("ALLOCATE_MEMORY::" + ::ToString(stageIndex)) + , Columns(columns) + , StageIndex(stageIndex) { + AFL_VERIFY(Columns.GetColumnsCount()); } }; class TColumnBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; - std::shared_ptr Columns; + TColumnsSetIds Columns; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { - return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + return TStringBuilder() << "columns=" << Columns.DebugString() << ";"; } + public: - TColumnBlobsFetchingStep(const std::shared_ptr& columns) + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + TColumnBlobsFetchingStep(const TColumnsSetIds& columns) : TBase("FETCHING_COLUMNS") , Columns(columns) { - AFL_VERIFY(Columns); - AFL_VERIFY(Columns->GetColumnsCount()); + AFL_VERIFY(Columns.GetColumnsCount()); } }; @@ -202,12 +282,14 @@ class 
TIndexBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Indexes; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { return TStringBuilder() << "indexes=" << Indexes->DebugString() << ";"; } + public: TIndexBlobsFetchingStep(const std::shared_ptr& indexes) : TBase("FETCHING_INDEXES") @@ -224,12 +306,13 @@ class TAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("ASSEMBLER" + (specName ? "::" + specName : "")) - , Columns(columns) - { + , Columns(columns) { AFL_VERIFY(Columns); AFL_VERIFY(Columns->GetColumnsCount()); } @@ -242,9 +325,13 @@ class TOptionalAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + protected: virtual bool DoInitSourceSeqColumnIds(const std::shared_ptr& source) const override; + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TOptionalAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("OPTIONAL_ASSEMBLER" + (specName ? 
"::" + specName : "")) @@ -258,13 +345,35 @@ class TFilterProgramStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Step; + protected: virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TFilterProgramStep(const std::shared_ptr& step) : TBase("PROGRAM") - , Step(step) + , Step(step) { + } +}; + +class TFilterCutLimit: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Limit; + const bool Reverse; + +protected: + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterCutLimit(const ui32 limit, const bool reverse) + : TBase("LIMIT") + , Limit(limit) + , Reverse(reverse) { } }; @@ -272,15 +381,15 @@ class TFilterProgramStep: public IFetchingStep { class TPredicateFilter: public IFetchingStep { private: using TBase = IFetchingStep; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TPredicateFilter() : TBase("PREDICATE") { - } }; -class TSnapshotFilter : public IFetchingStep { +class TSnapshotFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -302,7 +411,7 @@ class TDeletionFilter: public IFetchingStep { } }; -class TShardingFilter : public IFetchingStep { +class TShardingFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -313,5 +422,4 @@ class TShardingFilter : public IFetchingStep { } }; - -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp index 8e228937b653..9da043a366c1 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp @@ -1,34 +1,30 @@ #include "interval.h" + #include +#include namespace NKikimr::NOlap::NReader::NPlain { void TFetchingInterval::ConstructResult() { - if (ReadySourcesCount.Val() != WaitSourcesCount || !ReadyGuards.Val()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx); + const ui32 ready = ReadySourcesCount.Val(); + if (ready != WaitSourcesCount) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx)( + "count", WaitSourcesCount)("ready", ready)("interval_id", GetIntervalId()); return; } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } if (AtomicCas(&SourcesFinalized, 1, 0)) { IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerStart); + + MergingContext->SetIntervalChunkMemory(Context->GetMemoryForSources(Sources)); + auto task = std::make_shared(MergingContext, Context, std::move(Sources)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); - } -} - -void TFetchingInterval::OnInitResourcesGuard(const std::shared_ptr& guard) { - IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitSources); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "allocated")("interval_idx", IntervalIdx); - AFL_VERIFY(guard); - AFL_VERIFY(!ResourcesGuard); - ResourcesGuard = guard; - for (auto&& i : Sources) { - i.second->OnInitResourcesGuard(i.second); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), 
+ Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } - AFL_VERIFY(ReadyGuards.Inc() <= 1); - ConstructResult(); } void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { @@ -40,29 +36,27 @@ void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { TFetchingInterval::TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) - : TTaskBase(0, context->GetMemoryForSources(sources, isExclusiveInterval), "", context->GetCommonContext()->GetResourcesTaskContext()) - , MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) + : MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) , Context(context) , TaskGuard(Context->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) , Sources(sources) , IntervalIdx(intervalIdx) - , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) -{ - Y_ABORT_UNLESS(Sources.size()); + , IntervalGroupGuard(NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildGroupGuard( + Context->GetProcessMemoryControlId(), context->GetCommonContext()->GetScanId())) + , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) { + AFL_VERIFY(Sources.size()); for (auto&& [_, i] : Sources) { if (!i->IsDataReady()) { ++WaitSourcesCount; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "ready_source")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } - i->RegisterInterval(*this); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "register_source")("interval_idx", IntervalIdx)("interval_id", 
GetIntervalId()); + i->RegisterInterval(*this, i); } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitResources); -} - -void TFetchingInterval::DoOnAllocationSuccess(const std::shared_ptr& guard) { - AFL_VERIFY(guard); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("interval_idx", IntervalIdx)("event", "resources_allocated") - ("resources", guard->DebugString())("start", MergingContext->GetIncludeStart())("finish", MergingContext->GetIncludeFinish())("sources", Sources.size()); - OnInitResourcesGuard(guard); + ConstructResult(); } void TFetchingInterval::SetMerger(std::unique_ptr&& merger) { @@ -82,13 +76,15 @@ void TFetchingInterval::OnPartSendingComplete() { AFL_VERIFY(Merger); AFL_VERIFY(AtomicCas(&PartSendingWait, 0, 1)); AFL_VERIFY(AtomicGet(SourcesFinalized) == 1); - if (AbortedFlag) { + if (Context->IsAborted()) { return; } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerContinue); + auto task = std::make_shared(MergingContext, Context, std::move(Merger)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), + Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h index 80613ef5b2d2..86c3f1aa0510 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h @@ -6,11 +6,9 @@ namespace NKikimr::NOlap::NReader::NPlain { -class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe::ITask { +class TFetchingInterval: public TNonCopyable { private: - 
using TTaskBase = NResourceBroker::NSubscribe::ITask; std::shared_ptr MergingContext; - bool AbortedFlag = false; TAtomic SourcesFinalized = 0; TAtomic PartSendingWait = 0; std::unique_ptr Merger; @@ -20,15 +18,11 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void ConstructResult(); - std::shared_ptr ResourcesGuard; const ui32 IntervalIdx; + const std::shared_ptr IntervalGroupGuard; TAtomicCounter ReadySourcesCount = 0; - TAtomicCounter ReadyGuards = 0; ui32 WaitSourcesCount = 0; NColumnShard::TConcreteScanCounters::TScanIntervalStateGuard IntervalStateGuard; - void OnInitResourcesGuard(const std::shared_ptr& guard); -protected: - virtual void DoOnAllocationSuccess(const std::shared_ptr& guard) override; public: std::set GetPathIds() const { @@ -43,16 +37,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe return IntervalIdx; } - const THashMap>& GetSources() const { - return Sources; + ui32 GetIntervalId() const { + AFL_VERIFY(IntervalGroupGuard); + return IntervalGroupGuard->GetGroupId(); } - const std::shared_ptr& GetResourcesGuard() const { - return ResourcesGuard; + const THashMap>& GetSources() const { + return Sources; } void Abort() { - AbortedFlag = true; if (AtomicCas(&SourcesFinalized, 1, 0)) { for (auto&& i : Sources) { i.second->Abort(); @@ -83,10 +77,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void OnPartSendingComplete(); void SetMerger(std::unique_ptr&& merger); bool HasMerger() const; + std::shared_ptr GetGroupGuard() const { + return IntervalGroupGuard; + } TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval); + + ~TFetchingInterval() { + } }; } diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp index 60e41095303c..f705deb4501c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp @@ -11,7 +11,7 @@ TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptrGetReadMetadata()->IsSorted()); } -TConclusion> TColumnShardScanIterator::GetBatch() { +TConclusion> TColumnShardScanIterator::GetBatch() { FillReadyResults(); return ReadyResults.pop_front(); } @@ -33,11 +33,11 @@ void TColumnShardScanIterator::FillReadyResults() { i64 limitLeft = Context->GetReadMetadata()->Limit == 0 ? INT64_MAX : Context->GetReadMetadata()->Limit - ItemsRead; for (size_t i = 0; i < ready.size() && limitLeft; ++i) { auto& batch = ReadyResults.emplace_back(std::move(ready[i])); - if (batch.GetResultBatch().num_rows() > limitLeft) { - batch.Cut(limitLeft); + if (batch->GetResultBatch().num_rows() > limitLeft) { + batch->Cut(limitLeft); } - limitLeft -= batch.GetResultBatch().num_rows(); - ItemsRead += batch.GetResultBatch().num_rows(); + limitLeft -= batch->GetResultBatch().num_rows(); + ItemsRead += batch->GetResultBatch().num_rows(); } if (limitLeft == 0) { @@ -53,7 +53,7 @@ TColumnShardScanIterator::~TColumnShardScanIterator() { ReadMetadata->ReadStats->PrintToLog(); } -void TColumnShardScanIterator::Apply(IDataTasksProcessor::ITask::TPtr task) { +void TColumnShardScanIterator::Apply(const std::shared_ptr& task) { if (!IndexedData->IsFinished()) { Y_ABORT_UNLESS(task->Apply(*IndexedData)); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h index 38f05ff276cd..38b1fcc29882 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h +++ 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NReader::NPlain { class TReadyResults { private: const NColumnShard::TConcreteScanCounters Counters; - std::deque Data; + std::deque> Data; i64 RecordsCount = 0; public: TString DebugString() const { @@ -19,7 +19,7 @@ class TReadyResults { << "records_count:" << RecordsCount << ";" ; if (Data.size()) { - sb << "schema=" << Data.front().GetResultBatch().schema()->ToString() << ";"; + sb << "schema=" << Data.front()->GetResultBatch().schema()->ToString() << ";"; } return sb; } @@ -28,17 +28,19 @@ class TReadyResults { { } - TPartialReadResult& emplace_back(TPartialReadResult&& v) { - RecordsCount += v.GetResultBatch().num_rows(); + const std::shared_ptr& emplace_back(std::shared_ptr&& v) { + AFL_VERIFY(!!v); + RecordsCount += v->GetResultBatch().num_rows(); Data.emplace_back(std::move(v)); return Data.back(); } - std::optional pop_front() { + std::shared_ptr pop_front() { if (Data.empty()) { return {}; } auto result = std::move(Data.front()); - RecordsCount -= result.GetResultBatch().num_rows(); + AFL_VERIFY(RecordsCount >= result->GetResultBatch().num_rows()); + RecordsCount -= result->GetResultBatch().num_rows(); Data.pop_front(); return result; } @@ -84,13 +86,13 @@ class TColumnShardScanIterator: public TScanIteratorBase { ; } - virtual void Apply(IDataTasksProcessor::ITask::TPtr task) override; + virtual void Apply(const std::shared_ptr& task) override; bool Finished() const override { return IndexedData->IsFinished() && ReadyResults.empty(); } - TConclusion> GetBatch() override; + virtual TConclusion> GetBatch() override; virtual void PrepareResults() override; virtual TConclusion ReadNextInterval() override; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp index 0b7bc55dffec..1981faaa4314 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp @@ -2,6 +2,9 @@ #include "plain_read_data.h" #include "source.h" +#include +#include + namespace NKikimr::NOlap::NReader::NPlain { std::optional TBaseMergeTask::DrainMergerLinearScan(const std::optional resultBufferLimit) { @@ -19,12 +22,13 @@ std::optional TBaseMergeTask::DrainMergerLinearScan(co Merger = nullptr; } Context->GetCommonContext()->GetCounters().OnLinearScanInterval(rbBuilder->GetRecordsCount()); - ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({rbBuilder->Finalize()})); + ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ rbBuilder->Finalize() })); return lastResultPosition; } void TBaseMergeTask::PrepareResultBatch() { if (!ResultBatch || ResultBatch->num_rows() == 0) { + AllocationGuard = nullptr; ResultBatch = nullptr; LastPK = nullptr; return; @@ -41,8 +45,10 @@ void TBaseMergeTask::PrepareResultBatch() { } else { ShardedBatch = NArrow::TShardedRecordBatch(ResultBatch); } + AllocationGuard->Update(NArrow::GetTableMemorySize(ResultBatch)); AFL_VERIFY(!!LastPK == !!ShardedBatch->GetRecordsCount())("lpk", !!LastPK)("sb", ShardedBatch->GetRecordsCount()); } else { + AllocationGuard = nullptr; ResultBatch = nullptr; LastPK = nullptr; } @@ -51,14 +57,26 @@ void TBaseMergeTask::PrepareResultBatch() { bool TBaseMergeTask::DoApply(IDataReader& indexedDataRead) const { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoApply")("interval_idx", MergingContext->GetIntervalIdx()); auto& reader = static_cast(indexedDataRead); - reader.MutableScanner().OnIntervalResult(ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + auto copy = AllocationGuard; + reader.MutableScanner().OnIntervalResult(std::move(copy), ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + return true; +} + +bool TBaseMergeTask::DoOnAllocated( + 
std::shared_ptr&& guard, const std::shared_ptr& allocation) { + if (Context->IsAborted()) { + guard->Release(); + return false; + } + AllocationGuard = std::move(guard); + NConveyor::TScanServiceOperator::SendTaskToExecute(static_pointer_cast(allocation)); return true; } -bool TStartMergeTask::DoExecute() { +TConclusionStatus TStartMergeTask::DoExecuteImpl() { if (OnlyEmptySources) { ResultBatch = nullptr; - return true; + return TConclusionStatus::Success(); } bool sourcesInMemory = true; for (auto&& i : Sources) { @@ -71,9 +89,10 @@ bool TStartMergeTask::DoExecute() { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::EXCLUSIVE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); auto& container = Sources.begin()->second->GetStageResult().GetBatch(); if (container && container->num_rows()) { - ResultBatch = container->BuildTable(); + ResultBatch = container->BuildTableVerified(); LastPK = Sources.begin()->second->GetLastPK(); - ResultBatch = NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); + ResultBatch = + NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); Context->GetCommonContext()->GetCounters().OnNoScanInterval(ResultBatch->num_rows()); if (Context->GetCommonContext()->IsReverse()) { ResultBatch = NArrow::ReverseRecords(ResultBatch); @@ -82,7 +101,7 @@ bool TStartMergeTask::DoExecute() { } Sources.clear(); AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); - return true; + return TConclusionStatus::Success(); } TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::COMMON", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); AFL_VERIFY(!Merger); @@ -100,10 +119,10 @@ bool TStartMergeTask::DoExecute() { AFL_VERIFY(Merger->GetSourcesCount() <= Sources.size()); if (Merger->GetSourcesCount() == 0 || isEmpty) { ResultBatch = nullptr; - return true; + return 
TConclusionStatus::Success(); } } - Merger->PutControlPoint(MergingContext->GetFinish()); + Merger->PutControlPoint(MergingContext->GetFinish(), false); Merger->SkipToLowerBound(MergingContext->GetStart(), MergingContext->GetIncludeStart()); const ui32 originalSourcesCount = Sources.size(); Sources.clear(); @@ -115,10 +134,12 @@ bool TStartMergeTask::DoExecute() { ResultBatch = Merger->SingleSourceDrain(MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); if (ResultBatch) { Context->GetCommonContext()->GetCounters().OnLogScanInterval(ResultBatch->num_rows()); - AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())("ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); + AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())( + "ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); } if (MergingContext->GetIncludeFinish() && originalSourcesCount == 1) { - AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())("merger", Merger->DebugString()); + AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())( + "merger", Merger->DebugString()); } } else { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::MANY", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); @@ -130,13 +151,13 @@ bool TStartMergeTask::DoExecute() { } AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); PrepareResultBatch(); - return true; + return TConclusionStatus::Success(); } -TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, THashMap>&& sources) +TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources) : 
TBase(mergingContext, readContext) - , Sources(std::move(sources)) -{ + , Sources(std::move(sources)) { for (auto&& s : Sources) { AFL_VERIFY(s.second->IsDataReady()); } @@ -147,7 +168,7 @@ TStartMergeTask::TStartMergeTask(const std::shared_ptr& merging } } -bool TContinueMergeTask::DoExecute() { +TConclusionStatus TContinueMergeTask::DoExecuteImpl() { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::CONTINUE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); std::optional lastResultPosition = DrainMergerLinearScan(Context->ReadSequentiallyBufferSize); if (lastResultPosition) { @@ -155,7 +176,7 @@ bool TContinueMergeTask::DoExecute() { } AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); PrepareResultBatch(); - return true; + return TConclusionStatus::Success(); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h index 90cda0841363..bbe2d11ccb3a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h @@ -1,10 +1,11 @@ #pragma once #include "context.h" + #include #include - -#include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -16,6 +17,8 @@ class TMergingContext { YDB_READONLY(bool, IncludeStart, false); YDB_READONLY(ui32, IntervalIdx, 0); bool IsExclusiveIntervalFlag = false; + std::optional IntervalChunkMemory; + public: TMergingContext(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) @@ -24,9 +27,16 @@ class TMergingContext { , IncludeFinish(includeFinish) , IncludeStart(includeStart) , IntervalIdx(intervalIdx) - , IsExclusiveIntervalFlag(isExclusiveInterval) - { + , 
IsExclusiveIntervalFlag(isExclusiveInterval) { + } + + void SetIntervalChunkMemory(const ui64 value) { + IntervalChunkMemory = value; + } + ui64 GetIntervalChunkMemory() const { + AFL_VERIFY(IntervalChunkMemory); + return *IntervalChunkMemory; } bool IsExclusiveInterval() const { @@ -42,12 +52,12 @@ class TMergingContext { result.InsertValue("exclusive", IsExclusiveIntervalFlag); return result; } - }; -class TBaseMergeTask: public IDataTasksProcessor::ITask { +class TBaseMergeTask: public IDataTasksProcessor::ITask, public NGroupedMemoryManager::IAllocation { private: using TBase = IDataTasksProcessor::ITask; + protected: std::shared_ptr ResultBatch; std::shared_ptr LastPK; @@ -57,20 +67,25 @@ class TBaseMergeTask: public IDataTasksProcessor::ITask { std::shared_ptr MergingContext; const ui32 IntervalIdx; std::optional ShardedBatch; + std::shared_ptr AllocationGuard; [[nodiscard]] std::optional DrainMergerLinearScan(const std::optional resultBufferLimit); void PrepareResultBatch(); + private: virtual bool DoApply(IDataReader& indexedDataRead) const override; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + public: TBaseMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext) : TBase(readContext->GetCommonContext()->GetScanActorId()) + , IAllocation(TValidator::CheckNotNull(mergingContext)->GetIntervalChunkMemory()) , Guard(readContext->GetCommonContext()->GetCounters().GetMergeTasksGuard()) , Context(readContext) , MergingContext(mergingContext) , IntervalIdx(MergingContext->GetIntervalIdx()) { - } }; @@ -79,32 +94,37 @@ class TStartMergeTask: public TBaseMergeTask { using TBase = TBaseMergeTask; bool OnlyEmptySources = true; THashMap> Sources; + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "CS::MERGE_START"; } - TStartMergeTask(const std::shared_ptr& 
mergingContext, - const std::shared_ptr& readContext, THashMap>&& sources); + TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources); }; class TContinueMergeTask: public TBaseMergeTask { private: using TBase = TBaseMergeTask; + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "CS::MERGE_CONTINUE"; } - TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, std::unique_ptr&& merger) + TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + std::unique_ptr&& merger) : TBase(mergingContext, readContext) { AFL_VERIFY(merger); Merger = std::move(merger); } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp index 9def8738cab9..04ed0d1c6f26 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp @@ -8,38 +8,43 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) { ui32 sourceIdx = 0; std::deque> sources; - const auto& portionsOrdered = GetReadMetadata()->SelectInfo->GetPortionsOrdered(GetReadMetadata()->IsDescSorted()); + const auto& portions = GetReadMetadata()->SelectInfo->PortionsOrderedPK; const auto& committed = GetReadMetadata()->CommittedBlobs; - auto itCommitted = committed.begin(); - auto itPortion = portionsOrdered.begin(); - ui64 committedPortionsBytes = 0; - ui64 insertedPortionsBytes = 0; ui64 compactedPortionsBytes = 0; - while (itCommitted != committed.end() || itPortion != portionsOrdered.end()) { - bool movePortion = false; - if (itCommitted == committed.end()) { - movePortion = 
true; - } else if (itPortion == portionsOrdered.end()) { - movePortion = false; - } else if (itCommitted->GetFirstVerified() < (*itPortion)->IndexKeyStart()) { - movePortion = false; + ui64 insertedPortionsBytes = 0; + ui64 committedPortionsBytes = 0; + for (auto&& i : portions) { + if (i->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || i->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { + compactedPortionsBytes += i->GetTotalBlobBytes(); } else { - movePortion = true; + insertedPortionsBytes += i->GetTotalBlobBytes(); + } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + } + for (auto&& i : committed) { + if (i.HasSnapshot()) { + continue; + } + if (GetReadMetadata()->IsMyUncommitted(i.GetWriteIdVerified())) { + continue; } + if (GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetFirst()) || + GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetLast())) { + GetReadMetadata()->SetConflictedWriteId(i.GetWriteIdVerified()); + } + } - if (movePortion) { - if ((*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || (*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { - compactedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); - } else { - insertedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); + for (auto&& i : committed) { + if (!i.HasSnapshot()) { + if (GetReadMetadata()->IsWriteConflictable(i.GetWriteIdVerified())) { + continue; } - sources.emplace_back(std::make_shared(sourceIdx++, *itPortion, SpecialReadContext, (*itPortion)->IndexKeyStart(), (*itPortion)->IndexKeyEnd())); - ++itPortion; - } else { - sources.emplace_back(std::make_shared(sourceIdx++, *itCommitted, SpecialReadContext, itCommitted->GetFirstVerified(), itCommitted->GetLastVerified())); - committedPortionsBytes += itCommitted->GetSize(); - ++itCommitted; + } else if (GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(i.GetFirst(), i.GetLast()) == + 
TPKRangeFilter::EUsageClass::DontUsage) { + continue; } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + committedPortionsBytes += i.GetSize(); } Scanner = std::make_shared(std::move(sources), SpecialReadContext); @@ -54,16 +59,16 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) } -std::vector TPlainReadData::DoExtractReadyResults(const int64_t maxRowsInBatch) { - auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); +std::vector> TPlainReadData::DoExtractReadyResults(const int64_t /*maxRowsInBatch*/) { + auto result = std::move(PartialResults); + PartialResults.clear(); +// auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); ui32 count = 0; for (auto&& r: result) { - count += r.GetRecordsCount(); + count += r->GetRecordsCount(); } AFL_VERIFY(count == ReadyResultsCount); - ReadyResultsCount = 0; - PartialResults.clear(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExtractReadyResults")("result", result.size())("count", count)("finished", Scanner->IsFinished()); return result; @@ -76,7 +81,7 @@ TConclusion TPlainReadData::DoReadNextInterval() { void TPlainReadData::OnIntervalResult(const std::shared_ptr& result) { // result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); ReadyResultsCount += result->GetRecordsCount(); - PartialResults.emplace_back(std::move(*result)); + PartialResults.emplace_back(result); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h index 39d993b156d6..93d2a56bad14 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h @@ -14,9 +14,8 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin using TBase = IDataReader; 
std::shared_ptr Scanner; std::shared_ptr SpecialReadContext; - std::vector PartialResults; + std::vector> PartialResults; ui32 ReadyResultsCount = 0; - bool AbortedFlag = false; protected: virtual TConclusionStatus DoStart() override { return Scanner->Start(); @@ -31,11 +30,11 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin return sb; } - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) override; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) override; virtual TConclusion DoReadNextInterval() override; virtual void DoAbort() override { - AbortedFlag = true; + SpecialReadContext->Abort(); Scanner->Abort(); PartialResults.clear(); Y_ABORT_UNLESS(IsFinished()); @@ -68,7 +67,7 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin TPlainReadData(const std::shared_ptr& context); ~TPlainReadData() { - if (!AbortedFlag) { + if (!SpecialReadContext->IsAborted()) { Abort("unexpected on destructor"); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp index 9eafaf6f7cf1..87de386beda9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp @@ -1,27 +1,33 @@ -#include "scanner.h" #include "plain_read_data.h" +#include "scanner.h" + #include + #include namespace NKikimr::NOlap::NReader::NPlain { -void TScanHead::OnIntervalResult(const std::optional& newBatch, const std::shared_ptr& lastPK, +void TScanHead::OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& newBatch, const std::shared_ptr& lastPK, std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader) { - if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < 1000) { - if (++ZeroCount == std::max(16, 
InFlightLimit)) { - InFlightLimit = std::max(MaxInFlight, InFlightLimit * 2); - ZeroCount = 0; - } - } else { - ZeroCount = 0; + if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < MaxInFlight) { + InFlightLimit = std::min(MaxInFlight, InFlightLimit * 4); } auto itInterval = FetchingIntervals.find(intervalIdx); AFL_VERIFY(itInterval != FetchingIntervals.end()); itInterval->second->SetMerger(std::move(merger)); AFL_VERIFY(Context->GetCommonContext()->GetReadMetadata()->IsSorted()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_received")("interval_idx", intervalIdx)( + "intervalId", itInterval->second->GetIntervalId()); if (newBatch && newBatch->GetRecordsCount()) { - const std::optional callbackIdxSubscriver = itInterval->second->HasMerger() ? std::optional(intervalIdx) : std::nullopt; - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK, callbackIdxSubscriver)).second); + std::optional callbackIdxSubscriver; + std::shared_ptr gGuard; + if (itInterval->second->HasMerger()) { + callbackIdxSubscriver = intervalIdx; + } else { + gGuard = itInterval->second->GetGroupGuard(); + } + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(std::move(allocationGuard), std::move(gGuard), *newBatch, lastPK, callbackIdxSubscriver)).second); } else { AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); } @@ -31,9 +37,13 @@ void TScanHead::OnIntervalResult(const std::optionalGetIntervalIdx(); auto it = ReadyIntervals.find(intervalIdx); if (it == ReadyIntervals.end()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_absent")("interval_idx", intervalIdx)( + "merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); break; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", + it->second ? 
it->second->GetRecordsCount() : 0)("merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", it->second ? it->second->GetRecordsCount() : 0); auto result = it->second; ReadyIntervals.erase(it); if (result) { @@ -51,7 +61,8 @@ void TScanHead::OnIntervalResult(const std::optionalfirst); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "wait_interval")("remained", FetchingIntervals.size())( + "interval_idx", FetchingIntervals.begin()->first); } } @@ -67,7 +78,8 @@ TConclusionStatus TScanHead::Start() { i.second->IncIntervalsCount(); } if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -83,9 +95,11 @@ TConclusionStatus TScanHead::Start() { for (auto&& i : context.GetCurrentSources()) { i.second->IncIntervalsCount(); } - auto detectorResult = DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); + auto detectorResult = + DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -95,12 +109,11 @@ TConclusionStatus TScanHead::Start() { } TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) - : Context(context) -{ - if (!HasAppData() || 
!AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { - MaxInFlight = 256; - } else { - MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); + : Context(context) { + if (HasAppData()) { + if (AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { + MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); + } } if (Context->GetReadMetadata()->Limit) { @@ -120,88 +133,89 @@ class TSourcesStorageForMemoryOptimization { private: class TSourceInfo { private: + YDB_READONLY(ui64, Memory, 0); YDB_READONLY_DEF(std::shared_ptr, Source); YDB_READONLY_DEF(std::shared_ptr, FetchingInfo); + public: TSourceInfo(const std::shared_ptr& source, const std::shared_ptr& fetchingInfo) : Source(source) - , FetchingInfo(fetchingInfo) - { - + , FetchingInfo(fetchingInfo) { + Memory = FetchingInfo->PredictRawBytes(Source); } NJson::TJsonValue DebugJson() const { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("source", Source->DebugJsonForMemory()); -// result.InsertValue("fetching", Fetching->DebugJsonForMemory()); + result.InsertValue("memory", Memory); + // result.InsertValue("FetchingInfo", FetchingInfo->DebugJsonForMemory()); + return result; + } + + bool ReduceMemory() { + const bool result = FetchingInfo->InitSourceSeqColumnIds(Source); + if (result) { + Memory = FetchingInfo->PredictRawBytes(Source); + } return result; } + + bool operator<(const TSourceInfo& item) const { + return Memory < item.Memory; + } + }; - std::map> Sources; + std::vector Sources; YDB_READONLY(ui64, MemorySum, 0); - YDB_READONLY_DEF(std::set, PathIds); + public: TString DebugString() const { NJson::TJsonValue resultJson; auto& memorySourcesArr = resultJson.InsertValue("sources_by_memory", NJson::JSON_ARRAY); resultJson.InsertValue("sources_by_memory_count", Sources.size()); - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { + for (auto&& it: Sources) { auto& sourceMap = 
memorySourcesArr.AppendValue(NJson::JSON_MAP); - sourceMap.InsertValue("memory", it->first); auto& sourcesArr = sourceMap.InsertValue("sources", NJson::JSON_ARRAY); - for (auto&& s : it->second) { - sourcesArr.AppendValue(s.second.DebugJson()); - } + sourcesArr.AppendValue(it.DebugJson()); } return resultJson.GetStringRobust(); } - void UpdateSource(const ui64 oldMemoryInfo, const ui32 sourceIdx) { - auto it = Sources.find(oldMemoryInfo); - AFL_VERIFY(it != Sources.end()); - auto itSource = it->second.find(sourceIdx); - AFL_VERIFY(itSource != it->second.end()); - auto sourceInfo = itSource->second; - it->second.erase(itSource); - if (it->second.empty()) { - Sources.erase(it); - } - AFL_VERIFY(MemorySum >= oldMemoryInfo); - MemorySum -= oldMemoryInfo; - AddSource(sourceInfo.GetSource(), sourceInfo.GetFetchingInfo()); - } - void AddSource(const std::shared_ptr& source, const std::shared_ptr& fetching) { - const ui64 sourceMemory = fetching->PredictRawBytes(source); - MemorySum += sourceMemory; - AFL_VERIFY(Sources[sourceMemory].emplace(source->GetSourceIdx(), TSourceInfo(source, fetching)).second); - PathIds.emplace(source->GetPathId()); + Sources.emplace_back(TSourceInfo(source, fetching)); + MemorySum += Sources.back().GetMemory(); } bool Optimize(const ui64 memoryLimit) { - bool modified = true; - while (MemorySum > memoryLimit && modified) { - modified = false; - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { - for (auto&& [sourceIdx, sourceInfo] : it->second) { - if (!sourceInfo.GetFetchingInfo()->InitSourceSeqColumnIds(sourceInfo.GetSource())) { - continue; - } - modified = true; - UpdateSource(it->first, sourceIdx); - break; - } - if (modified) { - break; + if (MemorySum <= memoryLimit) { + return true; + } + std::sort(Sources.begin(), Sources.end()); + while (true) { + std::vector nextSources; + while (memoryLimit < MemorySum && Sources.size()) { + const ui64 currentMemory = Sources.back().GetMemory(); + if (Sources.back().ReduceMemory()) { + 
AFL_VERIFY(currentMemory <= MemorySum); + MemorySum -= currentMemory; + MemorySum += Sources.back().GetMemory(); + nextSources.emplace_back(std::move(Sources.back())); } + Sources.pop_back(); } + if (nextSources.empty() || MemorySum <= memoryLimit) { + break; + } + std::sort(nextSources.begin(), nextSources.end()); + std::swap(nextSources, Sources); } - return MemorySum < memoryLimit; + return MemorySum <= memoryLimit; } }; -TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const { +TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan( + const THashMap>& intervalSources, const bool isExclusiveInterval) const { TSourcesStorageForMemoryOptimization optimizer; for (auto&& i : intervalSources) { if (!isExclusiveInterval) { @@ -212,23 +226,18 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } const ui64 startMemory = optimizer.GetMemorySum(); if (!optimizer.Optimize(Context->ReduceMemoryIntervalLimit) && Context->RejectMemoryIntervalLimit < optimizer.GetMemorySum()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken") - ("reason", "a lot of memory need")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())) - ("details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? 
optimizer.DebugString() : "NEED_DEBUG_LEVEL"); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken")("reason", "a lot of memory need")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId())( + "details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? optimizer.DebugString() + : "NEED_DEBUG_LEVEL"); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryFailed(optimizer.GetMemorySum()); - return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + - ::ToString(optimizer.GetMemorySum()) + " path_ids: " + JoinSeq(",", optimizer.GetPathIds()) + ". We need wait compaction processing. Sorry."); + return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + ::ToString(optimizer.GetMemorySum()) + + " path_id: " + Context->GetReadMetadata()->GetPathId() + ". We need wait compaction processing. 
Sorry."); } else if (optimizer.GetMemorySum() < startMemory) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active") - ("reason", "need reduce memory")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active")("reason", "need reduce memory")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId()); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryReduced(startMemory - optimizer.GetMemorySum()); } Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryRequired(optimizer.GetMemorySum()); @@ -236,22 +245,28 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } TConclusion TScanHead::BuildNextInterval() { - if (AbortFlag) { + if (Context->IsAborted()) { return false; } - while (BorderPoints.size() && (FetchingIntervals.size() < InFlightLimit || BorderPoints.begin()->second.GetStartSources().empty())) { + while (BorderPoints.size()) { + if (BorderPoints.begin()->second.GetStartSources().size()) { + if (FetchingIntervals.size() >= InFlightLimit) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_next_interval")("reason", "too many intervals in flight")( + "count", FetchingIntervals.size())("limit", InFlightLimit); + return false; + } + } auto firstBorderPointInfo = std::move(BorderPoints.begin()->second); CurrentState.OnStartPoint(firstBorderPointInfo); if (CurrentState.GetIsSpecialPoint()) { const ui32 intervalIdx = SegmentIdxCounter++; - auto interval = std::make_shared( - BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, 
CurrentState.GetCurrentSources(), - Context, true, true, false); + auto interval = std::make_shared(BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, + CurrentState.GetCurrentSources(), Context, true, true, false); FetchingIntervals.emplace(intervalIdx, interval); IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), true); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)( + "interval", interval->DebugJson()); } CurrentState.OnFinishPoint(firstBorderPointInfo); @@ -262,12 +277,13 @@ TConclusion TScanHead::BuildNextInterval() { Y_ABORT_UNLESS(BorderPoints.size()); CurrentState.OnNextPointInfo(BorderPoints.begin()->second); const ui32 intervalIdx = SegmentIdxCounter++; - auto interval = std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), Context, - CurrentState.GetIncludeFinish(), CurrentState.GetIncludeStart(), CurrentState.GetIsExclusiveInterval()); + auto interval = + std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), + Context, CurrentState.GetIncludeFinish(), CurrentState.GetIncludeStart(), CurrentState.GetIsExclusiveInterval()); FetchingIntervals.emplace(intervalIdx, interval); IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", 
"new_interval")("interval_idx", intervalIdx)( + "interval", interval->DebugJson()); return true; } else { IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); @@ -285,7 +301,7 @@ bool TScanHead::IsReverse() const { } void TScanHead::Abort() { - AbortFlag = true; + AFL_VERIFY(Context->IsAborted()); THashSet sourceIds; for (auto&& i : FetchingIntervals) { for (auto&& s : i.second->GetSources()) { @@ -310,4 +326,4 @@ void TScanHead::Abort() { Y_ABORT_UNLESS(IsFinished()); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h index 7092dac19acd..09649e788147 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h @@ -2,6 +2,7 @@ #include "source.h" #include "interval.h" #include +#include #include #include @@ -80,12 +81,11 @@ class TScanHead { ui64 InFlightLimit = 1; ui64 MaxInFlight = 256; ui64 ZeroCount = 0; - bool AbortFlag = false; void DrainSources(); [[nodiscard]] TConclusionStatus DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const; public: void OnSentDataFromInterval(const ui32 intervalIdx) const { - if (AbortFlag) { + if (Context->IsAborted()) { return; } auto it = FetchingIntervals.find(intervalIdx); @@ -110,8 +110,10 @@ class TScanHead { return sb; } - void OnIntervalResult(const std::optional& batch, const std::shared_ptr& lastPK, - std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader); + void OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& batch, + const std::shared_ptr& lastPK, std::unique_ptr&& merger, + const ui32 intervalIdx, TPlainReadData& reader); TConclusionStatus Start(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index d825d0594392..bef10d38f6b1 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -1,15 +1,15 @@ -#include "source.h" - #include "constructor.h" #include "fetched_data.h" #include "interval.h" #include "plain_read_data.h" +#include "source.h" -#include +#include #include #include #include #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -19,30 +19,19 @@ void IDataSource::InitFetchingPlan(const std::shared_ptr& fetch FetchingPlan = fetching; } -void IDataSource::RegisterInterval(TFetchingInterval& interval) { +void IDataSource::RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr) { AFL_VERIFY(FetchingPlan); + AFL_VERIFY(!Context->IsAborted()); if (!IsReadyFlag) { AFL_VERIFY(Intervals.emplace(interval.GetIntervalIdx(), &interval).second); } -} - -void IDataSource::SetIsReady() { - AFL_VERIFY(!IsReadyFlag); - IsReadyFlag = true; - for (auto&& i : Intervals) { - i.second->OnSourceFetchStageReady(SourceIdx); - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); - Intervals.clear(); -} - -void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourcePtr) { - AFL_VERIFY(FetchingPlan); - if (AtomicCas(&FilterStageFlag, 1, 0)) { + if (AtomicCas(&SourceStartedFlag, 1, 0)) { + SetFirstIntervalId(interval.GetIntervalId()); + AFL_VERIFY(FetchingPlan); StageData = std::make_unique(GetExclusiveIntervalOnly() && IsSourceInMemory()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", FetchingPlan->DebugString())("source_idx", SourceIdx); NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); - if (IsAborted()) { + if (Context->IsAborted()) { 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "InitFetchingPlanAborted"); return; } @@ -52,6 +41,16 @@ void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourc } } +void IDataSource::SetIsReady() { + AFL_VERIFY(!IsReadyFlag); + IsReadyFlag = true; + for (auto&& i : Intervals) { + i.second->OnSourceFetchStageReady(SourceIdx); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); + Intervals.clear(); +} + void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, THashMap& defaultBlocks, const std::shared_ptr& filter) { const NArrow::TColumnFilter& cFilter = filter ? *filter : NArrow::TColumnFilter::BuildAllowFilter(); @@ -65,30 +64,32 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob auto itFilter = cFilter.GetIterator(false, Portion->NumRows(i)); bool itFinished = false; for (auto&& c : columnChunks) { - Y_ABORT_UNLESS(!itFinished); + AFL_VERIFY(!itFinished); if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { - auto reading = blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); + auto reading = + blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); reading->SetIsBackgroundProcess(false); reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; } else { defaultBlocks.emplace(c->GetAddress(), - TPortionInfo::TAssembleBlobInfo(c->GetMeta().GetNumRows(), Schema->GetDefaultValueVerified(c->GetColumnId()))); + TPortionInfo::TAssembleBlobInfo(c->GetMeta().GetNumRows(), Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); ++nullChunks; } itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); } AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); } - 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks) - ("reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( + "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); } -bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { +bool TPortionDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - AFL_VERIFY(columns->GetColumnsCount()); + AFL_VERIFY(columns.GetColumnsCount()); AFL_VERIFY(!StageData->GetAppliedFilter() || !StageData->GetAppliedFilter()->IsTotalDenyFilter()); - auto& columnIds = columns->GetColumnIds(); + auto& columnIds = columns.GetColumnIds(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -108,9 +109,10 @@ bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { +bool TPortionDataSource::DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - Y_ABORT_UNLESS(indexes->GetIndexesCount()); + AFL_VERIFY(indexes->GetIndexesCount()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -121,9 
+123,11 @@ bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptrGetIndexInfo().GetIndexStorageId(i.GetIndexId())); - readAction->SetIsBackgroundProcess(false); - readAction->AddRange(Portion->RestoreBlobRange(i.GetBlobRange())); + if (auto bRange = i.GetBlobRangeOptional()) { + auto readAction = action.GetReading(Schema->GetIndexInfo().GetIndexStorageId(i.GetIndexId())); + readAction->SetIsBackgroundProcess(false); + readAction->AddRange(Portion->RestoreBlobRange(*bRange)); + } } if (indexes->GetIndexIdsSet().size() != indexIds.size()) { return false; @@ -154,7 +158,11 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in if (!indexIds.contains(i->GetIndexId())) { continue; } - indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + if (i->HasBlobData()) { + indexBlobs[i->GetIndexId()].emplace_back(i->GetBlobDataVerified()); + } else { + indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + } } for (auto&& i : indexIds) { if (!indexBlobs.contains(i)) { @@ -181,35 +189,12 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& columns) { auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion); - if (SequentialEntityIds.empty()) { - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()).AssembleTable()); - } else { - { - auto inMemColumns = columns->GetColumnIds(); - for (auto&& i : SequentialEntityIds) { - inMemColumns.erase(i); - } - if (inMemColumns.size()) { - auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), inMemColumns); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleTable()); - } - } - { - std::set scanColumns; - for (auto&& i : 
columns->GetColumnIds()) { - if (SequentialEntityIds.contains(i)) { - scanColumns.emplace(i); - } - } - if (scanColumns.size()) { - auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), scanColumns); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleForSeqAccess()); - } - } - } + MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()) + .AssembleToGeneralContainer(SequentialEntityIds)); } -bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& /*columns*/) { +bool TCommittedDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& /*columns*/) { if (ReadStarted) { return false; } @@ -222,7 +207,7 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptrSetIsBackgroundProcess(false); readAction->AddRange(CommittedBlob.GetBlobRange()); - std::vector> actions = {readAction}; + std::vector> actions = { readAction }; auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; @@ -230,17 +215,21 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& columns) { TMemoryProfileGuard mGuard("SCAN_PROFILE::ASSEMBLER::COMMITTED", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + const ISnapshotSchema::TPtr batchSchema = GetContext()->GetReadMetadata()->GetIndexVersions().GetSchemaVerified(GetCommitted().GetSchemaVersion()); + const ISnapshotSchema::TPtr resultSchema = GetContext()->GetReadMetadata()->GetResultSchema(); if (!GetStageData().GetTable()) { AFL_VERIFY(GetStageData().GetBlobs().size() == 1); 
auto bData = MutableStageData().ExtractBlob(GetStageData().GetBlobs().begin()->first); auto schema = GetContext()->GetReadMetadata()->GetBlobSchema(CommittedBlob.GetSchemaVersion()); - auto batch = NArrow::DeserializeBatch(bData, schema); - AFL_VERIFY(batch)("schema", schema->ToString()); - batch = GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(batch, CommittedBlob.GetSnapshot()); - batch = GetContext()->GetReadMetadata()->GetIndexInfo().AddDeleteFlagsColumn(batch, CommittedBlob.GetIsDelete()); + auto rBatch = NArrow::DeserializeBatch(bData, std::make_shared(CommittedBlob.GetSchemaSubset().Apply(schema->fields()))); + AFL_VERIFY(rBatch)("schema", schema->ToString()); + auto batch = std::make_shared(rBatch); + batchSchema->AdaptBatchToSchema(*batch, resultSchema); + GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(*batch, CommittedBlob.GetSnapshotDef(TSnapshot::Zero())); + GetContext()->GetReadMetadata()->GetIndexInfo().AddDeleteFlagsColumn(*batch, CommittedBlob.GetIsDelete()); MutableStageData().AddBatch(batch); } - MutableStageData().SyncTableColumns(columns->GetSchema()->fields()); + MutableStageData().SyncTableColumns(columns->GetSchema()->fields(), *resultSchema); } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 30e7fe04bf52..889f9fe5e7d4 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -1,16 +1,18 @@ #pragma once -#include "context.h" #include "columns_set.h" +#include "context.h" #include "fetched_data.h" + +#include +#include #include #include #include #include -#include -#include #include -#include -#include +#include +#include + #include namespace NKikimr::NOlap { @@ -41,8 +43,13 @@ class IDataSource { YDB_READONLY(ui32, IntervalsCount, 0); virtual 
NJson::TJsonValue DoDebugJson() const = 0; bool MergingStartedFlag = false; - bool AbortedFlag = false; + TAtomic SourceStartedFlag = 0; std::shared_ptr FetchingPlan; + std::vector> ResourceGuards; + std::optional FirstIntervalId; + ui32 CurrentPlanStepIndex = 0; + YDB_READONLY(TPKRangeFilter::EUsageClass, UsageClass, TPKRangeFilter::EUsageClass::PartialUsage); + protected: bool IsSourceInMemoryFlag = true; THashMap Intervals; @@ -53,8 +60,10 @@ class IDataSource { TAtomic FilterStageFlag = 0; bool IsReadyFlag = false; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) = 0; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) = 0; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; virtual void DoAssembleColumns(const std::shared_ptr& columns) = 0; virtual void DoAbort() = 0; virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) = 0; @@ -62,15 +71,43 @@ class IDataSource { virtual NJson::TJsonValue DoDebugJsonForMemory() const { return NJson::JSON_MAP; } + virtual bool DoAddTxConflict() = 0; + public: - void OnInitResourcesGuard(const std::shared_ptr& sourcePtr); + bool AddTxConflict() { + if (!Context->GetCommonContext()->HasLock()) { + return false; + } + if (DoAddTxConflict()) { + StageData->Clear(); + return true; + } + return false; + } + + ui64 GetResourceGuardsMemory() const { + ui64 result = 0; + for (auto&& i : ResourceGuards) { + result += i->GetMemory(); + } + return result; + } - bool IsAborted() const { - return AbortedFlag; + void RegisterAllocationGuard(const std::shared_ptr& guard) { + 
ResourceGuards.emplace_back(guard); } + bool IsSourceInMemory() const { return IsSourceInMemoryFlag; } + void SetFirstIntervalId(const ui64 value) { + AFL_VERIFY(!FirstIntervalId); + FirstIntervalId = value; + } + ui64 GetFirstIntervalId() const { + AFL_VERIFY(!!FirstIntervalId); + return *FirstIntervalId; + } virtual bool IsSourceInMemory(const std::set& fieldIds) const = 0; bool AddSequentialEntityIds(const ui32 entityId) { if (DoAddSequentialEntityIds(entityId)) { @@ -114,12 +151,12 @@ class IDataSource { DoAssembleColumns(columns); } - bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { - AFL_VERIFY(columns); + bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { return DoStartFetchingColumns(sourcePtr, step, columns); } - bool StartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + bool StartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_VERIFY(indexes); return DoStartFetchingIndexes(sourcePtr, step, indexes); } @@ -134,18 +171,18 @@ class IDataSource { virtual ui64 GetColumnRawBytes(const std::set& columnIds) const = 0; virtual ui64 GetIndexRawBytes(const std::set& indexIds) const = 0; + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const = 0; bool IsMergingStarted() const { return MergingStartedFlag; } void StartMerging() { - Y_ABORT_UNLESS(!MergingStartedFlag); + AFL_VERIFY(!MergingStartedFlag); MergingStartedFlag = true; } void Abort() { - AbortedFlag = true; Intervals.clear(); DoAbort(); } @@ -172,6 +209,17 @@ class IDataSource { return IsReadyFlag; } + void OnEmptyStageData() { + if (!ResourceGuards.size()) { + return; + } + if (ExclusiveIntervalOnly) { + ResourceGuards.back()->Update(0); + } else { + 
ResourceGuards.back()->Update(GetColumnRawBytes(Context->GetPKColumns()->GetColumnIds())); + } + } + const TFetchedData& GetStageData() const { AFL_VERIFY(StageData); return *StageData; @@ -182,11 +230,11 @@ class IDataSource { return *StageData; } - void RegisterInterval(TFetchingInterval& interval); + void RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr); - IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, - const ui32 recordsCount, const std::optional shardingVersion, const bool hasDeletions) + IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, const NArrow::TReplaceKey& start, + const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, const ui32 recordsCount, + const std::optional shardingVersion, const bool hasDeletions) : SourceIdx(sourceIdx) , Start(context->GetReadMetadata()->BuildSortedPosition(start)) , Finish(context->GetReadMetadata()->BuildSortedPosition(finish)) @@ -197,8 +245,9 @@ class IDataSource { , RecordSnapshotMax(recordSnapshotMax) , RecordsCount(recordsCount) , ShardingVersionOptional(shardingVersion) - , HasDeletions(hasDeletions) - { + , HasDeletions(hasDeletions) { + UsageClass = Context->GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(GetStartReplaceKey(), GetFinishReplaceKey()); + AFL_VERIFY(UsageClass != TPKRangeFilter::EUsageClass::DontUsage); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "portions_for_merge")("start", Start.DebugJson())("finish", Finish.DebugJson()); if (Start.IsReverseSort()) { std::swap(Start, Finish); @@ -207,7 +256,7 @@ class IDataSource { } virtual ~IDataSource() { - Y_ABORT_UNLESS(AbortedFlag || Intervals.empty()); + AFL_VERIFY(Intervals.empty()); } }; @@ -217,14 +266,16 @@ class TPortionDataSource: public IDataSource { std::set 
SequentialEntityIds; std::shared_ptr Portion; std::shared_ptr Schema; + mutable THashMap FingerprintedData; - void NeedFetchColumns(const std::set& columnIds, - TBlobsAction& blobsAction, THashMap& nullBlocks, - const std::shared_ptr& filter); + void NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, + THashMap& nullBlocks, const std::shared_ptr& filter); virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; virtual void DoAssembleColumns(const std::shared_ptr& columns) override; virtual NJson::TJsonValue DoDebugJson() const override { NJson::TJsonValue result = NJson::JSON_MAP; @@ -239,7 +290,7 @@ class TPortionDataSource: public IDataSource { for (auto&& i : SequentialEntityIds) { AFL_VERIFY(columns.erase(i)); } -// result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); + // result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); if (SequentialEntityIds.size()) { result.InsertValue("min_memory_seq", Portion->GetMinMemoryForReadColumns(SequentialEntityIds)); result.InsertValue("min_memory_seq_blobs", Portion->GetColumnBlobBytes(SequentialEntityIds)); @@ -257,10 +308,16 @@ class TPortionDataSource: public IDataSource { return Portion->GetPathId(); } virtual bool DoAddSequentialEntityIds(const ui32 entityId) override { + FingerprintedData.clear(); return 
SequentialEntityIds.emplace(entityId).second; } public: + virtual bool DoAddTxConflict() override { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return false; + } + virtual bool HasIndexes(const std::set& indexIds) const override { return Portion->HasIndexes(indexIds); } @@ -279,6 +336,13 @@ class TPortionDataSource: public IDataSource { } virtual ui64 GetColumnRawBytes(const std::set& columnsIds) const override { + AFL_VERIFY(columnsIds.size()); + const ui64 fp = CombineHashes(*columnsIds.begin(), *columnsIds.rbegin()); + auto it = FingerprintedData.find(fp); + if (it != FingerprintedData.end()) { + return it->second; + } + ui64 result = 0; if (SequentialEntityIds.size()) { std::set selectedSeq; std::set selectedInMem; @@ -289,12 +353,17 @@ class TPortionDataSource: public IDataSource { selectedInMem.emplace(i); } } - return Portion->GetMinMemoryForReadColumns(selectedSeq) - + Portion->GetColumnBlobBytes(selectedSeq) - + Portion->GetColumnRawBytes(selectedInMem, false); + result = Portion->GetMinMemoryForReadColumns(selectedSeq) + Portion->GetColumnBlobBytes(selectedSeq) + + Portion->GetColumnRawBytes(selectedInMem, false); } else { - return Portion->GetColumnRawBytes(columnsIds, false); + result = Portion->GetColumnRawBytes(columnsIds, false); } + FingerprintedData.emplace(fp, result); + return result; + } + + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const override { + return Portion->GetColumnBlobBytes(columnsIds, false); } virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { @@ -309,13 +378,11 @@ class TPortionDataSource: public IDataSource { return Portion; } - TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), portion->GetRecordsCount(), portion->GetShardingVersionOptional(), - 
portion->GetMeta().GetDeletionsCount()) + TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context) + : TBase(sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), + portion->GetRecordsCount(), portion->GetShardingVersionOptional(), portion->GetMeta().GetDeletionsCount()) , Portion(portion) - , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) - { + , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) { } }; @@ -326,11 +393,12 @@ class TCommittedDataSource: public IDataSource { bool ReadStarted = false; virtual void DoAbort() override { - } - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, const std::shared_ptr& /*indexes*/) override { + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, + const std::shared_ptr& /*indexes*/) override { return false; } virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& /*indexMeta*/) override { @@ -350,6 +418,18 @@ class TCommittedDataSource: public IDataSource { virtual bool DoAddSequentialEntityIds(const ui32 /*entityId*/) override { return false; } + + virtual bool DoAddTxConflict() override { + if (CommittedBlob.HasSnapshot()) { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return true; + } else if (!GetContext()->GetReadMetadata()->IsMyUncommitted(CommittedBlob.GetWriteIdVerified())) { + GetContext()->GetReadMetadata()->SetConflictedWriteId(CommittedBlob.GetWriteIdVerified()); + return true; + } + return false; + 
} + public: virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { THashMap result; @@ -373,6 +453,10 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob.GetBlobRange().Size; } + virtual ui64 GetColumnBlobBytes(const std::set& /*columnsIds*/) const override { + return CommittedBlob.GetBlobRange().Size; + } + virtual ui64 GetIndexRawBytes(const std::set& /*columnIds*/) const override { AFL_VERIFY(false); return 0; @@ -382,13 +466,11 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob; } - TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, committed.GetSnapshot(), committed.GetSnapshot(), committed.GetRecordsCount(), {}, - committed.GetIsDelete()) + TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context) + : TBase(sourceIdx, context, committed.GetFirst(), committed.GetLast(), committed.GetSnapshotDef(TSnapshot::Zero()), + committed.GetSnapshotDef(TSnapshot::Zero()), committed.GetRecordsCount(), {}, committed.GetIsDelete()) , CommittedBlob(committed) { - } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make index cfa691a22e84..93ba27575ade 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make @@ -18,6 +18,9 @@ PEERDIR( ydb/core/formats/arrow ydb/core/tx/columnshard/blobs_action ydb/core/tx/conveyor/usage + ydb/core/tx/limiter/grouped_memory/usage ) +GENERATE_ENUM_SERIALIZATION(columns_set.h) + END() diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp 
b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp index 076f8f11adc7..2a23b12c3fae 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp @@ -16,7 +16,8 @@ NKikimr::TConclusionStatus TMetadataFromStore::DoFillMetadata(const NColumnShard } THashSet pathIds; - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); @@ -42,7 +43,8 @@ NKikimr::TConclusionStatus TMetadataFromTable::DoFillMetadata(const NColumnShard if (!logsIndex) { return TConclusionStatus::Success(); } - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); if (fromPathId <= read.PathId && read.PathId <= toPathId) { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h index 08f4d78e0c08..33be2ac027b1 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h @@ -29,12 +29,12 @@ class TStatsIteratorBase: public TScanIteratorBase { return IndexGranules.empty(); } - virtual TConclusion> GetBatch() override { + virtual TConclusion> GetBatch() override { while (!Finished()) { auto batchOpt = ExtractStatsBatch(); if (!batchOpt) { AFL_VERIFY(Finished()); - return std::nullopt; + return std::shared_ptr(); } auto originalBatch = *batchOpt; if (originalBatch->num_rows() == 0) { @@ -55,10 +55,9 @@ class TStatsIteratorBase: public TScanIteratorBase { 
continue; } auto table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({resultBatch})); - TPartialReadResult out(table, lastKey, std::nullopt); - return std::move(out); + return std::make_shared(table, lastKey, std::nullopt); } - return std::nullopt; + return std::shared_ptr(); } std::optional> ExtractStatsBatch() { @@ -136,10 +135,6 @@ class TStatsIterator : public TStatsIteratorBase { } } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return StatsSchema; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original(1, "PathId"); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 1aeb83b4ea24..da3cc74f8e92 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -6,8 +6,19 @@ namespace NKikimr::NOlap::NReader::NSysView::NChunks { void TStatsIterator::AppendStats(const std::vector>& builders, const TPortionInfo& portion) const { auto portionSchema = ReadMetadata->GetLoadSchemaVerified(portion); - const std::string prod = ::ToString(portion.GetMeta().Produced); + auto it = PortionType.find(portion.GetMeta().Produced); + if (it == PortionType.end()) { + it = PortionType.emplace(portion.GetMeta().Produced, ::ToString(portion.GetMeta().Produced)).first; + } + const arrow::util::string_view prodView = it->second.GetView(); const bool activity = !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot()); + static const TString ConstantEntityIsColumn = "COL"; + static const arrow::util::string_view ConstantEntityIsColumnView = + arrow::util::string_view(ConstantEntityIsColumn.data(), ConstantEntityIsColumn.size()); + static const TString ConstantEntityIsIndex = "IDX"; + static const arrow::util::string_view ConstantEntityIsIndexView = + arrow::util::string_view(ConstantEntityIsIndex.data(), 
ConstantEntityIsIndex.size()); + auto& entityStorages = EntityStorageNames[portion.GetMeta().GetTierName()]; { std::vector records; for (auto&& r : portion.Records) { @@ -16,26 +27,54 @@ void TStatsIterator::AppendStats(const std::vector blobsIds; + std::optional lastColumnId; + arrow::util::string_view lastColumnName; + arrow::util::string_view lastTierName; for (auto&& r : records) { NArrow::Append(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetMeta().GetNumRows()); NArrow::Append(*builders[4], r->GetMeta().GetRawBytes()); NArrow::Append(*builders[5], portion.GetPortionId()); NArrow::Append(*builders[6], r->GetChunkIdx()); - NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + if (!lastColumnId || *lastColumnId != r->GetColumnId()) { + { + auto it = ColumnNamesById.find(r->GetColumnId()); + if (it == ColumnNamesById.end()) { + it = + ColumnNamesById.emplace(r->GetColumnId(), portionSchema->GetFieldByColumnIdVerified(r->GetColumnId())->name()).first; + } + lastColumnName = it->second.GetView(); + } + { + auto it = entityStorages.find(r->GetColumnId()); + if (it == entityStorages.end()) { + it = entityStorages.emplace(r->GetColumnId(), + portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName())).first; + } + lastTierName = it->second.GetView(); + } + lastColumnId = r->GetColumnId(); + } + NArrow::Append(*builders[7], lastColumnName); NArrow::Append(*builders[8], r->GetColumnId()); - std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); + { + auto itBlobIdString = blobsIds.find(r->GetBlobRange().GetBlobIdxVerified()); + if (itBlobIdString == blobsIds.end()) { + itBlobIdString = blobsIds.emplace( + 
r->GetBlobRange().GetBlobIdxVerified(), portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy()).first; + } + NArrow::Append( + *builders[9], arrow::util::string_view(itBlobIdString->second.data(), itBlobIdString->second.size())); + } NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], activity); + NArrow::Append(*builders[12], activity); - const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName()); - std::string strTierName(tierName.data(), tierName.size()); - NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "COL"); + NArrow::Append(*builders[13], arrow::util::string_view(lastTierName.data(), lastTierName.size())); + NArrow::Append(*builders[14], ConstantEntityIsColumnView); } } { @@ -48,7 +87,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetRecordsCount()); NArrow::Append(*builders[4], r->GetRawBytes()); @@ -56,15 +95,21 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], r->GetChunkIdx()); NArrow::Append(*builders[7], ReadMetadata->GetEntityName(r->GetIndexId()).value_or("undefined")); NArrow::Append(*builders[8], r->GetIndexId()); - std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); - NArrow::Append(*builders[10], r->GetBlobRange().Offset); - NArrow::Append(*builders[11], r->GetBlobRange().Size); - NArrow::Append(*builders[12], activity); + if (auto bRange = r->GetBlobRangeOptional()) { + std::string blobIdString = portion.GetBlobId(bRange->GetBlobIdxVerified()).ToStringLegacy(); + NArrow::Append(*builders[9], blobIdString); + 
NArrow::Append(*builders[10], bRange->Offset); + NArrow::Append(*builders[11], bRange->Size); + } else if (auto bData = r->GetBlobDataOptional()) { + NArrow::Append(*builders[9], "INPLACE"); + NArrow::Append(*builders[10], 0); + NArrow::Append(*builders[11], bData->size()); + } + NArrow::Append(*builders[12], activity); const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); std::string strTierName(tierName.data(), tierName.size()); NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "IDX"); + NArrow::Append(*builders[14], ConstantEntityIsIndexView); } } } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h index c881d4f161bf..6fb758f46911 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h @@ -28,6 +28,32 @@ class TReadStatsMetadata: public NAbstract::TReadStatsMetadata, std::enable_shar class TStatsIterator: public NAbstract::TStatsIterator { private: + class TViewContainer { + private: + TString Data; + std::string STLData; + arrow::util::string_view View; + + public: + const arrow::util::string_view& GetView() const { + return View; + } + + TViewContainer(const TString& data) + : Data(data) + , View(arrow::util::string_view(Data.data(), Data.size())) { + } + + TViewContainer(const std::string& data) + : STLData(data) + , View(arrow::util::string_view(STLData.data(), STLData.size())) { + } + }; + + mutable THashMap ColumnNamesById; + mutable THashMap PortionType; + mutable THashMap> EntityStorageNames; + using TBase = NAbstract::TStatsIterator; virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override; diff --git 
a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index 76f1bdda2c7c..83b2306a1bff 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -16,12 +16,23 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], portion.GetColumnBlobBytes()); NArrow::Append(*builders[7], portion.GetIndexBlobBytes()); NArrow::Append(*builders[8], portion.GetPortionId()); - NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); + NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); auto tierName = portion.GetTierNameDef(NBlobOperations::TGlobal::DefaultStorageId); NArrow::Append(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); - auto statInfo = portion.GetMeta().GetStatisticsStorage().SerializeToProto().DebugString(); + NJson::TJsonValue statReport = NJson::JSON_ARRAY; + for (auto&& i : portion.GetIndexes()) { + if (!i.HasBlobData()) { + continue; + } + auto schema = portion.GetSchema(ReadMetadata->GetIndexVersions()); + auto indexMeta = schema->GetIndexInfo().GetIndexVerified(i.GetEntityId()); + statReport.AppendValue(indexMeta->SerializeDataToJson(i, schema->GetIndexInfo())); + } + auto statInfo = statReport.GetStringRobust(); NArrow::Append(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); + + NArrow::Append(*builders[12], portion.HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)); } ui32 TStatsIterator::PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const { diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp index 5decb79c2203..55d28a5a61f4 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp @@ -1,94 +1,91 @@ #include "tx_internal_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxInternalScan::Execute"); +void TTxInternalScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); auto& request = *InternalScanEvent->Get(); - const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, Self->LastPlannedTxId)); + auto scanComputeActor = InternalScanEvent->Sender; - TReadDescription read(snapshot, request.GetReverse()); - read.PathId = request.GetPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - std::unique_ptr scannerConstructor(new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); - read.ColumnIds = request.GetColumnIds(); - read.ColumnNames = request.GetColumnNames(); - if (request.RangesFilter) { - read.PKRangesFilter = std::move(*request.RangesFilter); - } + auto ev = MakeHolder(ScanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" + << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - AFL_VERIFY(vIndex); - { - TProgramContainer pContainer; - pContainer.OverrideProcessingColumns(read.ColumnNames); - read.SetProgram(std::move(pContainer)); - } + ctx.Send(scanComputeActor, ev.Release()); +} - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); +bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { return true; } void TTxInternalScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxInternalScan::Complete"); + auto& request = *InternalScanEvent->Get(); auto scanComputeActor = InternalScanEvent->Sender; - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build()("tablet", Self->TabletID()); - - if (!ReadMetadataRange) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, Self->LastPlannedTxId)); + const NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build()("tablet", Self->TabletID())("snapshot", snapshot.DebugString()); + TReadMetadataPtr readMetadataRange; + { + TReadDescription read(snapshot, request.GetReverse()); + read.PathId = request.GetPathId(); + read.LockId = LockId; + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + std::unique_ptr scannerConstructor( + new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); + read.ColumnIds = request.GetColumnIds(); + read.ColumnNames = request.GetColumnNames(); + if (request.RangesFilter) { + read.PKRangesFilter = request.RangesFilter; + } - auto ev = MakeHolder(ScanGen, Self->TabletID()); - 
ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + AFL_VERIFY(vIndex); + { + TProgramContainer pContainer; + pContainer.OverrideProcessingColumns(read.ColumnNames); + read.SetProgram(std::move(pContainer)); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot create read metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")"; + detailedInfo << " read metadata: (" << *readMetadataRange << ")"; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(ScanGen, Self->TabletID()); + readMetadataRange->OnBeforeStartReading(*Self); - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << 
requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), - TComputeShardingPolicy(), ScanId, TxId, ScanGen, *requestCookie, Self->TabletID(), TDuration::Max(), ReadMetadataRange, - NKikimrDataEvents::FORMAT_ARROW, Self->ScanCounters)); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), TComputeShardingPolicy(), + ScanId, LockId.value_or(0), ScanGen, requestCookie, Self->TabletID(), TDuration::Max(), readMetadataRange, + NKikimrDataEvents::FORMAT_ARROW, + Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxInternalScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h index 3e20c23a7c10..413d33714867 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h @@ -7,25 +7,26 @@ namespace NKikimr::NOlap::NReader { class TTxInternalScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; const ui32 ScanGen = 1; - const ui32 TxId = 1; const ui32 ScanId = 1; + const std::optional LockId; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using 
TReadMetadataPtr = TReadMetadataBase::TConstPtr; TTxInternalScan(NColumnShard::TColumnShard* self, TEvColumnShard::TEvInternalScan::TPtr& ev) : TBase(self) - , InternalScanEvent(ev) { + , InternalScanEvent(ev) + , LockId(InternalScanEvent->Get()->GetLockId()) + { } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; void Complete(const TActorContext& ctx) override; TTxType GetTxType() const override { return NColumnShard::TXTYPE_START_INTERNAL_SCAN; } -private: - TString ErrorDescription; - TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; - TReadMetadataPtr ReadMetadataRange; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp index ab0d41db1931..74f09deb0197 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp @@ -1,181 +1,32 @@ #include "tx_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -std::vector ExtractTypes(const std::vector>& columns) { - std::vector types; - types.reserve(columns.size()); - for (auto& [name, type] : columns) { - types.push_back(type); - } - return types; -} - -TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { - Y_ABORT_UNLESS(cells.size() == columns.size()); - if (cells.empty()) { - return {}; - } - - std::vector types = ExtractTypes(columns); - - NArrow::TArrowBatchBuilder batchBuilder; - batchBuilder.Reserve(1); - auto startStatus = batchBuilder.Start(columns); - Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); - - batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); - - auto batch = batchBuilder.FlushBatch(false); - Y_ABORT_UNLESS(batch); - Y_ABORT_UNLESS(batch->num_columns() == 
(int)cells.size()); - Y_ABORT_UNLESS(batch->num_rows() == 1); - return NArrow::SerializeBatchNoCompression(batch); -} - -std::pair RangePredicates(const TSerializedTableRange& range, const std::vector>& columns) { - std::vector leftCells; - std::vector> leftColumns; - bool leftTrailingNull = false; - { - TConstArrayRef cells = range.From.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - leftCells.reserve(size); - leftColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - leftCells.push_back(cells[i]); - leftColumns.push_back(columns[i]); - leftTrailingNull = false; - } else { - leftTrailingNull = true; - } - } - } - - std::vector rightCells; - std::vector> rightColumns; - bool rightTrailingNull = false; - { - TConstArrayRef cells = range.To.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - rightCells.reserve(size); - rightColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - rightCells.push_back(cells[i]); - rightColumns.push_back(columns[i]); - rightTrailingNull = false; - } else { - rightTrailingNull = true; - } - } - } +void TTxScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); + const auto& request = Ev->Get()->Record; + const TString table = request.GetTablePath(); + const ui32 scanGen = request.GetGeneration(); + const auto scanComputeActor = Ev->Sender; - const bool fromInclusive = range.FromInclusive || leftTrailingNull; - const bool toInclusive = range.ToInclusive && !rightTrailingNull; - - TString leftBorder = FromCells(leftCells, leftColumns); - TString rightBorder = FromCells(rightCells, rightColumns); - auto leftSchema = NArrow::MakeArrowSchema(leftColumns); - Y_ASSERT(leftSchema.ok()); - auto rightSchema = NArrow::MakeArrowSchema(rightColumns); - 
Y_ASSERT(rightSchema.ok()); - return std::make_pair( - TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), - TPredicate(toInclusive ? NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); -} + auto ev = MakeHolder(scanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); -static bool FillPredicatesFromRange(TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, - const std::vector>& ydbPk, ui64 tabletId, const TIndexInfo* indexInfo, TString& error) { - TSerializedTableRange range(keyRange); - auto fromPredicate = std::make_shared(); - auto toPredicate = std::make_shared(); - std::tie(*fromPredicate, *toPredicate) = RangePredicates(range, ydbPk); - - LOG_S_DEBUG("TTxScan range predicate. 
From key size: " << range.From.GetCells().size() - << " To key size: " << range.To.GetCells().size() - << " greater predicate over columns: " << fromPredicate->ToString() - << " less predicate over columns: " << toPredicate->ToString() - << " at tablet " << tabletId); - - if (!read.PKRangesFilter.Add(fromPredicate, toPredicate, indexInfo)) { - error = "Error building filter"; - return false; - } - return true; + ctx.Send(scanComputeActor, ev.Release()); } bool TTxScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxScan::Execute"); - auto& record = Ev->Get()->Record; - TSnapshot snapshot(record.GetSnapshot().GetStep(), record.GetSnapshot().GetTxId()); - const auto scanId = record.GetScanId(); - const ui64 txId = record.GetTxId(); - - LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - - TReadDescription read(snapshot, record.GetReverse()); - read.PathId = record.GetLocalPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - read.TableName = record.GetTablePath(); - bool isIndex = false; - std::unique_ptr scannerConstructor = [&]() { - const ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; - auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); - isIndex = !sysViewPolicy; - if (!sysViewPolicy) { - return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, record.GetReverse())); - } else { - return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, record.GetReverse()); - } - }(); - read.ColumnIds.assign(record.GetColumnTags().begin(), record.GetColumnTags().end()); - read.StatsMode = record.GetStatsMode(); - - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - auto parseResult = scannerConstructor->ParseProgram(vIndex, record, read); - if (!parseResult) { - ErrorDescription = parseResult.GetErrorMessage(); - return true; - } - - if (!record.RangesSize()) { - auto range = scannerConstructor->BuildReadMetadata(Self, read); - if (range.IsSuccess()) { - ReadMetadataRange = range.DetachResult(); - } else { - ErrorDescription = range.GetErrorMessage(); - } - return true; - } - - auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); - auto* indexInfo = (vIndex && isIndex) ? &vIndex->GetSchema(snapshot)->GetIndexInfo() : nullptr; - for (auto& range : record.GetRanges()) { - if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), indexInfo, ErrorDescription)) { - ReadMetadataRange = nullptr; - return true; - } - } - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); return true; } @@ -183,67 +34,101 @@ void TTxScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxScan::Complete"); auto& request = Ev->Get()->Record; auto scanComputeActor = Ev->Sender; - const auto& snapshot = request.GetSnapshot(); + TSnapshot snapshot = TSnapshot(request.GetSnapshot().GetStep(), request.GetSnapshot().GetTxId()); + if (snapshot.IsZero()) { + snapshot = Self->GetLastTxSnapshot(); + } const auto scanId = request.GetScanId(); const ui64 txId = request.GetTxId(); const ui32 scanGen = request.GetGeneration(); - TString table = request.GetTablePath(); - auto dataFormat = request.GetDataFormat(); + const TString table = request.GetTablePath(); + const auto dataFormat = request.GetDataFormat(); const TDuration timeout = TDuration::MilliSeconds(request.GetTimeoutMs()); if (scanGen > 1) { - Self->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); 
+ Self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); } - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() - ("tx_id", txId)("scan_id", scanId)("gen", scanGen)("table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); + const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() ("tx_id", txId)("scan_id", scanId)("gen", scanGen)( + "table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); - if (!ReadMetadataRange) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + TReadMetadataPtr readMetadataRange; + { + LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - auto ev = MakeHolder(scanGen, Self->TabletID()); - ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + TReadDescription read(snapshot, request.GetReverse()); + read.TxId = txId; + if (request.HasLockTxId()) { + read.LockId = request.GetLockTxId(); + } + read.PathId = request.GetLocalPathId(); + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + read.TableName = table; + bool isIndex = false; + std::unique_ptr scannerConstructor = [&]() { + const ui64 itemsLimit = request.HasItemsLimit() ? 
request.GetItemsLimit() : 0; + auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); + isIndex = !sysViewPolicy; + if (!sysViewPolicy) { + return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, request.GetReverse())); + } else { + return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, request.GetReverse()); + } + }(); + read.ColumnIds.assign(request.GetColumnTags().begin(), request.GetColumnTags().end()); + read.StatsMode = request.GetStatsMode(); + + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + auto parseResult = scannerConstructor->ParseProgram(vIndex, request, read); + if (!parseResult) { + return SendError("cannot parse program", parseResult.GetErrorMessage(), ctx); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + if (!request.RangesSize()) { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (newRange.IsSuccess()) { + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } else { + return SendError("cannot build metadata withno ranges", newRange.GetErrorMessage(), ctx); + } + } else { + auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); + { + auto filterConclusion = NOlap::TPKRangesFilter::BuildFromProto(request, request.GetReverse(), ydbKey); + if (filterConclusion.IsFail()) { + return SendError("cannot build ranges filter", filterConclusion.GetErrorMessage(), ctx); + } + read.PKRangesFilter = std::make_shared(filterConclusion.DetachResult()); + } + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot build metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + AFL_VERIFY(readMetadataRange); + readMetadataRange->OnBeforeStartReading(*Self); + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, 
NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")" << " req: " << request; + detailedInfo << " read metadata: (" << *readMetadataRange << ")" + << " req: " << request; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(scanGen, Self->TabletID()); - - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, statsDelta.Portions); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, statsDelta.Blobs); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, statsDelta.Rows); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); + Self->Counters.GetTabletCounters()->OnScanStarted(Self->InFlightReadsTracker.GetSelectStatsDelta()); TComputeShardingPolicy shardingPolicy; AFL_VERIFY(shardingPolicy.DeserializeFromProto(request.GetComputeShardingPolicy())); - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, 
Self->GetStoragesManager(), - shardingPolicy, scanId, txId, scanGen, *requestCookie, Self->TabletID(), timeout, ReadMetadataRange, dataFormat, Self->ScanCounters)); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), shardingPolicy, scanId, + txId, scanGen, requestCookie, Self->TabletID(), timeout, readMetadataRange, dataFormat, Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h index 2d9eb9619a64..347def3dea14 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h @@ -7,6 +7,8 @@ namespace NKikimr::NOlap::NReader { class TTxScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using TReadMetadataPtr = TReadMetadataBase::TConstPtr; @@ -17,12 +19,12 @@ class TTxScan: public NTabletFlatExecutor::TTransactionBase #include -#include +#include #include #include @@ -14,36 +14,25 @@ std::shared_ptr IIndexInfo::GetColumnLoaderVerifi return result; } -std::shared_ptr IIndexInfo::AddDeleteFlagsColumn(const std::shared_ptr& batch, const bool isDelete) { - Y_ABORT_UNLESS(batch); - i64 numColumns = batch->num_columns(); - i64 numRows = batch->num_rows(); +void IIndexInfo::AddDeleteFlagsColumn(NArrow::TGeneralContainer& batch, const bool isDelete) { + const i64 numRows = batch.num_rows(); - AFL_VERIFY(!batch->GetColumnByName(SPEC_COL_DELETE_FLAG)); - return NArrow::TStatusValidator::GetValid(batch->AddColumn(numColumns, arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()), 
- NArrow::TThreadSimpleArraysCache::GetConst(arrow::boolean(), std::make_shared(isDelete), numRows))); + batch.AddField(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()), + NArrow::TThreadSimpleArraysCache::GetConst(arrow::boolean(), std::make_shared(isDelete), numRows)).Validate(); } -std::shared_ptr IIndexInfo::AddSnapshotColumns(const std::shared_ptr& batch, const TSnapshot& snapshot) { - Y_ABORT_UNLESS(batch); - i64 numColumns = batch->num_columns(); - i64 numRows = batch->num_rows(); +void IIndexInfo::AddSnapshotColumns(NArrow::TGeneralContainer& batch, const TSnapshot& snapshot) { + const i64 numRows = batch.num_rows(); - auto res = batch->AddColumn(numColumns, arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), - NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)); - Y_ABORT_UNLESS(res.ok()); - res = (*res)->AddColumn(numColumns + 1, arrow::field(SPEC_COL_TX_ID, arrow::uint64()), - NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)); - Y_ABORT_UNLESS(res.ok()); - Y_ABORT_UNLESS((*res)->num_columns() == numColumns + 2); - return *res; + batch.AddField(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)).Validate(); + batch.AddField(arrow::field(SPEC_COL_TX_ID, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)).Validate(); } -std::shared_ptr IIndexInfo::NormalizeDeletionColumn(const std::shared_ptr& batch) { - if (batch->schema()->GetFieldIndex(SPEC_COL_DELETE_FLAG) >= 0) { - return batch; +void IIndexInfo::NormalizeDeletionColumn(NArrow::TGeneralContainer& batch) { + if (batch.HasColumn(SPEC_COL_DELETE_FLAG)) { + return; } - return AddDeleteFlagsColumn(batch, false); + AddDeleteFlagsColumn(batch, false); } std::optional IIndexInfo::GetColumnIdOptional(const std::string& name) const { diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h index ab6cd67a3937..b630b5780fbd 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -1,14 +1,19 @@ #pragma once -#include "loader.h" +#include +#include +#include #include #include namespace NKikimr::NOlap { +using TColumnLoader = NArrow::NAccessor::TColumnLoader; +using TColumnSaver = NArrow::NAccessor::TColumnSaver; + class IIndexInfo { public: - enum class ESpecialColumn: ui32 { + enum class ESpecialColumn : ui32 { PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID_INDEX, DELETE_FLAG = NOlap::NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX @@ -16,7 +21,7 @@ class IIndexInfo { using TSystemColumnsSet = ui64; - enum class ESystemColumnsSet: ui64 { + enum class ESystemColumnsSet : ui64 { Snapshot = 1, Deletion = 1 << 1, }; @@ -29,6 +34,11 @@ class IIndexInfo { return SPEC_COL_DELETE_FLAG; } + static const std::set& GetNecessarySystemColumnIdsSet() { + static const std::set result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID }; + return result; + } + static const std::vector& GetSnapshotColumnNames() { static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID) }; return result; @@ -61,15 +71,10 @@ class IIndexInfo { return std::make_shared(std::move(fields)); } - void AddSpecialFieldIds(std::vector& result) const { - result.emplace_back((ui32)ESpecialColumn::PLAN_STEP); - result.emplace_back((ui32)ESpecialColumn::TX_ID); - result.emplace_back((ui32)ESpecialColumn::DELETE_FLAG); - } - static void AddSpecialFields(std::vector>& fields) { AddSnapshotFields(fields); - fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); + static const std::shared_ptr f = arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()); + fields.push_back(f); } static const std::vector& SnapshotColumnNames() { @@ -78,8 +83,14 @@ class IIndexInfo { } static void AddSnapshotFields(std::vector>& 
fields) { - fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); - fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); + static const std::shared_ptr ps = arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()); + static const std::shared_ptr txid = arrow::field(SPEC_COL_TX_ID, arrow::uint64()); + fields.push_back(ps); + fields.push_back(txid); + } + + static void AddDeleteFields(std::vector>& fields) { + fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); } static const std::set& GetSnapshotColumnIdsSet() { @@ -88,15 +99,38 @@ class IIndexInfo { } static const std::vector& GetSystemColumnNames() { - static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), std::string(SPEC_COL_DELETE_FLAG) }; + static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), + std::string(SPEC_COL_DELETE_FLAG) }; return result; } static const std::vector& GetSystemColumnIds() { - static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, (ui32)ESpecialColumn::DELETE_FLAG }; + static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, + (ui32)ESpecialColumn::DELETE_FLAG }; return result; } + [[nodiscard]] static std::vector AddSpecialFieldIds(const std::vector& baseColumnIds) { + std::vector result = baseColumnIds; + const auto& cIds = GetSystemColumnIds(); + result.insert(result.end(), cIds.begin(), cIds.end()); + return result; + } + + [[nodiscard]] static std::set AddSpecialFieldIds(const std::set& baseColumnIds) { + std::set result = baseColumnIds; + const auto& cIds = GetSystemColumnIds(); + result.insert(cIds.begin(), cIds.end()); + return result; + } + + [[nodiscard]] static std::vector AddSnapshotFieldIds(const std::vector& baseColumnIds) { + std::vector result = baseColumnIds; + for (auto&& i : GetSnapshotColumnIds()) { + result.emplace_back(i); + } + return result; + } 
std::optional GetColumnIdOptional(const std::string& name) const; TString GetColumnName(ui32 id, bool required) const; @@ -106,27 +140,24 @@ class IIndexInfo { virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; - static std::shared_ptr NormalizeDeletionColumn(const std::shared_ptr& batch); + static void NormalizeDeletionColumn(NArrow::TGeneralContainer& batch); - static std::shared_ptr AddSnapshotColumns(const std::shared_ptr& batch, const TSnapshot& snapshot); - static std::shared_ptr AddDeleteFlagsColumn(const std::shared_ptr& batch, const bool isDelete); + static void AddSnapshotColumns(NArrow::TGeneralContainer& batch, const TSnapshot& snapshot); + static void AddDeleteFlagsColumn(NArrow::TGeneralContainer& batch, const bool isDelete); static ui64 GetSpecialColumnsRecordSize() { return sizeof(ui64) + sizeof(ui64) + sizeof(bool); } static std::shared_ptr ArrowSchemaSnapshot() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), - arrow::field(SPEC_COL_TX_ID, arrow::uint64()) - }); + static std::shared_ptr result = std::make_shared( + arrow::FieldVector{ arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), arrow::field(SPEC_COL_TX_ID, arrow::uint64()) }); return result; } static std::shared_ptr ArrowSchemaDeletion() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) - }); + static std::shared_ptr result = + std::make_shared(arrow::FieldVector{ arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) }); return result; } @@ -135,15 +166,16 @@ class IIndexInfo { } static bool IsSpecialColumn(const std::string& fieldName) { - return fieldName == SPEC_COL_PLAN_STEP - || fieldName == SPEC_COL_TX_ID - || fieldName == SPEC_COL_DELETE_FLAG; + return fieldName == SPEC_COL_PLAN_STEP || fieldName == SPEC_COL_TX_ID || fieldName == 
SPEC_COL_DELETE_FLAG; } static bool IsSpecialColumn(const ui32 fieldId) { - return fieldId == (ui32)ESpecialColumn::PLAN_STEP - || fieldId == (ui32)ESpecialColumn::TX_ID - || fieldId == (ui32)ESpecialColumn::DELETE_FLAG; + return fieldId == (ui32)ESpecialColumn::PLAN_STEP || fieldId == (ui32)ESpecialColumn::TX_ID || + fieldId == (ui32)ESpecialColumn::DELETE_FLAG; + } + + static bool IsNullableVerified(const ui32 /*fieldId*/) { + return false; } static ui32 GetSpecialColumnByteWidth(const ui32 field) { @@ -164,4 +196,4 @@ class IIndexInfo { virtual ~IIndexInfo() = default; }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp deleted file mode 100644 index d74dc491519d..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "loader.h" -#include - -namespace NKikimr::NOlap { - -TString TColumnLoader::DebugString() const { - TStringBuilder result; - if (ExpectedSchema) { - result << "schema:" << ExpectedSchema->ToString() << ";"; - } - if (Transformer) { - result << "transformer:" << Transformer->DebugString() << ";"; - } - if (Serializer) { - result << "serializer:" << Serializer->DebugString() << ";"; - } - return result; -} - -TColumnLoader::TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId) - : Transformer(transformer) - , Serializer(serializer) - , ExpectedSchema(expectedSchema) - , DefaultValue(defaultValue) - , ColumnId(columnId) { - Y_ABORT_UNLESS(ExpectedSchema); - auto fieldsCountStr = ::ToString(ExpectedSchema->num_fields()); - Y_ABORT_UNLESS(ExpectedSchema->num_fields() == 1, "%s", fieldsCountStr.data()); - Y_ABORT_UNLESS(Serializer); -} - -const std::shared_ptr& 
TColumnLoader::GetField() const { - return ExpectedSchema->field(0); -} - -arrow::Result> TColumnLoader::Apply(const TString& data) const { - Y_ABORT_UNLESS(Serializer); - arrow::Result> columnArray = - Transformer ? Serializer->Deserialize(data) : Serializer->Deserialize(data, ExpectedSchema); - if (!columnArray.ok()) { - return columnArray; - } - if (Transformer) { - return Transformer->Transform(*columnArray); - } else { - return columnArray; - } -} - -std::shared_ptr TColumnLoader::ApplyVerified(const TString& data) const { - return NArrow::TStatusValidator::GetValid(Apply(data)); -} - -std::shared_ptr TColumnLoader::ApplyVerifiedColumn(const TString& data) const { - auto rb = ApplyVerified(data); - AFL_VERIFY(rb->num_columns() == 1)("schema", rb->schema()->ToString()); - return rb->column(0); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h deleted file mode 100644 index d128caaecb58..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TColumnLoader { -private: - NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; - YDB_READONLY_DEF(std::shared_ptr, ExpectedSchema); - YDB_READONLY_DEF(std::shared_ptr, DefaultValue); - const ui32 ColumnId; -public: - bool IsEqualTo(const TColumnLoader& item) const { - if (!!Transformer != !!item.Transformer) { - return false; - } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { - return false; - } - if (!Serializer.IsEqualTo(item.Serializer)) { - return false; - } - return true; - } - - TString DebugString() const; - - TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& 
expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId); - - ui32 GetColumnId() const { - return ColumnId; - } - - const std::shared_ptr& GetField() const; - - arrow::Result> Apply(const TString& data) const; - - std::shared_ptr ApplyVerified(const TString& data) const; - - std::shared_ptr ApplyVerifiedColumn(const TString& data) const; -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp deleted file mode 100644 index c15db92b8eec..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "saver.h" - -namespace NKikimr::NOlap { - -TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer) - : Transformer(transformer) - , Serializer(serializer) -{ - Y_ABORT_UNLESS(Serializer); -} - -bool TColumnSaver::IsHardPacker() const { - return Serializer->IsHardPacker(); -} - -TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr field) const { - auto schema = std::make_shared(arrow::FieldVector{field}); - auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); - return Apply(batch); -} - -TString TColumnSaver::Apply(const std::shared_ptr& data) const { - Y_ABORT_UNLESS(Serializer); - if (Transformer) { - return Serializer->SerializeFull(Transformer->Transform(data)); - } else { - return Serializer->SerializePayload(data); - } -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h deleted file mode 100644 index c4d10c55a359..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TColumnSaver { -private: - NArrow::NTransformation::ITransformer::TPtr 
Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; -public: - TColumnSaver() = default; - TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); - - void ResetSerializer(const NArrow::NSerialization::TSerializerContainer& serializer) { - AFL_VERIFY(serializer); - if (Serializer.IsCompatibleForExchange(serializer)) { - Serializer = serializer; - } else { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_reset_serializer")("reason", "incompatible_serializers"); - } - } - - bool IsHardPacker() const; - - TString Apply(std::shared_ptr data, std::shared_ptr field) const; - - TString Apply(const std::shared_ptr& data) const; -}; - - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make index b830415daae1..79b12f94389e 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make @@ -1,15 +1,12 @@ LIBRARY() SRCS( - saver.cpp index_info.cpp - loader.cpp ) PEERDIR( ydb/library/actors/core - ydb/core/formats/arrow/transformer - ydb/core/formats/arrow/serializer + ydb/core/formats/arrow/save_load ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp index b9473398fdf3..b4fae5cf09ab 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp @@ -19,8 +19,7 @@ NArrow::NTransformation::ITransformer::TPtr TSimpleColumnInfo::GetLoadTransforme return transformer; } -TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) -{ +TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) { AFL_VERIFY(columnInfo.GetId() == 
ColumnId); if (columnInfo.HasSerializer()) { AFL_VERIFY(Serializer.DeserializeFromProto(columnInfo.GetSerializer())); @@ -30,29 +29,34 @@ TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp: if (columnInfo.HasDefaultValue()) { DefaultValue.DeserializeFromProto(columnInfo.GetDefaultValue()).Validate(); } + if (columnInfo.HasDataAccessorConstructor()) { + AFL_VERIFY(DataAccessorConstructor.DeserializeFromProto(columnInfo.GetDataAccessorConstructor())); + } + IsNullable = columnInfo.HasNotNull() ? !columnInfo.GetNotNull() : true; AFL_VERIFY(Serializer); if (columnInfo.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnInfo.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); DictionaryEncoding = *settings; } - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared(GetLoadTransformer(), Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); return TConclusionStatus::Success(); } -TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const bool needMinMax, const bool isSorted, +TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) : ColumnId(columnId) , ArrowField(arrowField) - , ArrowSchema(std::make_shared(arrow::FieldVector({arrowField}))) , Serializer(serializer) , NeedMinMax(needMinMax) , IsSorted(isSorted) + , IsNullable(isNullable) , DefaultValue(defaultValue) { ColumnName = ArrowField->name(); - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared( + GetLoadTransformer(), 
Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); } std::vector> TSimpleColumnInfo::ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const { @@ -86,7 +90,7 @@ std::vector> TSimpleColumnInf } std::vector> result; for (auto&& s : source) { - auto data = NArrow::TStatusValidator::GetValid(sourceColumnFeatures.Loader->Apply(s->GetData())); + auto data = sourceColumnFeatures.Loader->ApplyRawVerified(s->GetData()); result.emplace_back(s->CopyWithAnotherBlob(GetColumnSaver().Apply(data), *this)); } return result; diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.h b/ydb/core/tx/columnshard/engines/scheme/column/info.h index 6db21f1fe876..ef47445bf665 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.h +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.h @@ -1,17 +1,18 @@ #pragma once -#include -#include -#include - +#include +#include #include +#include +#include #include -#include -#include +#include +#include +#include #include -#include #include +#include namespace NKikimr::NOlap { @@ -22,19 +23,19 @@ class TSimpleColumnInfo { YDB_READONLY(ui32, ColumnId, 0); YDB_READONLY_DEF(TString, ColumnName); YDB_READONLY_DEF(std::shared_ptr, ArrowField); - YDB_READONLY_DEF(std::shared_ptr, ArrowSchema); YDB_READONLY(NArrow::NSerialization::TSerializerContainer, Serializer, NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()); + YDB_READONLY(NArrow::NAccessor::TConstructorContainer, DataAccessorConstructor, NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor()); YDB_READONLY(bool, NeedMinMax, false); YDB_READONLY(bool, IsSorted, false); + YDB_READONLY(bool, IsNullable, false); YDB_READONLY_DEF(TColumnDefaultScalarValue, DefaultValue); std::optional DictionaryEncoding; std::shared_ptr Loader; NArrow::NTransformation::ITransformer::TPtr GetLoadTransformer() const; public: - - TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& 
arrowField, - const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, + TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue); TColumnSaver GetColumnSaver() const { @@ -43,7 +44,8 @@ class TSimpleColumnInfo { return TColumnSaver(transformer, Serializer); } - std::vector> ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; + std::vector> ActualizeColumnData( + const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; TString DebugString() const { TStringBuilder sb; @@ -62,4 +64,4 @@ class TSimpleColumnInfo { } }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.h b/ydb/core/tx/columnshard/engines/scheme/column_features.h index 671b35e57734..c31c2a970a00 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.h +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.h @@ -1,15 +1,14 @@ #pragma once -#include "abstract/loader.h" -#include "abstract/saver.h" #include "column/info.h" #include #include -#include +#include #include #include #include -#include +#include +#include #include #include @@ -34,9 +33,9 @@ class TColumnFeatures: public TSimpleColumnInfo { YDB_READONLY_DEF(std::shared_ptr, Operator); public: TColumnFeatures(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, + const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) - : TBase(columnId, arrowField, serializer, needMinMax, isSorted, defaultValue) + : TBase(columnId, 
arrowField, serializer, needMinMax, isSorted, isNullable, defaultValue) , Operator(bOperator) { AFL_VERIFY(Operator); diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index b49c6a3e7b53..5059cc5eba79 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -1,36 +1,25 @@ #include "index_info.h" -#include "statistics/abstract/operator.h" - -#include -#include #include #include #include -#include +#include #include #include +#include +#include +#include +#include namespace NKikimr::NOlap { -static std::vector NamesOnly(const std::vector& columns) { - std::vector out; - out.reserve(columns.size()); - for (const auto& [name, _] : columns) { - out.push_back(name); - } - return out; -} - TIndexInfo::TIndexInfo(const TString& name) - : NTable::TScheme::TTableSchema() - , Name(name) -{ + : Name(name) { CompactionPlannerConstructor = NStorageOptimizer::IOptimizerPlannerConstructor::BuildDefault(); } bool TIndexInfo::CheckCompatible(const TIndexInfo& other) const { - if (!other.GetPrimaryKey()->Equals(GetPrimaryKey())) { + if (!other.GetPrimaryKey()->Equals(PrimaryKey)) { return false; } return true; @@ -43,42 +32,39 @@ ui32 TIndexInfo::GetColumnIdVerified(const std::string& name) const { } std::optional TIndexInfo::GetColumnIdOptional(const std::string& name) const { - const auto ni = ColumnNames.find(name); - - if (ni != ColumnNames.end()) { - return ni->second; + const auto pred = [](const TNameInfo& item, const std::string& value) { + return item.GetName() < value; + }; + auto it = std::lower_bound(ColumnNames.begin(), ColumnNames.end(), name, pred); + if (it != ColumnNames.end() && it->GetName() == name) { + return it->GetColumnId(); } return IIndexInfo::GetColumnIdOptional(name); } -TString TIndexInfo::GetColumnName(ui32 id, bool required) const { - const auto ci = Columns.find(id); - - if (ci != Columns.end()) { - return 
ci->second.Name; +TString TIndexInfo::GetColumnName(const ui32 id, bool required) const { + const auto& f = GetColumnFeaturesOptional(id); + if (!f) { + AFL_VERIFY(!required); + return ""; + } else { + return f->GetColumnName(); } - - return IIndexInfo::GetColumnName(id, required); } -std::vector TIndexInfo::GetColumnIds(const bool withSpecial) const { - std::vector result; - for (auto&& i : Columns) { - result.emplace_back(i.first); - } +const std::vector& TIndexInfo::GetColumnIds(const bool withSpecial) const { if (withSpecial) { - IIndexInfo::AddSpecialFieldIds(result); + return SchemaColumnIdsWithSpecials; + } else { + return SchemaColumnIds; } - return result; } std::vector TIndexInfo::GetColumnNames(const std::vector& ids) const { std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } @@ -87,46 +73,21 @@ std::vector TIndexInfo::GetColumnSTLNames(const std::vector& std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } -std::vector TIndexInfo::GetColumns(const std::vector& ids) const { - return NOlap::GetColumns(*this, ids); -} - -std::shared_ptr TIndexInfo::ArrowSchema() const { +const std::shared_ptr& TIndexInfo::ArrowSchema() const { AFL_VERIFY(Schema); return Schema; } -std::shared_ptr TIndexInfo::ArrowSchemaWithSpecials() const { +const std::shared_ptr& TIndexInfo::ArrowSchemaWithSpecials() const { AFL_VERIFY(SchemaWithSpecials); return SchemaWithSpecials; } -std::shared_ptr TIndexInfo::AddColumns( - const std::shared_ptr& src, - const std::vector& columns) const { - std::shared_ptr all = ArrowSchemaWithSpecials(); - auto fields = src->fields(); - - for (const auto& col : columns) { - const std::string name(col.data(), 
col.size()); - if (!src->GetFieldByName(name)) { - auto field = all->GetFieldByName(name); - if (!field) { - return {}; - } - fields.push_back(field); - } - } - return std::make_shared(std::move(fields)); -} - std::vector TIndexInfo::GetColumnIds(const std::vector& columnNames) const { std::vector ids; ids.reserve(columnNames.size()); @@ -140,33 +101,20 @@ std::vector TIndexInfo::GetColumnIds(const std::vector& columnNam return ids; } -std::shared_ptr TIndexInfo::ArrowColumnFieldVerified(const ui32 columnId) const { - auto result = ArrowColumnFieldOptional(columnId); - AFL_VERIFY(result); - return result; -} - -std::shared_ptr TIndexInfo::ArrowColumnFieldOptional(const ui32 columnId) const { - auto it = ArrowColumnByColumnIdCache.find(columnId); - if (it == ArrowColumnByColumnIdCache.end()) { - return nullptr; - } else { - return it->second; - } -} - -void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) { +void TIndexInfo::SetAllKeys(const std::shared_ptr& operators, const THashMap& columns) { /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE - { - const auto& primaryKeyNames = NamesOnly(GetPrimaryKeyColumns()); - auto columnIds = GetColumnIds(primaryKeyNames); - AFL_VERIFY(columnIds.size()); - PrimaryKey = MakeArrowSchema(Columns, columnIds); + PrimaryKey = MakeArrowSchema(columns, PKColumnIds, nullptr); + + AFL_VERIFY(PKColumns.empty()); + for (auto&& i : PKColumnIds) { + auto it = columns.find(i); + AFL_VERIFY(it != columns.end()); + PKColumns.emplace_back(TNameTypeInfo(it->second.Name, it->second.PType)); } - for (const auto& [colId, column] : Columns) { + for (const auto& [colId, column] : columns) { if (NArrow::IsPrimitiveYqlType(column.PType)) { MinMaxIdxColumnsIds.insert(colId); } @@ -174,38 +122,39 @@ void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); if (!Schema) { AFL_VERIFY(!SchemaWithSpecials); - 
InitializeCaches(operators); + InitializeCaches(operators, columns, nullptr); } } TColumnSaver TIndexInfo::GetColumnSaver(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetColumnSaver(); + return GetColumnFeaturesVerified(columnId).GetColumnSaver(); } std::shared_ptr TIndexInfo::GetColumnLoaderOptional(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - if (it == ColumnFeatures.end()) { + const auto& cFeatures = GetColumnFeaturesOptional(columnId); + if (!cFeatures) { return nullptr; } else { - return it->second.GetLoader(); + return cFeatures->GetLoader(); } } -std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { - std::shared_ptr schema; - if (IsSpecialColumn(columnId)) { - return IIndexInfo::GetColumnFieldOptional(columnId); +std::optional TIndexInfo::GetColumnIndexOptional(const ui32 id) const { + auto it = std::lower_bound(SchemaColumnIdsWithSpecials.begin(), SchemaColumnIdsWithSpecials.end(), id); + if (it == SchemaColumnIdsWithSpecials.end() || *it != id) { + return std::nullopt; } else { - schema = ArrowSchema(); + return it - SchemaColumnIdsWithSpecials.begin(); } - if (const TString columnName = GetColumnName(columnId, false)) { - return schema->GetFieldByName(columnName); - } else { +} + +std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { + const std::optional index = GetColumnIndexOptional(columnId); + if (!index) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("column_id", columnId)("event", "incorrect_column_id"); return nullptr; } + return ArrowSchemaWithSpecials()->GetFieldByIndexVerified(*index); } std::shared_ptr TIndexInfo::GetColumnFieldVerified(const ui32 columnId) const { @@ -224,20 +173,24 @@ std::shared_ptr TIndexInfo::GetColumnsSchema(const std::set } std::shared_ptr TIndexInfo::GetColumnSchema(const ui32 columnId) const { - return GetColumnsSchema({columnId}); + return 
GetColumnsSchema({ columnId }); } -bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache) { if (schema.GetEngine() != NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "incorrect_engine_in_schema"); return false; } + AFL_VERIFY(cache); { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Optimizer"); SchemeNeedActualization = schema.GetOptions().GetSchemeNeedActualization(); ExternalGuaranteeExclusivePK = schema.GetOptions().GetExternalGuaranteeExclusivePK(); if (schema.GetOptions().HasCompactionPlannerConstructor()) { - auto container = NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); + auto container = + NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); CompactionPlannerConstructor = container.DetachResult().GetObjectPtrVerified(); } else { AFL_VERIFY(!!CompactionPlannerConstructor); @@ -245,6 +198,7 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } if (schema.HasDefaultCompression()) { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Serializer"); NArrow::NSerialization::TSerializerContainer container; if (!container.DeserializeFromProto(schema.GetDefaultCompression())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "cannot_parse_default_serializer"); @@ -252,49 +206,68 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } DefaultSerializer = container; } - { - for (const auto& stat : schema.GetStatistics()) { - NStatistics::TOperatorContainer container; - 
AFL_VERIFY(container.DeserializeFromProto(stat)); - AFL_VERIFY(StatisticsByName.emplace(container.GetName(), std::move(container)).second); - } - NStatistics::TPortionStorageCursor cursor; - for (auto&& [_, container] : StatisticsByName) { - container.SetCursor(cursor); - container->ShiftCursor(cursor); + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Indexes"); + for (const auto& idx : schema.GetIndexes()) { + NIndexes::TIndexMetaContainer meta; + AFL_VERIFY(meta.DeserializeFromProto(idx)); + Indexes.emplace(meta->GetIndexId(), meta); } } - - for (const auto& idx : schema.GetIndexes()) { - NIndexes::TIndexMetaContainer meta; - AFL_VERIFY(meta.DeserializeFromProto(idx)); - Indexes.emplace(meta->GetIndexId(), meta); - } - for (const auto& col : schema.GetColumns()) { - const ui32 id = col.GetId(); - const TString& name = col.GetName(); - const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; - auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? &col.GetTypeInfo() : nullptr); - Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); - ColumnNames[name] = id; + THashMap columns; + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns"); + ColumnNames.clear(); + for (const auto& col : schema.GetColumns()) { + const ui32 id = col.GetId(); + const TString& name = cache->GetStringCache(col.GetName()); + const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; + auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? 
&col.GetTypeInfo() : nullptr); + columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, cache->GetStringCache(typeInfoMod.TypeMod), notNull); + ColumnNames.emplace_back(name, id); + } + std::sort(ColumnNames.begin(), ColumnNames.end()); } for (const auto& keyName : schema.GetKeyColumnNames()) { - Y_ABORT_UNLESS(ColumnNames.contains(keyName)); - KeyColumns.push_back(ColumnNames[keyName]); - } - InitializeCaches(operators); - for (const auto& col : schema.GetColumns()) { - auto it = ColumnFeatures.find(col.GetId()); - AFL_VERIFY(it != ColumnFeatures.end()); - auto parsed = it->second.DeserializeFromProto(col, operators); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", parsed.GetErrorMessage()); - return false; + PKColumnIds.push_back(GetColumnIdVerified(keyName)); + } + InitializeCaches(operators, columns, cache, false); + SetAllKeys(operators, columns); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns::Features"); + for (const auto& col : schema.GetColumns()) { + THashMap> it; + const TString fingerprint = cache ? 
("C:" + col.SerializeAsString()) : Default(); + const auto createPred = [&]() -> TConclusion> { + auto f = BuildDefaultColumnFeatures(col.GetId(), columns, operators); + auto parsed = f->DeserializeFromProto(col, operators); + if (parsed.IsFail()) { + return parsed; + } + return f; + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + if (fConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", fConclusion.GetErrorMessage()); + return false; + } + ColumnFeatures.emplace_back(fConclusion.DetachResult()); } + for (auto&& cId : GetSystemColumnIds()) { + THashMap> it; + const TString fingerprint = "SC:" + ::ToString(cId); + const auto createPred = [&]() -> TConclusion> { + return BuildDefaultColumnFeatures(cId, {}, operators); + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + ColumnFeatures.emplace_back(fConclusion.DetachResult()); + } + const auto pred = [](const std::shared_ptr& l, const std::shared_ptr& r) { + return l->GetColumnId() < r->GetColumnId(); + }; + std::sort(ColumnFeatures.begin(), ColumnFeatures.end(), pred); } - Version = schema.GetVersion(); return true; } @@ -310,25 +283,20 @@ std::vector GetColumns(const NTable::TScheme::TTableSchema& table return out; } -std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& operators, const std::shared_ptr& cache) { TIndexInfo result(""); - if (!result.DeserializeFromProto(schema, operators)) { + if (!result.DeserializeFromProto(schema, operators, cache)) { return std::nullopt; } return result; } -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const bool withSpecials) { +std::vector> MakeArrowFields(const 
NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache) { std::vector> fields; - if (withSpecials) { - IIndexInfo::AddSpecialFields(fields); - } - for (const ui32 id : ids) { - if (TIndexInfo::IsSpecialColumn(id)) { - AFL_VERIFY(withSpecials); - continue; - } + AFL_VERIFY(!TIndexInfo::IsSpecialColumn(id)); auto it = columns.find(id); AFL_VERIFY(it != columns.end()); @@ -336,51 +304,62 @@ std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSche std::string colName(column.Name.data(), column.Name.size()); auto arrowType = NArrow::GetArrowType(column.PType); AFL_VERIFY(arrowType.ok()); - fields.emplace_back(std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull)); + auto f = std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull); + if (cache) { + auto fFound = cache->GetField(f->ToString(true)); + if (!fFound) { + cache->RegisterField(f->ToString(true), f); + fields.emplace_back(f); + } else { + fields.emplace_back(fFound); + } + } else { + fields.emplace_back(f); + } } - return std::make_shared(std::move(fields)); + return fields; +} + +std::shared_ptr MakeArrowSchema( + const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache) { + return std::make_shared(MakeArrowFields(columns, ids, cache)); } -void TIndexInfo::InitializeCaches(const std::shared_ptr& operators) { +void TIndexInfo::InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, const std::shared_ptr& cache, + const bool withColumnFeatures) { { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Schema"); AFL_VERIFY(!Schema); - std::vector ids; - ids.reserve(Columns.size()); - for (const auto& [id, _] : Columns) { - ids.push_back(id); + SchemaColumnIds.reserve(columns.size()); + for (const auto& [id, _] : columns) { + SchemaColumnIds.push_back(id); } - // The ids had a set type before so we keep them sorted. 
- std::sort(ids.begin(), ids.end()); - Schema = MakeArrowSchema(Columns, ids); + std::sort(SchemaColumnIds.begin(), SchemaColumnIds.end()); + auto originalFields = MakeArrowFields(columns, SchemaColumnIds, cache); + Schema = std::make_shared(originalFields); + IIndexInfo::AddSpecialFields(originalFields); + SchemaWithSpecials = std::make_shared(originalFields); } - SchemaWithSpecials = IIndexInfo::AddSpecialFields(ArrowSchema()); - - for (auto&& c : Columns) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(c.first, GetColumnFieldVerified(c.first)).second); - AFL_VERIFY(ColumnFeatures.emplace(c.first, TColumnFeatures(c.first, GetColumnFieldVerified(c.first), DefaultSerializer, operators->GetDefaultOperator(), - NArrow::IsPrimitiveYqlType(c.second.PType), c.first == GetPKFirstColumnId(), nullptr)).second); - } - for (auto&& cId : GetSystemColumnIds()) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(cId, GetColumnFieldVerified(cId)).second); - AFL_VERIFY(ColumnFeatures.emplace(cId, TColumnFeatures(cId, GetColumnFieldVerified(cId), DefaultSerializer, operators->GetDefaultOperator(), - false, false, IIndexInfo::DefaultColumnValue(cId))).second); - } -} - -std::vector> TIndexInfo::MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const { - std::vector> result; - auto columnArrowSchema = GetColumnSchema(columnId); - TColumnSaver saver = GetColumnSaver(columnId); - ui32 idx = 0; - for (auto p : pages) { - auto arr = NArrow::MakeEmptyBatch(columnArrowSchema, p); - AFL_VERIFY(arr->num_columns() == 1)("count", arr->num_columns()); - result.emplace_back(std::make_shared(saver.Apply(arr), arr->column(0), TChunkAddress(columnId, idx), columnInfo)); - ++idx; + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SchemaFields"); + SchemaColumnIdsWithSpecials = IIndexInfo::AddSpecialFieldIds(SchemaColumnIds); + } + if (withColumnFeatures) { + { + TMemoryProfileGuard 
g("TIndexInfo::DeserializeFromProto::InitializeCaches::Columns"); + for (auto&& c : columns) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(c.first, columns, operators)); + } + } + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SysColumns"); + for (auto&& cId : GetSystemColumnIds()) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(cId, columns, operators)); + } + } } - return result; } NSplitter::TEntityGroups TIndexInfo::GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const { @@ -401,18 +380,84 @@ std::shared_ptr TIndexInfo::Get return CompactionPlannerConstructor; } -std::shared_ptr TIndexInfo::GetColumnDefaultValueVerified(const std::string& colName) const { +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified(const std::string& colName) const { const ui32 columnId = GetColumnIdVerified(colName); - return GetColumnDefaultValueVerified(columnId); + return GetColumnExternalDefaultValueVerified(columnId); +} + +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified(const ui32 columnId) const { + return GetColumnFeaturesVerified(columnId).GetDefaultValue().GetValue(); } -std::shared_ptr TIndexInfo::GetColumnDefaultValueVerified(const ui32 columnId) const { - auto& features = GetColumnFeaturesVerified(columnId); - if (features.GetDefaultValue().IsEmpty() && !IsNullableVerified(columnId)) { - return NArrow::DefaultScalar(GetColumnFieldVerified(columnId)->type()); +NKikimr::TConclusionStatus TIndexInfo::AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + auto& index = it->second; + std::shared_ptr chunk = index->BuildIndex(originalData, *this); + auto opStorage = operators->GetOperatorVerified(index->GetStorageId()); + if ((i64)chunk->GetPackedSize() > 
opStorage->GetBlobSplitSettings().GetMaxBlobSize()) { + return TConclusionStatus::Fail("blob size for secondary data (" + ::ToString(indexId) + ") bigger than limit (" + + ::ToString(opStorage->GetBlobSplitSettings().GetMaxBlobSize()) + ")"); + } + if (index->GetStorageId() == IStoragesManager::LocalMetadataStorageId) { + AFL_VERIFY(result.MutableSecondaryInplaceData().emplace(indexId, chunk).second); } else { - return features.GetDefaultValue().GetValue(); + AFL_VERIFY(result.MutableExternalData().emplace(indexId, std::vector>({chunk})).second); + } + return TConclusionStatus::Success(); +} + +std::shared_ptr TIndexInfo::GetIndexMetaMax(const ui32 columnId) const { + for (auto&& i : Indexes) { + if (i.second->GetClassName() != NIndexes::NMax::TIndexMeta::GetClassNameStatic()) { + continue; + } + auto maxIndex = static_pointer_cast(i.second.GetObjectPtr()); + if (maxIndex->GetColumnId() == columnId) { + return maxIndex; + } + } + return nullptr; +} + +std::shared_ptr TIndexInfo::GetIndexMetaCountMinSketch(const std::set& columnIds) const { + for (auto&& i : Indexes) { + if (i.second->GetClassName() != NIndexes::NCountMinSketch::TIndexMeta::GetClassNameStatic()) { + continue; + } + auto index = static_pointer_cast(i.second.GetObjectPtr()); + if (index->GetColumnIds() == columnIds) { + return index; + } } + return nullptr; +} + +std::vector TIndexInfo::GetEntityIds() const { + auto result = GetColumnIds(true); + for (auto&& i : Indexes) { + result.emplace_back(i.first); + } + return result; +} + +std::shared_ptr TIndexInfo::BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const { + if (IsSpecialColumn(columnId)) { + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + false, false, false, IIndexInfo::DefaultColumnValue(columnId)); + } else { + auto itC = columns.find(columnId); + AFL_VERIFY(itC != columns.end()); + return 
std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + NArrow::IsPrimitiveYqlType(itC->second.PType), columnId == GetPKFirstColumnId(), false, nullptr); + } +} + +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size())("index", colIndex)("size", ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetDefaultValue().GetValue(); } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index a9f77745110d..869d0393b6c1 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -5,75 +5,173 @@ #include "abstract/index_info.h" #include "indexes/abstract/meta.h" -#include "statistics/abstract/operator.h" -#include "statistics/abstract/common.h" -#include - -#include -#include -#include #include #include -#include +#include #include +#include +#include +#include +#include + +#include namespace arrow { - class Array; - class Field; - class Schema; -} +class Array; +class Field; +class Schema; +} // namespace arrow namespace NKikimr::NOlap { + +namespace NIndexes::NMax { +class TIndexMeta; +} + +namespace NIndexes::NCountMinSketch { +class TIndexMeta; +} + namespace NStorageOptimizer { class IOptimizerPlannerConstructor; } class TPortionInfoWithBlobs; -struct TInsertedData; class TSnapshotColumnInfo; class ISnapshotSchema; using TNameTypeInfo = std::pair; +class TSchemaObjectsCache { +private: + THashMap> Fields; + THashMap> ColumnFeatures; + THashSet StringsCache; + mutable ui64 AcceptionFieldsCount = 0; + mutable ui64 AcceptionFeaturesCount = 0; + +public: + const TString& GetStringCache(const TString& original) { + auto it = StringsCache.find(original); + if (it == StringsCache.end()) { + it = 
StringsCache.emplace(original).first; + } + return *it; + } + + void RegisterField(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_field")("fp", fingerprint)("f", f->ToString()); + AFL_VERIFY(Fields.emplace(fingerprint, f).second); + } + void RegisterColumnFeatures(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_column_features")("fp", fingerprint)("info", f->DebugString()); + AFL_VERIFY(ColumnFeatures.emplace(fingerprint, f).second); + } + std::shared_ptr GetField(const TString& fingerprint) const { + auto it = Fields.find(fingerprint); + if (it == Fields.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_miss")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + return nullptr; + } + if (++AcceptionFieldsCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_accept")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + } + return it->second; + } + template + TConclusion> GetOrCreateColumnFeatures(const TString& fingerprint, const TConstructor& constructor) { + auto it = ColumnFeatures.find(fingerprint); + if (it == ColumnFeatures.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_miss")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + TConclusion> resultConclusion = constructor(); + if (resultConclusion.IsFail()) { + return resultConclusion; + } + it = ColumnFeatures.emplace(fingerprint, resultConclusion.DetachResult()).first; + AFL_VERIFY(it->second); + } else { + if (++AcceptionFeaturesCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_accept")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + } + } + return it->second; + } +}; + /// 
Column engine index description in terms of tablet's local table. /// We have to use YDB types for keys here. -struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { +struct TIndexInfo: public IIndexInfo { private: - THashMap ColumnFeatures; - THashMap> ArrowColumnByColumnIdCache; + using TColumns = THashMap; + + class TNameInfo { + private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui32, ColumnId, 0); + + public: + TNameInfo(const TString& name, const ui32 columnId) + : Name(name) + , ColumnId(columnId) + { + + } + + bool operator<(const TNameInfo& item) const { + return Name < item.Name; + } + }; + + std::vector ColumnNames; + std::vector PKColumnIds; + std::vector PKColumns; + + std::vector> ColumnFeatures; THashMap Indexes; - std::map StatisticsByName; TIndexInfo(const TString& name); bool SchemeNeedActualization = false; std::shared_ptr CompactionPlannerConstructor; bool ExternalGuaranteeExclusivePK = false; - bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); - void InitializeCaches(const std::shared_ptr& operators); + bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache); + void InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, + const std::shared_ptr& cache, const bool withColumnFeatures = true); + std::shared_ptr BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const; + public: std::shared_ptr GetCompactionPlannerConstructor() const; - - bool IsNullableVerified(const std::string& fName) const { - auto it = Columns.find(GetColumnIdVerified(fName)); - AFL_VERIFY(it != Columns.end()); - return !it->second.NotNull; + bool IsNullableVerifiedByIndex(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetIsNullable(); } bool 
IsNullableVerified(const ui32 colId) const { - auto it = Columns.find(colId); - AFL_VERIFY(it != Columns.end()); - return !it->second.NotNull; + return GetColumnFeaturesVerified(colId).GetIsNullable(); } - std::shared_ptr GetColumnDefaultValueVerified(const std::string& colName) const; - std::shared_ptr GetColumnDefaultValueVerified(const ui32 colId) const; + std::shared_ptr GetColumnExternalDefaultValueVerified(const std::string& colName) const; + std::shared_ptr GetColumnExternalDefaultValueVerified(const ui32 colId) const; + std::shared_ptr GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const; + bool GetExternalGuaranteeExclusivePK() const { return ExternalGuaranteeExclusivePK; } const TColumnFeatures& GetColumnFeaturesVerified(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second; + return *ColumnFeatures[GetColumnIndexVerified(columnId)]; + } + + const std::shared_ptr& GetColumnFeaturesOptional(const ui32 columnId) const { + if (auto idx = GetColumnIndexOptional(columnId)) { + return ColumnFeatures[*idx]; + } else { + return Default>(); + } } NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; @@ -88,27 +186,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { result.emplace(portionTierName); } else { for (auto&& i : ColumnFeatures) { - result.emplace(i.second.GetOperator()->GetStorageId()); + result.emplace(i->GetOperator()->GetStorageId()); } } return result; } - std::vector> MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const; - - const std::map& GetStatisticsByName() const { - return StatisticsByName; - } - - NStatistics::TOperatorContainer GetStatistics(const NStatistics::TIdentifier& id) const { - for (auto&& i : StatisticsByName) { - if (i.second->GetIdentifier() == id) { - return i.second; - } - } - 
return NStatistics::TOperatorContainer(); - } - const THashMap& GetIndexes() const { return Indexes; } @@ -123,9 +206,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { if (specialTier && specialTier != IStoragesManager::DefaultStorageId) { return specialTier; } else { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetOperator()->GetStorageId(); + return GetColumnFeaturesVerified(columnId).GetOperator()->GetStorageId(); } } @@ -140,33 +221,48 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { TString DebugString() const { TStringBuilder sb; sb << "(" - << "version=" << Version << ";" - << "name=" << Name << ";" - << ")"; + << "version=" << Version << ";" + << "name=" << Name << ";" + << ")"; for (auto&& i : ColumnFeatures) { - sb << GetColumnName(i.first) << ":" << i.second.DebugString() << ";"; + sb << i->GetColumnName() << ":" << i->DebugString() << ";"; } return sb; } + void SetAllKeys(const std::shared_ptr& operators, const THashMap& columns); + public: static TIndexInfo BuildDefault() { TIndexInfo result("dummy"); return result; } - std::vector> ActualizeColumnData(const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { - auto itCurrent = ColumnFeatures.find(columnId); - auto itPred = sourceIndexInfo.ColumnFeatures.find(columnId); - AFL_VERIFY(itCurrent != ColumnFeatures.end()); - AFL_VERIFY(itPred != sourceIndexInfo.ColumnFeatures.end()); - return itCurrent->second.ActualizeColumnData(source, itPred->second); + static TIndexInfo BuildDefault( + const std::shared_ptr& operators, const TColumns& columns, const std::vector& pkNames) { + TIndexInfo result = BuildDefault(); + for (auto&& i : columns) { + result.ColumnNames.emplace_back(i.second.Name, i.first); + } + std::sort(result.ColumnNames.begin(), result.ColumnNames.end()); + for (auto&& i : pkNames) { + const ui32 columnId = 
result.GetColumnIdVerified(i); + result.PKColumnIds.emplace_back(columnId); + } + result.SetAllKeys(operators, columns); + return result; } - static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); + std::vector> ActualizeColumnData( + const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { + return GetColumnFeaturesVerified(columnId).ActualizeColumnData(source, sourceIndexInfo.GetColumnFeaturesVerified(columnId)); + } + + static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& operators, const std::shared_ptr& cache); bool HasColumnId(const ui32 columnId) const { - return ColumnFeatures.contains(columnId); + return !!GetColumnIndexOptional(columnId); } bool HasColumnName(const std::string& columnName) const { @@ -177,6 +273,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return Indexes.contains(indexId); } + std::optional GetColumnIndexOptional(const ui32 id) const; + ui32 GetColumnIndexVerified(const ui32 id) const { + auto result = GetColumnIndexOptional(id); + AFL_VERIFY(result); + return *result; + } std::shared_ptr GetColumnFieldOptional(const ui32 columnId) const; std::shared_ptr GetColumnFieldVerified(const ui32 columnId) const; std::shared_ptr GetColumnSchema(const ui32 columnId) const; @@ -199,6 +301,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return it->second; } + NIndexes::TIndexMetaContainer GetIndexVerified(const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + return it->second; + } + std::optional GetIndexNameOptional(const ui32 indexId) const { auto meta = GetIndexOptional(indexId); if (!meta) { @@ -207,25 +315,38 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return meta->GetIndexName(); } - void AppendIndexes(THashMap>>& originalData) 
const { + class TSecondaryData { + private: + using TStorageData = THashMap>; + YDB_ACCESSOR_DEF(TStorageData, SecondaryInplaceData); + using TPrimaryStorageData = THashMap>>; + YDB_ACCESSOR_DEF(TPrimaryStorageData, ExternalData); + + public: + TSecondaryData() = default; + }; + + [[nodiscard]] TConclusion AppendIndexes(const THashMap>>& primaryData, + const std::shared_ptr& operators) const { + TSecondaryData result; + result.MutableExternalData() = primaryData; for (auto&& i : Indexes) { - std::shared_ptr chunk = i.second->BuildIndex(i.first, originalData, *this); - AFL_VERIFY(originalData.emplace(i.first, std::vector>({chunk})).second); + auto conclusion = AppendIndex(primaryData, i.first, operators, result); + if (conclusion.IsFail()) { + return conclusion; + } } + return result; } - void AppendIndex(THashMap>>& originalData, const ui32 indexId) const { - auto it = Indexes.find(indexId); - AFL_VERIFY(it != Indexes.end()); - std::shared_ptr chunk = it->second->BuildIndex(indexId, originalData, *this); - AFL_VERIFY(originalData.emplace(indexId, std::vector>({chunk})).second); - } + std::shared_ptr GetIndexMetaMax(const ui32 columnId) const; + std::shared_ptr GetIndexMetaCountMinSketch(const std::set& columnIds) const; + + [[nodiscard]] TConclusionStatus AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const; /// Returns an id of the column located by name. The name should exists in the schema. ui32 GetColumnIdVerified(const std::string& name) const; - ui32 GetColumnId(const std::string& name) const { - return GetColumnIdVerified(name); - } std::set GetColumnIdsVerified(const std::set& names) const { std::set result; for (auto&& i : names) { @@ -236,58 +357,41 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { std::optional GetColumnIdOptional(const std::string& name) const; /// Returns a name of the column located by id. 
- TString GetColumnName(ui32 id, bool required = true) const; + TString GetColumnName(const ui32 id, bool required = true) const; /// Returns names of columns defined by the specific ids. std::vector GetColumnNames(const std::vector& ids) const; std::vector GetColumnSTLNames(const std::vector& ids) const; - std::vector GetColumnIds(const bool withSpecial = true) const; - std::vector GetEntityIds() const { - auto result = GetColumnIds(); - for (auto&& i : Indexes) { - result.emplace_back(i.first); - } - return result; + const std::vector& GetColumnIds(const bool withSpecial = true) const; + const std::vector& GetPKColumnIds() const { + AFL_VERIFY(PKColumnIds.size()); + return PKColumnIds; } - - /// Returns info of columns defined by specific ids. - std::vector GetColumns(const std::vector& ids) const; + std::vector GetEntityIds() const; /// Traditional Primary Key (includes uniqueness, search and sorting logic) - std::vector GetPrimaryKeyColumns() const { - return GetColumns(KeyColumns); + const std::vector& GetPrimaryKeyColumns() const { + return PKColumns; } /// Returns id of the first column of the primary key. ui32 GetPKFirstColumnId() const { - Y_ABORT_UNLESS(KeyColumns.size()); - return KeyColumns[0]; + Y_ABORT_UNLESS(PKColumnIds.size()); + return PKColumnIds[0]; } const std::shared_ptr& GetReplaceKey() const { return PrimaryKey; } const std::shared_ptr& GetPrimaryKey() const { return PrimaryKey; } - /// Initializes sorting, replace, index and extended keys. 
- void SetAllKeys(const std::shared_ptr& operators); - void CheckTtlColumn(const TString& ttlColumn) const { Y_ABORT_UNLESS(!ttlColumn.empty()); - Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnId(ttlColumn))); + Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnIdVerified(ttlColumn))); } std::vector GetColumnIds(const std::vector& columnNames) const; - std::shared_ptr ArrowSchema() const; - std::shared_ptr ArrowSchemaWithSpecials() const; - std::shared_ptr AddColumns(const std::shared_ptr& schema, - const std::vector& columns) const; - - std::shared_ptr ArrowColumnFieldOptional(const ui32 columnId) const; - std::shared_ptr ArrowColumnFieldVerified(const ui32 columnId) const; - - const THashSet& GetRequiredColumns() const { - return RequiredColumns; - } + const std::shared_ptr& ArrowSchema() const; + const std::shared_ptr& ArrowSchemaWithSpecials() const; const THashSet& GetMinMaxIdxColumns() const { return MinMaxIdxColumnsIds; @@ -296,8 +400,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { bool AllowTtlOverColumn(const TString& name) const; /// Returns whether the sorting keys defined. 
- bool IsSorted() const { return true; } - bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; } + bool IsSorted() const { + return true; + } + bool IsSortedColumn(const ui32 columnId) const { + return GetPKFirstColumnId() == columnId; + } ui64 GetVersion() const { return Version; @@ -311,17 +419,21 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { private: ui64 Version = 0; TString Name; - std::shared_ptr Schema; - std::shared_ptr SchemaWithSpecials; + std::vector SchemaColumnIds; + std::vector SchemaColumnIdsWithSpecials; + std::shared_ptr SchemaWithSpecials; + std::shared_ptr Schema; std::shared_ptr PrimaryKey; - THashSet RequiredColumns; THashSet MinMaxIdxColumnsIds; NArrow::NSerialization::TSerializerContainer DefaultSerializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); }; -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const bool withSpecials = false); +std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); /// Extracts columns with the specific ids from the schema. 
std::vector GetColumns(const NTable::TScheme::TTableSchema& tableSchema, const std::vector& ids); -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h index a67a72df8e2f..88815c374eb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp index a93507bec06f..e6dad360d159 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp @@ -2,4 +2,19 @@ namespace NKikimr::NOlap::NIndexes { +NKikimr::TConclusionStatus IIndexMetaConstructor::DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (jsonInfo.Has("storage_id")) { + if (!jsonInfo["storage_id"].IsString()) { + return TConclusionStatus::Fail("incorrect storage_id field in json index description (have to be string)"); + } + StorageId = jsonInfo["storage_id"].GetStringSafe(); + if (!*StorageId) { + return TConclusionStatus::Fail("storage_id cannot be empty string"); + } else if (*StorageId != "__LOCAL_METADATA" && *StorageId != "__DEFAULT") { + return TConclusionStatus::Fail("storage_id have to been one of variant ['__LOCAL_METADATA', '__DEFAULT']"); + } + } + return DoDeserializeFromJson(jsonInfo); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h index e6fe22e3f273..dded1abd0081 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h @@ -14,6 +14,9 @@ class TOlapSchema; namespace NKikimr::NOlap::NIndexes { class IIndexMetaConstructor { +private: + YDB_READONLY_DEF(std::optional, StorageId); + protected: virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) = 0; virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const = 0; @@ -25,19 +28,23 @@ class IIndexMetaConstructor { virtual ~IIndexMetaConstructor() = default; - TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { - return DoDeserializeFromJson(jsonInfo); - } + TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo); std::shared_ptr CreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { return DoCreateIndexMeta(indexId, indexName, currentSchema, errors); } TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (proto.HasStorageId()) { + StorageId = proto.GetStorageId(); + } return DoDeserializeFromProto(proto); } void SerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + if (StorageId) { + proto.SetStorageId(*StorageId); + } return DoSerializeToProto(proto); } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index ae4fd22fba3d..4d4efc52a2d6 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -1,14 +1,42 @@ #include "meta.h" +#include namespace NKikimr::NOlap::NIndexes { bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& 
proto) { + if (!proto.GetId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect id - 0"); + return false; + } + if (!proto.GetName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect name - empty string"); + return false; + } IndexId = proto.GetId(); - AFL_VERIFY(IndexId); IndexName = proto.GetName(); - AFL_VERIFY(IndexName); StorageId = proto.GetStorageId() ? proto.GetStorageId() : IStoragesManager::DefaultStorageId; return DoDeserializeFromProto(proto); } +void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { + AFL_VERIFY(IndexId); + proto.SetId(IndexId); + AFL_VERIFY(IndexName); + proto.SetName(IndexName); + if (StorageId) { + proto.SetStorageId(StorageId); + } + return DoSerializeToProto(proto); +} + +NJson::TJsonValue IIndexMeta::SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("entity_id", iChunk.GetEntityId()); + result.InsertValue("chunk_idx", iChunk.GetChunkIdx()); + if (iChunk.HasBlobData()) { + result.InsertValue("data", DoSerializeDataToJson(iChunk.GetBlobDataVerified(), indexInfo)); + } + return result; +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 821d36af8cf5..d5185cbca236 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -16,6 +16,7 @@ class TExprBase; namespace NKikimr::NOlap { struct TIndexInfo; class TProgramContainer; +class TIndexChunk; } namespace NKikimr::NSchemeShard { @@ -30,24 +31,30 @@ class IIndexMeta { YDB_READONLY(ui32, IndexId, 0); YDB_READONLY(TString, StorageId, IStoragesManager::DefaultStorageId); 
protected: - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const = 0; + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const = 0; virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const = 0; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) = 0; virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const = 0; virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const = 0; + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& /*data*/, const TIndexInfo& /*indexInfo*/) const { + return "NO_IMPLEMENTED"; + } public: using TFactory = NObjectFactory::TObjectFactory; using TProto = NKikimrSchemeOp::TOlapIndexDescription; IIndexMeta() = default; - IIndexMeta(const ui32 indexId, const TString& indexName) + IIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId) : IndexName(indexName) , IndexId(indexId) + , StorageId(storageId) { } + NJson::TJsonValue SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const; + TConclusionStatus CheckModificationCompatibility(const std::shared_ptr& newMeta) const { if (!newMeta) { return TConclusionStatus::Fail("new meta cannot be absent"); @@ -60,8 +67,8 @@ class IIndexMeta { virtual ~IIndexMeta() = default; - std::shared_ptr BuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { - return DoBuildIndex(indexId, data, indexInfo); + std::shared_ptr BuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const { + return DoBuildIndex(data, indexInfo); } void FillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { @@ -69,17 +76,7 @@ class IIndexMeta { } bool DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto); - - void 
SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { - AFL_VERIFY(IndexId); - proto.SetId(IndexId); - AFL_VERIFY(IndexName); - proto.SetName(IndexName); - if (StorageId) { - proto.SetStorageId(StorageId); - } - return DoSerializeToProto(proto); - } + void SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const; virtual TString GetClassName() const = 0; }; diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp index d16f5fcfb33f..d6a3e9b800e5 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp @@ -401,12 +401,15 @@ class TNormalForm { public: TNormalForm() = default; - bool Add(const NSsa::TAssign& assign) { + bool Add(const NSsa::TAssign& assign, const TProgramContainer& program) { std::vector> argNodes; for (auto&& arg : assign.GetArguments()) { if (arg.IsGenerated()) { auto it = Nodes.find(arg.GetColumnName()); - AFL_VERIFY(it != Nodes.end()); + if (it == Nodes.end()) { + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "program_arg_is_missing")("program", program.DebugString()); + return false; + } argNodes.emplace_back(it->second); } else { argNodes.emplace_back(std::make_shared(arg.GetColumnName())); @@ -443,7 +446,7 @@ std::shared_ptr TDataForIndexesCheckers::Build(const TP auto fStep = program.GetSteps().front(); TNormalForm nForm; for (auto&& s : fStep->GetAssignes()) { - if (!nForm.Add(s)) { + if (!nForm.Add(s, program)) { return nullptr; } } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make index e758f9ecc430..a9991e37e26a 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make @@ -11,7 +11,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow - 
ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp deleted file mode 100644 index e7960e66809e..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "common.h" -#include - -namespace NKikimr::NOlap::NStatistics { - -TIdentifier::TIdentifier(const EType type, const std::vector& entities) - : Type(type) - , EntityIds(entities) -{ - AFL_VERIFY(EntityIds.size()); -} - -bool TIdentifier::operator<(const TIdentifier& item) const { - if (Type != item.Type) { - return (ui32)Type < (ui32)item.Type; - } - for (ui32 i = 0; i < std::min(EntityIds.size(), item.EntityIds.size()); ++i) { - if (EntityIds[i] < item.EntityIds[i]) { - return true; - } - } - return false; -} - -bool TIdentifier::operator==(const TIdentifier& item) const { - if (Type != item.Type) { - return false; - } - if (EntityIds.size() != item.EntityIds.size()) { - return false; - } - for (ui32 i = 0; i < EntityIds.size(); ++i) { - if (EntityIds[i] != item.EntityIds[i]) { - return false; - } - } - return true; -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h deleted file mode 100644 index abfd7159a97b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics { -enum class EType { - Undefined /* "undefined" */, - Max /* "max" */, - Variability /* "variability" */ -}; - -class TIdentifier { -private: - YDB_READONLY(EType, Type, EType::Undefined); - YDB_READONLY_DEF(std::vector, EntityIds); -public: - TIdentifier(const EType type, const std::vector& entities); - - bool 
operator<(const TIdentifier& item) const; - bool operator==(const TIdentifier& item) const; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp deleted file mode 100644 index 5713317c7d21..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "constructor.h" - -namespace NKikimr::NOlap::NStatistics { - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h deleted file mode 100644 index 8948e93d482c..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include "common.h" -#include "portion_storage.h" -#include "operator.h" - -#include - -namespace NKikimr::NSchemeShard { -class TOlapSchema; -} - -namespace NKikimrColumnShardStatisticsProto { -class TOperatorContainer; -} - -namespace NKikimr::NOlap::NStatistics { - -class IConstructor { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IConstructor() = default; -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const = 0; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TConstructorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IConstructor() = default; - - IConstructor(const EType type) - :Type(type) { - - } - - TConclusionStatus DeserializeFromJson(const 
NJson::TJsonValue& jsonData) { - return DoDeserializeFromJson(jsonData); - } - - TConclusion CreateOperator(const TString& name, const NSchemeShard::TOlapSchema& currentSchema) const { - auto result = DoCreateOperator(currentSchema); - if (!result) { - return result.GetError(); - } - return TOperatorContainer(name, result.DetachResult()); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - return DoSerializeToProto(proto); - } -}; - -class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - using TBase::TBase; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp deleted file mode 100644 index 357d8bbd3934..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics { - -bool IOperator::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h deleted file mode 100644 index 29f6f6744ac4..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once -#include 
"common.h" -#include "portion_storage.h" - -#include -#include - -#include -#include - -#include - -namespace NKikimr::NOlap { -class IPortionDataChunk; -} - -namespace NKikimr::NOlap::NStatistics { - -class IOperator { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IOperator() = default; -protected: - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const = 0; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const = 0; - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TOperatorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IOperator() = default; - - virtual std::vector GetEntityIds() const = 0; - - IOperator(const EType type) - :Type(type) { - - } - - void ShiftCursor(TPortionStorageCursor& cursor) const { - DoShiftCursor(cursor); - } - - void CopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const { - return DoCopyData(cursor, portionStatsFrom, portionStatsTo); - } - - void FillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - DoFillStatisticsData(data, portionStats, index); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - TIdentifier GetIdentifier() const { - return TIdentifier(Type, GetEntityIds()); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - return DoSerializeToProto(proto); 
- } -}; - -class TOperatorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - YDB_READONLY_DEF(TString, Name); - std::optional Cursor; - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - TOperatorContainer() = default; - - TOperatorContainer(const TString& name, const std::shared_ptr& object) - : TBase(object) - , Name(name) - { - AFL_VERIFY(Name); - } - - const TPortionStorageCursor& GetCursorVerified() const { - AFL_VERIFY(Cursor); - return *Cursor; - } - - void SetCursor(const TPortionStorageCursor& cursor) { - AFL_VERIFY(!Cursor); - Cursor = cursor; - } - - std::shared_ptr GetScalarVerified(const TPortionStorage& storage) { - AFL_VERIFY(!!Cursor); - return storage.GetScalarVerified(*Cursor); - } - - NKikimrColumnShardStatisticsProto::TOperatorContainer SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TOperatorContainer result = TBase::SerializeToProto(); - result.SetName(Name); - AFL_VERIFY(Name); - return result; - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - TBase::SerializeToProto(proto); - proto.SetName(Name); - AFL_VERIFY(Name); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - Name = proto.GetName(); - if (!Name) { - return false; - } - if (!TBase::DeserializeFromProto(proto)) { - return false; - } - return true; - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp deleted file mode 100644 index f0d67ecf7d42..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "portion_storage.h" -#include -#include - -namespace NKikimr::NOlap::NStatistics { - -NKikimrColumnShardStatisticsProto::TScalar TPortionStorage::ScalarToProto(const arrow::Scalar& scalar) 
{ - NKikimrColumnShardStatisticsProto::TScalar result; - switch (scalar.type->id()) { - case arrow::Type::BOOL: - result.SetBool(static_cast(scalar).value); - break; - case arrow::Type::UINT8: - result.SetUint8(static_cast(scalar).value); - break; - case arrow::Type::UINT16: - result.SetUint16(static_cast(scalar).value); - break; - case arrow::Type::UINT32: - result.SetUint32(static_cast(scalar).value); - break; - case arrow::Type::UINT64: - result.SetUint64(static_cast(scalar).value); - break; - case arrow::Type::INT8: - result.SetInt8(static_cast(scalar).value); - break; - case arrow::Type::INT16: - result.SetInt16(static_cast(scalar).value); - break; - case arrow::Type::INT32: - result.SetInt32(static_cast(scalar).value); - break; - case arrow::Type::INT64: - result.SetInt64(static_cast(scalar).value); - break; - case arrow::Type::DOUBLE: - result.SetDouble(static_cast(scalar).value); - break; - case arrow::Type::FLOAT: - result.SetFloat(static_cast(scalar).value); - break; - case arrow::Type::TIMESTAMP: - { - auto* ts = result.MutableTimestamp(); - ts->SetValue(static_cast(scalar).value); - ts->SetUnit(static_cast(*scalar.type).unit()); - break; - } - default: - AFL_VERIFY(false)("problem", "incorrect type for statistics usage")("type", scalar.type->ToString()); - } - return result; -} - -std::shared_ptr TPortionStorage::ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto) { - if (proto.HasBool()) { - return std::make_shared(proto.GetBool()); - } else if (proto.HasUint8()) { - return std::make_shared(proto.GetUint8()); - } else if (proto.HasUint16()) { - return std::make_shared(proto.GetUint16()); - } else if (proto.HasUint32()) { - return std::make_shared(proto.GetUint32()); - } else if (proto.HasUint64()) { - return std::make_shared(proto.GetUint64()); - } else if (proto.HasInt8()) { - return std::make_shared(proto.GetInt8()); - } else if (proto.HasInt16()) { - return std::make_shared(proto.GetInt16()); - } else if (proto.HasInt32()) { - 
return std::make_shared(proto.GetInt32()); - } else if (proto.HasInt64()) { - return std::make_shared(proto.GetInt64()); - } else if (proto.HasDouble()) { - return std::make_shared(proto.GetDouble()); - } else if (proto.HasFloat()) { - return std::make_shared(proto.GetFloat()); - } else if (proto.HasTimestamp()) { - arrow::TimeUnit::type unit = arrow::TimeUnit::type(proto.GetTimestamp().GetUnit()); - return std::make_shared(proto.GetTimestamp().GetValue(), std::make_shared(unit)); - } - AFL_VERIFY(false)("problem", "incorrect statistics proto")("proto", proto.DebugString()); - return nullptr; -} - -std::shared_ptr TPortionStorage::GetScalarVerified(const TPortionStorageCursor& cursor) const { - AFL_VERIFY(cursor.GetScalarsPosition() < Data.size()); - AFL_VERIFY(Data[cursor.GetScalarsPosition()]); - return Data[cursor.GetScalarsPosition()]; -} - -void TPortionStorage::AddScalar(const std::shared_ptr& scalar) { - const auto type = scalar->type->id(); - AFL_VERIFY(type == arrow::Type::BOOL || - type == arrow::Type::UINT8 || type == arrow::Type::UINT16 || type == arrow::Type::UINT32 || type == arrow::Type::UINT64 || - type == arrow::Type::INT8 || type == arrow::Type::INT16 || type == arrow::Type::INT32 || type == arrow::Type::INT64 || - type == arrow::Type::DOUBLE || type == arrow::Type::TIMESTAMP || type == arrow::Type::FLOAT) - ("problem", "incorrect_stat_type")("incoming", scalar->type->ToString()); - Data.emplace_back(scalar); -} - -NKikimrColumnShardStatisticsProto::TPortionStorage TPortionStorage::SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TPortionStorage result; - for (auto&& i : Data) { - AFL_VERIFY(i); - *result.AddScalars() = ScalarToProto(*i); - } - return result; -} - -NKikimr::TConclusionStatus TPortionStorage::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - for (auto&& i : proto.GetScalars()) { - Data.emplace_back(ProtoToScalar(i)); - } - return TConclusionStatus::Success(); -} - -} \ No 
newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h deleted file mode 100644 index a3e4b6bcb0dd..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -namespace NKikimrColumnShardStatisticsProto { -class TScalar; -class TPortionStorage; -} - -namespace NKikimr::NOlap::NStatistics { -class TPortionStorageCursor { -private: - YDB_READONLY(ui32, ScalarsPosition, 0); -public: - TPortionStorageCursor() = default; - - void AddScalarsPosition(const ui32 shift) { - ScalarsPosition += shift; - } -}; - -class TPortionStorage { -private: - YDB_READONLY_DEF(std::vector>, Data); - static NKikimrColumnShardStatisticsProto::TScalar ScalarToProto(const arrow::Scalar& value); - static std::shared_ptr ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto); - TConclusionStatus DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto); - -public: - bool IsEmpty() const { - return Data.empty(); - } - - std::shared_ptr GetScalarVerified(const TPortionStorageCursor& cursor) const; - - void AddScalar(const std::shared_ptr& scalar); - - NKikimrColumnShardStatisticsProto::TPortionStorage SerializeToProto() const; - - static TConclusion BuildFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - TPortionStorage result; - auto parse = result.DeserializeFromProto(proto); - if (!parse) { - return parse; - } - return result; - } -}; -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make deleted file mode 100644 index f63520354edf..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make +++ /dev/null @@ -1,20 
+0,0 @@ -LIBRARY() - -SRCS( - portion_storage.cpp - constructor.cpp - operator.cpp - common.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos - ydb/core/tx/columnshard/engines/scheme/abstract - contrib/libs/apache/arrow - ydb/library/actors/core - ydb/library/conclusion -) - -GENERATE_ENUM_SERIALIZATION(common.h) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp deleted file mode 100644 index a12a27812350..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NMax { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - ColumnName = proto.GetMax().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableMax()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); 
- } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h deleted file mode 100644 index 695096a63d2f..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Max) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp deleted file mode 100644 index 8e2c179e077b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const 
IIndexInfo& index) const { - auto loader = index.GetColumnLoaderVerified(EntityId); - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - std::shared_ptr result; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - auto res = NArrow::FindMinMaxPosition(rb->column(0)); - auto currentScalarMax = NArrow::TStatusValidator::GetValid(rb->column(0)->GetScalar(res.second)); - if (!result || NArrow::ScalarCompare(result, currentScalarMax) < 0) { - result = currentScalarMax; - } - } - portionStats.AddScalar(result); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - EntityId = proto.GetMax().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableMax()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h deleted file mode 100644 index b3478e5a24bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& 
data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Max) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Max) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git 
a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto deleted file mode 100644 index c99f485d399d..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto +++ /dev/null @@ -1,66 +0,0 @@ -package NKikimrColumnShardStatisticsProto; - -message TScalar { - message TTimestamp { - optional uint64 Value = 1; - optional uint32 Unit = 2; - } - oneof Value { - bool Bool = 1; - uint32 Uint8 = 2; - uint32 Uint16 = 3; - uint32 Uint32 = 4; - uint64 Uint64 = 5; - - int32 Int8 = 6; - int32 Int16 = 7; - int32 Int32 = 8; - int64 Int64 = 9; - - double Double = 10; - - TTimestamp Timestamp = 11; - - float Float = 12; - } -} - -message TPortionStorage { - repeated TScalar Scalars = 1; -} - -message TMaxConstructor { - optional string ColumnName = 3; -} - -message TVariabilityConstructor { - optional string ColumnName = 3; -} - -message TConstructorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxConstructor Max = 41; - TVariabilityConstructor Variability = 42; - } -} - -message TMaxOperator { - optional uint32 EntityId = 1; -} - -message TVariabilityOperator { - optional uint32 EntityId = 1; -} - -message TOperatorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxOperator Max = 41; - TVariabilityOperator Variability = 42; - } -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make deleted file mode 100644 index f72b3b7cf620..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - data.proto -) - -PEERDIR( - -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp 
b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp deleted file mode 100644 index 25840673fcb5..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NVariability { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - ColumnName = proto.GetVariability().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableVariability()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); - } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h 
b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h deleted file mode 100644 index 809c9043faac..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Variability) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp deleted file mode 100644 index d43d617171bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class IValuesContainer { -protected: - std::optional DataType; - ui32 DifferentCount = 0; - - virtual void DoAddArray(const std::shared_ptr& array) = 0; -public: - virtual ~IValuesContainer() = default; - ui32 GetDifferentCount() const { - return DifferentCount; - } - - void AddArray(const std::shared_ptr& array) { - if 
(!DataType) { - DataType = array->type_id(); - } else { - AFL_VERIFY(DataType == array->type_id())("base", (ui32)*DataType)("to", (ui32)array->type_id()); - } - return DoAddArray(array); - } -}; - -template -class TCTypeValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = typename TWrap::T::c_type; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_c_type()) { - if (ElementsStorage.emplace(arrTyped.Value(i)).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -template -class TStringValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = TString; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_string_view()) { - auto value = arrTyped.GetView(i); - if (ElementsStorage.emplace(value.data(), value.size()).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -class TDifferentElementsAggregator { -private: - std::shared_ptr Container; -public: - TDifferentElementsAggregator() = default; - - bool HasData() 
const { - return !!Container; - } - - ui32 GetDifferentCount() const { - return Container ? Container->GetDifferentCount() : 0; - } - - void AddArray(const std::shared_ptr& array) { - if (!Container) { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if (!Container) { - if constexpr (arrow::has_c_type()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - if constexpr (arrow::has_string_view()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - AFL_VERIFY(false); - } - return false; - }); - } - Container->AddArray(array); - } -}; - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - auto loader = index.GetColumnLoaderVerified(EntityId); - std::shared_ptr result; - TDifferentElementsAggregator aggregator; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - aggregator.AddArray(rb->column(0)); - } - AFL_VERIFY(aggregator.HasData()); - portionStats.AddScalar(std::make_shared(aggregator.GetDifferentCount())); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - EntityId = proto.GetVariability().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableVariability()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h deleted file mode 100644 index ca46daf02aa6..000000000000 --- 
a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::String: - case NScheme::NTypeIds::Utf8: - case NScheme::NTypeIds::Uuid: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case 
NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Variability) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Variability) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make deleted file mode 100644 index 3baed9c3538a..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make +++ /dev/null @@ -1,10 +0,0 @@ -LIBRARY() - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/statistics/max - ydb/core/tx/columnshard/engines/scheme/statistics/variability - ydb/core/tx/columnshard/engines/scheme/statistics/protos -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h index c65cb1703ad0..8d290a8adcf2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h @@ -2,7 +2,7 @@ #include "common.h" #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ class TTiering { using TTiersMap = THashMap>; TTiersMap TierByName; TSet OrderedTiers; - TString TTLColumnName; + std::optional TTLColumnName; public: class TTieringContext { @@ 
-174,9 +174,14 @@ class TTiering { [[nodiscard]] bool Add(const std::shared_ptr& tier) { AFL_VERIFY(tier); if (!TTLColumnName) { + if (tier->GetEvictColumnName().Empty()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "empty_evict_column_name"); + return false; + } TTLColumnName = tier->GetEvictColumnName(); - } else if (TTLColumnName != tier->GetEvictColumnName()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", TTLColumnName)("column_new", tier->GetEvictColumnName()); + } else if (*TTLColumnName != tier->GetEvictColumnName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", *TTLColumnName) + ("column_new", tier->GetEvictColumnName()); return false; } @@ -194,13 +199,9 @@ class TTiering { return {}; } - const TString& GetTtlColumn() const { - AFL_VERIFY(TTLColumnName); - return TTLColumnName; - } - const TString& GetEvictColumnName() const { - return TTLColumnName; + AFL_VERIFY(TTLColumnName); + return *TTLColumnName; } TString GetDebugString() const { diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp index 300952cc3227..8143c27a82ef 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace NKikimr::NOlap { @@ -27,34 +27,38 @@ std::set ISnapshotSchema::GetPkColumnsIds() const { } -TConclusion> ISnapshotSchema::NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const { +TConclusion> ISnapshotSchema::NormalizeBatch( + const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const { + AFL_VERIFY(dataSchema.GetSnapshot() <= GetSnapshot()); if (dataSchema.GetSnapshot() == GetSnapshot()) { - return batch; + if 
(batch->GetColumnsCount() == GetColumnsCount()) { + return batch; + } } - Y_ABORT_UNLESS(dataSchema.GetSnapshot() < GetSnapshot()); - const std::shared_ptr& resultArrowSchema = GetSchema(); - std::vector> newColumns; - newColumns.reserve(resultArrowSchema->num_fields()); + const std::shared_ptr& resultArrowSchema = GetSchema(); + std::shared_ptr result = std::make_shared(batch->GetRecordsCount()); for (size_t i = 0; i < resultArrowSchema->fields().size(); ++i) { auto& resultField = resultArrowSchema->fields()[i]; - auto columnId = GetIndexInfo().GetColumnId(resultField->name()); - auto oldColumnIndex = dataSchema.GetFieldIndex(columnId); - if (oldColumnIndex >= 0) { // ColumnExists - auto oldColumnInfo = dataSchema.GetFieldByIndex(oldColumnIndex); - Y_ABORT_UNLESS(oldColumnInfo); - auto columnData = batch->GetColumnByName(oldColumnInfo->name()); - Y_ABORT_UNLESS(columnData); - newColumns.push_back(columnData); - } else { // AddNullColumn - auto conclusion = BuildDefaultBatch({ resultField }, batch->num_rows()); - if (conclusion.IsFail()) { - return conclusion; + auto columnId = GetIndexInfo().GetColumnIdVerified(resultField->name()); + auto oldField = dataSchema.GetFieldByColumnIdOptional(columnId); + if (oldField) { + auto fAccessor = batch->GetAccessorByNameOptional(oldField->name()); + if (fAccessor) { + auto conclusion = result->AddField(resultField, fAccessor); + if (conclusion.IsFail()) { + return conclusion; + } + continue; } - newColumns.push_back((*conclusion)->column(0)); + } + if (restoreColumnIds.contains(columnId)) { + AFL_VERIFY(!!GetExternalDefaultValueVerified(columnId) || GetIndexInfo().IsNullableVerified(columnId))("column_name", + GetIndexInfo().GetColumnName(columnId, false))("id", columnId); + result->AddField(resultField, GetColumnLoaderVerified(columnId)->BuildDefaultAccessor(batch->num_rows())).Validate(); } } - return arrow::RecordBatch::Make(resultArrowSchema, batch->num_rows(), newColumns); + return result; } TConclusion> 
ISnapshotSchema::PrepareForModification( @@ -74,16 +78,18 @@ TConclusion> ISnapshotSchema::PrepareForModi return TConclusionStatus::Fail("not valid incoming batch: " + status.ToString()); } - const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); + const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); - auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->field_names()); + auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->fields()); for (auto&& i : batch->schema()->fields()) { - AFL_VERIFY(GetIndexInfo().HasColumnName(i->name())); - if (!dstSchema->GetFieldByName(i->name())->Equals(i)) { - return TConclusionStatus::Fail("not equal field types for column '" + i->name() + "'"); + const ui32 columnId = GetIndexInfo().GetColumnIdVerified(i->name()); + auto fSchema = GetIndexInfo().GetColumnFieldVerified(columnId); + if (!fSchema->Equals(i)) { + return TConclusionStatus::Fail( + "not equal field types for column '" + i->name() + "': " + i->ToString() + " vs " + fSchema->ToString()); } - if (GetIndexInfo().IsNullableVerified(i->name())) { + if (GetIndexInfo().IsNullableVerified(columnId)) { continue; } if (NArrow::HasNulls(batch->GetColumnByName(i->name()))) { @@ -110,33 +116,48 @@ TConclusion> ISnapshotSchema::PrepareForModi Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, GetIndexInfo().GetPrimaryKey())); switch (mType) { - case NEvWrite::EModificationType::Delete: - return AddDefault(batch, true); case NEvWrite::EModificationType::Replace: - case NEvWrite::EModificationType::Insert: - return AddDefault(batch, false); case NEvWrite::EModificationType::Upsert: { AFL_VERIFY(batch->num_columns() <= dstSchema->num_fields()); if (batch->num_columns() < dstSchema->num_fields()) { - for (auto&& f : dstSchema->fields()) { - if (GetIndexInfo().IsNullableVerified(f->name())) { + for (ui32 idx = 0; idx < (ui32)dstSchema->num_fields(); ++idx) { + if 
(GetIndexInfo().IsNullableVerifiedByIndex(idx)) { continue; } - if (batch->GetColumnByName(f->name())) { + if (GetIndexInfo().GetColumnExternalDefaultValueByIndexVerified(idx)) { continue; } - if (!GetIndexInfo().GetColumnDefaultValueVerified(f->name())) { - return TConclusionStatus::Fail("empty field for non-default column: '" + f->name() + "'"); + if (batch->GetColumnByName(dstSchema->field(idx)->name())) { + continue; } + return TConclusionStatus::Fail("empty field for non-default column: '" + dstSchema->field(idx)->name() + "'"); } } return batch; } + case NEvWrite::EModificationType::Delete: + case NEvWrite::EModificationType::Insert: case NEvWrite::EModificationType::Update: return batch; } } +void ISnapshotSchema::AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const { + if (targetSchema->GetVersion() != GetVersion()) { + std::vector columnIdxToDelete; + for (size_t columnIdx = 0; columnIdx < batch.GetSchema()->GetFields().size(); ++columnIdx) { + const std::optional targetColumnId = targetSchema->GetColumnIdOptional(batch.GetSchema()->field(columnIdx)->name()); + const ui32 batchColumnId = GetColumnIdVerified(GetFieldByIndex(columnIdx)->name()); + if (!targetColumnId || *targetColumnId != batchColumnId) { + columnIdxToDelete.emplace_back(columnIdx); + } + } + if (!columnIdxToDelete.empty()) { + batch.DeleteFieldsByIndex(columnIdxToDelete); + } + } +} + ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { auto id = GetColumnIdOptional(columnName); AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names())); @@ -149,19 +170,19 @@ std::shared_ptr ISnapshotSchema::GetFieldByColumnIdVerified(const return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { auto result = GetColumnLoaderOptional(columnId); AFL_VERIFY(result); return 
result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { auto result = GetColumnLoaderOptional(columnName); AFL_VERIFY(result); return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { const std::optional id = GetColumnIdOptional(columnName); if (id) { return GetColumnLoaderOptional(*id); @@ -184,48 +205,78 @@ std::vector> ISnapshotSchema::GetAbsentFields(cons return result; } -TConclusion> ISnapshotSchema::BuildDefaultBatch(const std::vector>& fields, const ui32 rowsCount) const { - std::vector> columns; +TConclusionStatus ISnapshotSchema::CheckColumnsDefault(const std::vector>& fields) const { for (auto&& i : fields) { - auto defaultValue = GetDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { + const ui32 colId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(colId); + if (!defaultValue && !GetIndexInfo().IsNullableVerified(colId)) { return TConclusionStatus::Fail("not nullable field with no default: " + i->name()); } + } + return TConclusionStatus::Success(); +} + +TConclusion> ISnapshotSchema::BuildDefaultBatch( + const std::vector>& fields, const ui32 rowsCount, const bool force) const { + std::vector> columns; + for (auto&& i : fields) { + const ui32 columnId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(columnId); + if (!defaultValue && !GetIndexInfo().IsNullableVerified(columnId)) { + if (force) { + defaultValue = NArrow::DefaultScalar(i->type()); + } else { + return TConclusionStatus::Fail("not nullable field with no default: " + i->name()); + } + } columns.emplace_back(NArrow::TThreadSimpleArraysCache::Get(i->type(), 
defaultValue, rowsCount)); } return arrow::RecordBatch::Make(std::make_shared(fields), rowsCount, columns); } -std::shared_ptr ISnapshotSchema::GetDefaultValueVerified(const std::string& columnName) const { - return GetIndexInfo().GetColumnDefaultValueVerified(columnName); +std::shared_ptr ISnapshotSchema::GetExternalDefaultValueVerified(const std::string& columnName) const { + return GetIndexInfo().GetColumnExternalDefaultValueVerified(columnName); } -std::shared_ptr ISnapshotSchema::GetDefaultValueVerified(const ui32 columnId) const { - return GetIndexInfo().GetColumnDefaultValueVerified(columnId); +std::shared_ptr ISnapshotSchema::GetExternalDefaultValueVerified(const ui32 columnId) const { + return GetIndexInfo().GetColumnExternalDefaultValueVerified(columnId); } -TConclusion> ISnapshotSchema::AddDefault(const std::shared_ptr& batch, const bool force) const { - auto result = batch; - for (auto&& i : GetIndexInfo().ArrowSchema()->fields()) { - if (batch->schema()->GetFieldIndex(i->name()) != -1) { - continue; - } - auto defaultValue = GetDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { - if (!force) { - return TConclusionStatus::Fail("not nullable field withno default: " + i->name()); - } else { - defaultValue = NArrow::DefaultScalar(i->type()); +bool ISnapshotSchema::IsSpecialColumnId(const ui32 columnId) const { + return GetIndexInfo().IsSpecialColumn(columnId); +} + +std::set ISnapshotSchema::GetColumnsWithDifferentDefaults( + const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema) { + std::set result; + if (schemas.size() <= 1) { + return {}; + } + std::map> defaults; + for (auto& [_, blobSchema] : schemas) { + for (auto&& columnId : blobSchema->GetIndexInfo().GetColumnIds(true)) { + if (result.contains(columnId)) { + continue; + } + if (targetSchema && !targetSchema->HasColumnId(columnId)) { + continue; + } + auto def = 
blobSchema->GetIndexInfo().GetColumnExternalDefaultValueVerified(columnId); + if (!blobSchema->GetIndexInfo().IsNullableVerified(columnId) && !def) { + continue; + } + auto it = defaults.find(columnId); + if (it == defaults.end()) { + defaults.emplace(columnId, def); + } else if (NArrow::ScalarCompareNullable(def, it->second) != 0) { + result.emplace(columnId); } } - std::shared_ptr column = NArrow::TThreadSimpleArraysCache::Get(i->type(), defaultValue, batch->num_rows()); - result = NArrow::TStatusValidator::GetValid(result->AddColumn(result->num_columns(), i->name(), column)); + if (targetSchema && result.size() == targetSchema->GetIndexInfo().GetColumnIds(true).size()) { + break; + } } return result; } -bool ISnapshotSchema::IsSpecialColumnId(const ui32 columnId) const { - return GetIndexInfo().IsSpecialColumn(columnId); -} - } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h index 91d53230ec38..962989d75fb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -1,6 +1,8 @@ #pragma once -#include -#include +#include + +#include +#include #include #include @@ -19,34 +21,39 @@ class ISnapshotSchema { using TPtr = std::shared_ptr; virtual ~ISnapshotSchema() {} - virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; - std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; - std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; - std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; + std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; + std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) 
const; bool IsSpecialColumnId(const ui32 columnId) const; + virtual const std::vector& GetColumnIds() const = 0; - virtual TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; - TColumnSaver GetColumnSaver(const TString& columnName) const { + virtual NArrow::NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; + NArrow::NAccessor::TColumnSaver GetColumnSaver(const TString& columnName) const { return GetColumnSaver(GetColumnId(columnName)); } - TColumnSaver GetColumnSaver(const std::string& columnName) const { + NArrow::NAccessor::TColumnSaver GetColumnSaver(const std::string& columnName) const { return GetColumnSaver(TString(columnName.data(), columnName.size())); } std::vector> GetAbsentFields(const std::shared_ptr& existsSchema) const; - std::shared_ptr GetDefaultValueVerified(const std::string& columnName) const; - std::shared_ptr GetDefaultValueVerified(const ui32 columnId) const; - - TConclusion> BuildDefaultBatch(const std::vector>& fields, const ui32 rowsCount) const; - TConclusion> AddDefault(const std::shared_ptr& batch, const bool force) const; + std::shared_ptr GetExternalDefaultValueVerified(const std::string& columnName) const; + std::shared_ptr GetExternalDefaultValueVerified(const ui32 columnId) const; + TConclusion> BuildDefaultBatch( + const std::vector>& fields, const ui32 rowsCount, const bool force) const; + TConclusionStatus CheckColumnsDefault(const std::vector>& fields) const; std::vector GetPKColumnNames() const; virtual std::optional GetColumnIdOptional(const std::string& columnName) const = 0; + virtual ui32 GetColumnIdVerified(const std::string& columnName) const = 0; virtual int GetFieldIndex(const ui32 columnId) const = 0; + bool HasColumnId(const ui32 columnId) const { + return GetFieldIndex(columnId) >= 0; + } ui32 GetColumnId(const std::string& columnName) const; std::shared_ptr GetFieldByIndex(const int index) const; @@ -56,7 +63,7 @@ class ISnapshotSchema { TString DebugString() const { return 
DoDebugString(); } - virtual const std::shared_ptr& GetSchema() const = 0; + virtual const std::shared_ptr& GetSchema() const = 0; virtual const TIndexInfo& GetIndexInfo() const = 0; virtual const TSnapshot& GetSnapshot() const = 0; virtual ui64 GetVersion() const = 0; @@ -64,9 +71,13 @@ class ISnapshotSchema { std::set GetPkColumnsIds() const; - [[nodiscard]] TConclusion> NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const; + static std::set GetColumnsWithDifferentDefaults(const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema); + + [[nodiscard]] TConclusion> NormalizeBatch( + const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const; [[nodiscard]] TConclusion> PrepareForModification( const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType) const; + void AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp index 8832e7eb0ec8..a371806b2349 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp @@ -4,66 +4,58 @@ namespace NKikimr::NOlap { -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds) - : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) -{} +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds) + : TFilteredSnapshotSchema(originalSnapshot, std::vector(columnIds.begin(), columnIds.end())) { +} -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds) 
+TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds) : OriginalSnapshot(originalSnapshot) , ColumnIds(columnIds) { std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!ColumnIds.contains(OriginalSnapshot->GetIndexInfo().GetColumnId(i->name()))) { - continue; - } - schemaFields.emplace_back(i); - } - Schema = std::make_shared(schemaFields); -} - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames) - : OriginalSnapshot(originalSnapshot) { - for (auto&& i : columnNames) { - ColumnIds.emplace(OriginalSnapshot->GetColumnId(i)); - } - std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!columnNames.contains(i->name())) { - continue; - } - schemaFields.emplace_back(i); + for (auto&& i : columnIds) { + IdIntoIndex.emplace(i, schemaFields.size()); + schemaFields.emplace_back(originalSnapshot->GetFieldByColumnIdVerified(i)); } - Schema = std::make_shared(schemaFields); + Schema = std::make_shared(schemaFields); } TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnSaver(columnId); } std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnLoaderOptional(columnId); } std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { - return OriginalSnapshot->GetColumnIdOptional(columnName); + auto result = OriginalSnapshot->GetColumnIdOptional(columnName); + if (!result) { + return result; + } + if (!IdIntoIndex.contains(*result)) { + return std::nullopt; + } + return result; +} + +ui32 
TFilteredSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + auto result = OriginalSnapshot->GetColumnIdVerified(columnName); + AFL_VERIFY(IdIntoIndex.contains(result)); + return result; } int TFilteredSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - if (!ColumnIds.contains(columnId)) { - return -1; - } - TString columnName = OriginalSnapshot->GetIndexInfo().GetColumnName(columnId, false); - if (!columnName) { + auto it = IdIntoIndex.find(columnId); + if (it == IdIntoIndex.end()) { return -1; } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return it->second; } -const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { +const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h index e9fa1b41b7c2..8fc82ee6a304 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h @@ -8,21 +8,26 @@ namespace NKikimr::NOlap { class TFilteredSnapshotSchema: public ISnapshotSchema { ISnapshotSchema::TPtr OriginalSnapshot; - std::shared_ptr Schema; - std::set ColumnIds; + std::shared_ptr Schema; + std::vector ColumnIds; + THashMap IdIntoIndex; + protected: virtual TString DoDebugString() const override; public: - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames); + TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds); + TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds); + virtual const std::vector& 
GetColumnIds() const override { + return ColumnIds; + } TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp index 1fe6820cf547..05277b7b8967 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp @@ -21,16 +21,15 @@ std::optional TSnapshotSchema::GetColumnIdOptional(const std::string& colu return IndexInfo.GetColumnIdOptional(columnName); } +ui32 TSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + return IndexInfo.GetColumnIdVerified(columnName); +} + int TSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - const TString& columnName = IndexInfo.GetColumnName(columnId, false); - if (!columnName) { - return -1; - } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return IndexInfo.GetColumnIndexOptional(columnId).value_or(-1); } -const std::shared_ptr& TSnapshotSchema::GetSchema() const { +const std::shared_ptr& TSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h index 539d8f99a02c..5fa3c4ef7551 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap { class TSnapshotSchema: public ISnapshotSchema { private: TIndexInfo IndexInfo; - std::shared_ptr Schema; + std::shared_ptr Schema; TSnapshot Snapshot; protected: virtual TString DoDebugString() const override { @@ -23,12 +23,17 @@ class TSnapshotSchema: public ISnapshotSchema { public: TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); + virtual const std::vector& GetColumnIds() const override { + return IndexInfo.GetColumnIds(); + } + TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h index 70cf8830b051..fe554a790d8f 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h @@ -91,7 +91,7 @@ class TVersionedIndex { } } Y_ABORT_UNLESS(!Snapshots.empty()); - Y_ABORT_UNLESS(version.IsZero()); +// Y_ABORT_UNLESS(version.IsZero()); return Snapshots.begin()->second; } diff --git a/ydb/core/tx/columnshard/engines/scheme/ya.make b/ydb/core/tx/columnshard/engines/scheme/ya.make index 8684d7894338..744458ff4dcb 100644 --- a/ydb/core/tx/columnshard/engines/scheme/ya.make +++ 
b/ydb/core/tx/columnshard/engines/scheme/ya.make @@ -15,14 +15,13 @@ PEERDIR( ydb/library/actors/core ydb/core/tx/columnshard/engines/scheme/indexes - ydb/core/tx/columnshard/engines/scheme/statistics ydb/core/tx/columnshard/engines/scheme/abstract ydb/core/tx/columnshard/engines/scheme/versions ydb/core/tx/columnshard/engines/scheme/tiering ydb/core/tx/columnshard/engines/scheme/column ydb/core/tx/columnshard/engines/scheme/defaults + ydb/core/formats/arrow/accessor ydb/core/tx/columnshard/blobs_action/abstract - ydb/core/tx/columnshard/engines/changes/compaction ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h index 8f5ab2ff4311..95aa18603f46 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h index 7d7a1cc3d830..a5c278799fdd 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp index 3a2ce59ae22f..f664eb6afb6a 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -1,6 +1,7 @@ #include "tiering.h" #include #include +#include #include #include #include @@ -25,17 +26,18 @@ std::optional TTieringActualizer::Bu if (Tiering) { AFL_VERIFY(TieringColumnId); 
- auto statOperator = portionSchema->GetIndexInfo().GetStatistics(NStatistics::TIdentifier(NStatistics::EType::Max, {*TieringColumnId})); + auto indexMeta = portionSchema->GetIndexInfo().GetIndexMetaMax(*TieringColumnId); std::shared_ptr max; - if (!statOperator) { + if (!indexMeta) { max = portion.MaxValue(*TieringColumnId); if (!max) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_not_max"); return {}; } } else { - NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(statOperator); - max = statOperator.GetScalarVerified(portion.GetMeta().GetStatisticsStorage()); + NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(NIndexes::TIndexMetaContainer(indexMeta)); + const std::vector data = portion.GetIndexInplaceDataVerified(indexMeta->GetIndexId()); + max = indexMeta->GetMaxScalarVerified(data, portionSchema->GetIndexInfo().GetColumnFieldVerified(*TieringColumnId)->type()); } auto tieringInfo = Tiering->GetTierToMove(max, now); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("tiering_info", tieringInfo.DebugString()); @@ -172,7 +174,7 @@ void TTieringActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, co void TTieringActualizer::Refresh(const std::optional& info, const TAddExternalContext& externalContext) { Tiering = info; if (Tiering) { - TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetTtlColumn()); + TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetEvictColumnName()); } else { TieringColumnId = {}; } diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp index 485802b0e3c1..4a527f913fc7 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp @@ -1,18 +1,20 @@ #include "column.h" -#include +#include namespace NKikimr::NOlap::NChunks { -std::vector> TChunkPreparation::DoInternalSplitImpl(const 
TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto rb = NArrow::TStatusValidator::GetValid(ColumnInfo.GetLoader()->Apply(Data)); +std::vector> TChunkPreparation::DoInternalSplitImpl( + const TColumnSaver& saver, const std::shared_ptr& /*counters*/, const std::vector& splitSizes) const { + auto accessor = ColumnInfo.GetLoader()->ApplyVerified(Data, GetRecordsCountVerified()); + std::vector chunks = accessor->SplitBySizes(saver, Data, splitSizes); - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(rb, Data, splitSizes); std::vector> newChunks; for (auto&& i : chunks) { - Y_ABORT_UNLESS(i.GetSlicedBatch()->num_columns() == 1); - newChunks.emplace_back(std::make_shared(saver.Apply(i.GetSlicedBatch()), i.GetSlicedBatch()->column(0), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); + newChunks.emplace_back(std::make_shared( + i.GetSerializedData(), i.GetArray(), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); } + return newChunks; } -} +} // namespace NKikimr::NOlap::NChunks diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h index 7e05b45a9638..9de818c49fb6 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -22,6 +22,9 @@ class TChunkPreparation: public IPortionColumnChunk { virtual ui32 DoGetRecordsCountImpl() const override { return Record.GetMeta().GetNumRows(); } + virtual ui64 DoGetRawBytesImpl() const override { + return Record.GetMeta().GetRawBytes(); + } virtual TString DoDebugString() const override { return ""; } @@ -53,14 +56,14 @@ class TChunkPreparation: public IPortionColumnChunk { AFL_VERIFY(Data.size() == Record.BlobRange.Size || Record.BlobRange.Size == 0)("data", Data.size())("record", Record.BlobRange.Size); } - TChunkPreparation(const TString& data, const std::shared_ptr& 
column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) + TChunkPreparation(const TString& data, const std::shared_ptr& column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) : TBase(address.GetColumnId()) , Data(data) , Record(address, column, columnInfo) , ColumnInfo(columnInfo) { - Y_ABORT_UNLESS(column->length()); - First = NArrow::TStatusValidator::GetValid(column->GetScalar(0)); - Last = NArrow::TStatusValidator::GetValid(column->GetScalar(column->length() - 1)); + Y_ABORT_UNLESS(column->GetRecordsCount()); + First = column->GetScalar(0); + Last = column->GetScalar(column->GetRecordsCount() - 1); Record.BlobRange.Size = data.size(); } }; diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp index 79613b5b9798..007dff83e914 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp @@ -9,4 +9,13 @@ void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRan portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } +std::shared_ptr TPortionIndexChunk::DoCopyWithAnotherBlob( + TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const { + return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); +} + +void TPortionIndexChunk::DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, GetData())); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.h b/ydb/core/tx/columnshard/engines/storage/chunks/data.h index d5a91c19609c..e3f22ae2ed9d 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.h @@ -17,6 +17,7 @@ class 
TPortionIndexChunk: public IPortionDataChunk { return ""; } virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { + AFL_VERIFY(false); return {}; } virtual bool DoIsSplittable() const override { @@ -25,6 +26,9 @@ class TPortionIndexChunk: public IPortionDataChunk { virtual std::optional DoGetRecordsCount() const override { return RecordsCount; } + virtual std::optional DoGetRawBytes() const override { + return RawBytes; + } virtual std::shared_ptr DoGetFirstScalar() const override { return nullptr; } @@ -32,9 +36,9 @@ class TPortionIndexChunk: public IPortionDataChunk { return nullptr; } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override { - return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); - } + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const override; + public: TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) : TBase(address.GetColumnId(), address.GetChunkIdx()) diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp deleted file mode 100644 index 9aa56e56eda3..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "null_column.h" - -namespace NKikimr::NOlap::NChunks { - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h deleted file mode 100644 index 
c848f0f0cb45..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap::NChunks { - -class TDefaultChunkPreparation: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - const std::shared_ptr DefaultValue; - const ui32 RecordsCount; - TString Data; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - AFL_VERIFY(false); - return {}; - } - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return RecordsCount; - } - virtual TString DoDebugString() const override { - return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - AFL_VERIFY(false); - return TSimpleChunkMeta(nullptr, false, false); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return DefaultValue; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return DefaultValue; - } - -public: - TDefaultChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, - const std::shared_ptr& defaultValue, const TColumnSaver& saver) - : TBase(columnId) - , DefaultValue(defaultValue) - , RecordsCount(recordsCount) - { - Y_ABORT_UNLESS(RecordsCount); - Data = saver.Apply(NArrow::TThreadSimpleArraysCache::Get(f->type(), defaultValue, RecordsCount), f); - SetChunkIdx(0); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make index d61554bd6f0c..cff5b9f40b53 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( 
data.cpp column.cpp - null_column.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index d38851486e3d..ebb2b9acde63 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -47,10 +47,9 @@ bool TGranuleMeta::ErasePortion(const ui64 portion) { void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { if (portionAfter) { - PortionsIndex.AddPortion(portionAfter); - PortionInfoGuard.OnNewPortion(portionAfter); if (!portionAfter->HasRemoveSnapshot()) { + PortionsIndex.AddPortion(portionAfter); if (modificationGuard) { modificationGuard->AddPortion(portionAfter); } else { @@ -74,10 +73,9 @@ void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr port void TGranuleMeta::OnBeforeChangePortion(const std::shared_ptr portionBefore) { if (portionBefore) { - PortionsIndex.RemovePortion(portionBefore); - PortionInfoGuard.OnDropPortion(portionBefore); if (!portionBefore->HasRemoveSnapshot()) { + PortionsIndex.RemovePortion(portionBefore); OptimizerPlanner->StartModificationGuard().RemovePortion(portionBefore); ActualizationIndex->RemovePortion(portionBefore); } @@ -138,8 +136,7 @@ TGranuleMeta::TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, con , PortionInfoGuard(owner.GetCounters().BuildPortionBlobsGuard()) , Stats(owner.GetStats()) , StoragesManager(owner.GetStoragesManager()) - , PortionsIndex(*this) -{ + , PortionsIndex(*this, Counters.GetPortionsIndexCounters()) { NStorageOptimizer::IOptimizerPlannerConstructor::TBuildContext context(PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetPrimaryKey()); OptimizerPlanner = versionedIndex.GetLastSchema()->GetIndexInfo().GetCompactionPlannerConstructor()->BuildPlanner(context).DetachResult(); 
AFL_VERIFY(!!OptimizerPlanner); diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index 3b3db33a72cf..d79ef50e1883 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -21,9 +21,10 @@ class TColumnChunkLoadContext; class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { private: friend class TGranuleMeta; - THashMap ColumnStats; + THashMap ColumnStats; + public: - const THashMap& GetColumnStats() const { + const THashMap& GetColumnStats() const { return ColumnStats; } @@ -196,7 +197,7 @@ class TGranuleMeta: TNonCopyable { return OptimizerPlanner->SerializeToJsonVisual(); } - std::vector GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { return OptimizerPlanner->GetBucketPositions(); } @@ -231,11 +232,11 @@ class TGranuleMeta: TNonCopyable { } } - std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { - auto result = std::make_shared(); + std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { + auto result = std::make_shared(); for (auto&& i : GetAdditiveSummary().GetCompacted().GetColumnStats()) { auto field = schema->GetFieldByColumnIdVerified(i.first); - NOlap::TColumnSerializationStat columnInfo(i.first, field->name()); + NArrow::NSplitter::TColumnSerializationStat columnInfo(i.first, field->name()); columnInfo.Merge(i.second); result->AddStat(columnInfo); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp index 676d40ea1c48..e56487e5f8ef 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp @@ -11,7 +11,7 @@ TPortionsIndex::TPortionIntervals TPortionsIndex::GetIntervalFeatures(const TPor 
TPortionIntervals portionExcludeIntervals; while (true) { std::optional nextKey; - for (auto&& p : itFrom->second.GetPortionIds()) { + for (auto&& [p, _] : itFrom->second.GetPortionIds()) { if (skipPortions.contains(p)) { continue; } @@ -55,9 +55,13 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { auto itTo = Points.find(p->IndexKeyEnd()); AFL_VERIFY(itTo != Points.end()); { + const TPortionInfoStat stat(p); auto it = itFrom; while (true) { - it->second.RemoveContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.RemoveContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } @@ -67,19 +71,24 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { if (itFrom != itTo) { itFrom->second.RemoveStart(p); if (itFrom->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itFrom->second.GetIntervalStats()); Points.erase(itFrom); } itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } else { itTo->second.RemoveStart(p); itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } + RawMemoryUsage.FlushCounters(); + BlobMemoryUsage.FlushCounters(); } void TPortionsIndex::AddPortion(const std::shared_ptr& p) { @@ -89,13 +98,19 @@ void TPortionsIndex::AddPortion(const std::shared_ptr& p) { itTo->second.AddFinish(p); auto it = itFrom; + const TPortionInfoStat stat(p); while (true) { - it->second.AddContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.AddContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } AFL_VERIFY(++it 
!= Points.end()); } + RawMemoryUsage.FlushCounters(); + BlobMemoryUsage.FlushCounters(); } } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h index 09ca2d65e7c0..981943dc4dab 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace NKikimr::NOlap { @@ -7,26 +8,74 @@ class TGranuleMeta; namespace NKikimr::NOlap::NGranule::NPortionsIndex { +class TPortionInfoStat { +private: + std::shared_ptr PortionInfo; + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + TPortionInfoStat(const std::shared_ptr& portionInfo) + : PortionInfo(portionInfo) + , MinRawBytes(PortionInfo->GetMinMemoryForReadColumns({})) + , BlobBytes(PortionInfo->GetTotalBlobBytes()) + { + + } + + const TPortionInfo& GetPortionInfoVerified() const { + AFL_VERIFY(PortionInfo); + return *PortionInfo; + } +}; + +class TIntervalInfoStat { +private: + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + void Add(const TPortionInfoStat& source) { + MinRawBytes += source.GetMinRawBytes(); + BlobBytes += source.GetBlobBytes(); + } + + void Sub(const TPortionInfoStat& source) { + AFL_VERIFY(MinRawBytes >= source.GetMinRawBytes()); + MinRawBytes -= source.GetMinRawBytes(); + AFL_VERIFY(BlobBytes >= source.GetBlobBytes()); + BlobBytes -= source.GetBlobBytes(); + AFL_VERIFY(!!BlobBytes == !!MinRawBytes); + } + + bool operator!() const { + return !BlobBytes && !MinRawBytes; + } +}; + class TPortionsPKPoint { private: THashMap> Start; THashMap> Finish; - THashSet PortionIds; + THashMap PortionIds; + YDB_READONLY_DEF(TIntervalInfoStat, IntervalStats); + public: const THashMap>& GetStart() const { return Start; } void ProvidePortions(const TPortionsPKPoint& source) { - for (auto&& i : 
source.PortionIds) { + IntervalStats = TIntervalInfoStat(); + for (auto&& [i, stat] : source.PortionIds) { if (source.Finish.contains(i)) { continue; } - AFL_VERIFY(PortionIds.emplace(i).second); + AddContained(stat); } } - const THashSet& GetPortionIds() const { + const THashMap& GetPortionIds() const { return PortionIds; } @@ -34,12 +83,19 @@ class TPortionsPKPoint { return Start.empty() && Finish.empty(); } - void AddContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.emplace(portionId).second); + void AddContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Add(stat); + } + AFL_VERIFY(PortionIds.emplace(stat.GetPortionInfoVerified().GetPortionId(), stat).second); } - void RemoveContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.erase(portionId)); + void RemoveContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Sub(stat); + } + AFL_VERIFY(PortionIds.erase(stat.GetPortionInfoVerified().GetPortionId())); + AFL_VERIFY(PortionIds.size() || !IntervalStats); } void RemoveStart(const std::shared_ptr& p) { @@ -61,9 +117,48 @@ class TPortionsPKPoint { } }; +class TIntervalMemoryMonitoring { +private: + std::map CountMemoryUsages; + const NColumnShard::TIntervalMemoryCounters& Counters; + +public: + void Add(const ui64 mem) { + ++CountMemoryUsages[mem]; + } + + void Remove(const ui64 mem) { + auto it = CountMemoryUsages.find(mem); + AFL_VERIFY(it != CountMemoryUsages.end())("mem", mem); + if (!--it->second) { + CountMemoryUsages.erase(it); + } + } + + TIntervalMemoryMonitoring(const NColumnShard::TIntervalMemoryCounters& counters) + : Counters(counters) + { + + } + + ui64 GetMax() const { + if (CountMemoryUsages.size()) { + return CountMemoryUsages.rbegin()->first; + } else { + return 0; + } + } + + void FlushCounters() const { + Counters.MinReadBytes->SetValue(GetMax()); + } +}; + class TPortionsIndex { private: std::map Points; + 
TIntervalMemoryMonitoring RawMemoryUsage; + TIntervalMemoryMonitoring BlobMemoryUsage; const TGranuleMeta& Owner; std::map::iterator InsertPoint(const NArrow::TReplaceKey& key) { @@ -75,17 +170,34 @@ class TPortionsIndex { --itPred; it->second.ProvidePortions(itPred->second); } + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); } return it; } + void RemoveFromMemoryUsageControl(const TIntervalInfoStat& stat) { + RawMemoryUsage.Remove(stat.GetMinRawBytes()); + BlobMemoryUsage.Remove(stat.GetBlobBytes()); + } + public: - TPortionsIndex(const TGranuleMeta& owner) - : Owner(owner) + TPortionsIndex(const TGranuleMeta& owner, const NColumnShard::TPortionsIndexCounters& counters) + : RawMemoryUsage(counters.RawBytes) + , BlobMemoryUsage(counters.BlobBytes) + , Owner(owner) { } + ui64 GetMinRawMemoryRead() const { + return RawMemoryUsage.GetMax(); + } + + ui64 GetMinBlobMemoryRead() const { + return BlobMemoryUsage.GetMax(); + } + const std::map& GetPoints() const { return Points; } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp index 385f9d818d23..b017464eefeb 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp @@ -8,7 +8,7 @@ std::shared_ptr TGranulesStorage::GetGranuleForCom std::map> granulesSorted; ui32 countChecker = 0; std::optional priorityChecker; - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(); for (auto&& i : Tables) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); i.second->ActualizeOptimizer(now, actualizationLag); diff --git 
a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp index 1ec8aede916d..1613bd10e7d0 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp @@ -1,6 +1,6 @@ #include "checker.h" #include -#include +#include #include #include @@ -13,15 +13,12 @@ void TBloomFilterChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapInde } bool TBloomFilterChecker::DoCheckImpl(const std::vector& blobs) const { + AFL_VERIFY(blobs.size() == 1); for (auto&& blob : blobs) { - auto rb = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->Deserialize(blob)); - AFL_VERIFY(rb); - AFL_VERIFY(rb->schema()->num_fields() == 1); - AFL_VERIFY(rb->schema()->field(0)->type()->id() == arrow::Type::BOOL); - auto& bArray = static_cast(*rb->column(0)); + TFixStringBitsStorage bits(blob); bool found = true; for (auto&& i : HashValues) { - if (!bArray.Value(i % bArray.length())) { + if (!bits.Get(i % bits.GetSizeBits())) { found = false; break; } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h index 92ecf9534d29..740af9f1720d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h @@ -2,6 +2,42 @@ #include namespace NKikimr::NOlap::NIndexes { +class TFixStringBitsStorage { +private: + YDB_READONLY_DEF(TString, Data); + +public: + TFixStringBitsStorage(const TString& data) + : Data(data) + {} + + ui32 GetSizeBits() const { + return Data.size() * 8; + } + + TFixStringBitsStorage(const ui32 sizeBits) + : Data(sizeBits / 8 + ((sizeBits % 8) ? 
1 : 0), '\0') { + } + + void Set(const bool val, const ui32 idx) { + AFL_VERIFY(idx < GetSizeBits()); + auto* start = &Data[idx / 8]; + ui8 word = (*(ui8*)start); + if (val) { + word |= 1 << (idx % 8); + } else { + word &= (Max() - (1 << (idx % 8))); + } + memcpy(start, &word, sizeof(ui8)); + } + + bool Get(const ui32 idx) const { + AFL_VERIFY(idx < GetSizeBits()); + const ui8 start = (*(ui8*)&Data[idx / 8]); + return start & (1 << (idx % 8)); + } +}; + class TBloomFilterChecker: public TSimpleIndexChecker { public: static TString GetClassNameStatic() { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp index 9a464f67d595..fa11002fe17f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp @@ -15,7 +15,7 @@ std::shared_ptr TBloomIndexConstructor::Do } AFL_VERIFY(columnIds.emplace(columnInfo->GetId()).second); } - return std::make_shared(indexId, indexName, columnIds, FalsePositiveProbability); + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::DefaultStorageId), columnIds, FalsePositiveProbability); } NKikimr::TConclusionStatus TBloomIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp index 3c52518d0b8b..a2d84cb10f6d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp @@ -1,6 +1,6 @@ #include "meta.h" #include "checker.h" -#include +#include #include #include #include @@ -10,32 +10,26 @@ namespace NKikimr::NOlap::NIndexes { -std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { +TString 
TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { std::set hashes; - for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(3 * i); + { + NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(0); for (reader.Start(); reader.IsCorrect(); reader.ReadNext()) { hashCalcer.Start(); for (auto&& i : reader) { NArrow::NHash::TXX64::AppendField(i.GetCurrentChunk(), i.GetCurrentRecordIndex(), hashCalcer); } - const ui64 h = hashCalcer.Finish(); - hashes.emplace(h); + hashes.emplace(hashCalcer.Finish()); } } - const ui32 bitsCount = hashes.size() / std::log(2); - std::vector flags(bitsCount, false); - for (auto&& i : hashes) { - flags[i % flags.size()] = true; - } - - arrow::BooleanBuilder builder; - auto res = builder.Reserve(flags.size()); - NArrow::TStatusValidator::Validate(builder.AppendValues(flags)); - std::shared_ptr out; - NArrow::TStatusValidator::Validate(builder.Finish(&out)); - return arrow::RecordBatch::Make(ResultSchema, bitsCount, {out}); + const ui32 bitsCount = HashesCount * hashes.size() / std::log(2); + TFixStringBitsStorage bits(bitsCount); + const auto pred = [&bits](const ui64 hash) { + bits.Set(true, hash % bits.GetSizeBits()); + }; + BuildHashesSet(hashes, pred); + return bits.GetData(); } void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { @@ -57,17 +51,19 @@ void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr hashes; + const auto pred = [&hashes](const ui64 hash) { + hashes.emplace(hash); + }; + NArrow::NHash::NXX64::TStreamStringHashCalcer calcer(0); for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer calcer(3 * i); calcer.Start(); for (auto&& i : foundColumns) { NArrow::NHash::TXX64::AppendField(i.second, calcer); } - const ui64 hash = calcer.Finish(); - hashes.emplace(hash); + BuildHashesSet(calcer.Finish(), pred); } 
branch->MutableIndexes().emplace_back(std::make_shared(GetIndexId(), std::move(hashes))); } } -} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h index d7cb9c6ab4da..ac07cd4793de 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h @@ -20,6 +20,27 @@ class TBloomIndexMeta: public TIndexByColumns { AFL_VERIFY(FalsePositiveProbability < 1 && FalsePositiveProbability >= 0.01); HashesCount = -1 * std::log(FalsePositiveProbability) / std::log(2); } + + static const ui64 HashesConstructorP = ((ui64)2 << 31) - 1; + static const ui64 HashesConstructorA = (ui64)2 << 16; + + template + void BuildHashesSet(const ui64 originalHash, const TActor& actor) const { + AFL_VERIFY(HashesCount < HashesConstructorP); + for (ui32 b = 1; b < HashesCount; ++b) { + const ui64 hash = (HashesConstructorA * originalHash + b) % HashesConstructorP; + actor(hash); + } + } + + template + void BuildHashesSet(const TContainer& originalHashes, const TActor& actor) const { + AFL_VERIFY(HashesCount < HashesConstructorP); + for (auto&& hOriginal : originalHashes) { + BuildHashesSet(hOriginal, actor); + } + } + protected: virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { const auto* bMeta = dynamic_cast(&newMeta); @@ -31,7 +52,7 @@ class TBloomIndexMeta: public TIndexByColumns { } virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { 
AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); @@ -54,8 +75,8 @@ class TBloomIndexMeta: public TIndexByColumns { public: TBloomIndexMeta() = default; - TBloomIndexMeta(const ui32 indexId, const TString& indexName, std::set& columnIds, const double fpProbability) - : TBase(indexId, indexName, columnIds) + TBloomIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, std::set& columnIds, const double fpProbability) + : TBase(indexId, indexName, columnIds, storageId) , FalsePositiveProbability(fpProbability) { Initialize(); } @@ -65,4 +86,4 @@ class TBloomIndexMeta: public TIndexByColumns { } }; -} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp new file mode 100644 index 000000000000..aa40668897d4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp @@ -0,0 +1,22 @@ +#include "checker.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +void TCountMinSketchChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapIndexChecker& proto) const { + proto.MutableCountMinSketch(); +} + +bool TCountMinSketchChecker::DoCheckImpl(const std::vector& blobs) const { + Y_UNUSED(blobs); + return true; +} + +bool TCountMinSketchChecker::DoDeserializeFromProtoImpl(const NKikimrSSA::TProgram::TOlapIndexChecker& proto) { + return proto.HasCountMinSketch(); +} + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.h b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.h new file mode 100644 index 000000000000..ffa073b2e400 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.h @@ -0,0 +1,32 @@ +#pragma once 
+#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +class TCountMinSketchChecker: public TSimpleIndexChecker { +public: + static TString GetClassNameStatic() { + return "COUNT_MIN_SKETCH"; + } +private: + using TBase = TSimpleIndexChecker; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual bool DoDeserializeFromProtoImpl(const NKikimrSSA::TProgram::TOlapIndexChecker& proto) override; + virtual void DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapIndexChecker& proto) const override; + + virtual bool DoCheckImpl(const std::vector& blobs) const override; + +public: + TCountMinSketchChecker() = default; + TCountMinSketchChecker(const ui32 indexId) + : TBase(indexId) + {} + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.cpp new file mode 100644 index 000000000000..7dcbaa9db476 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.cpp @@ -0,0 +1,63 @@ +#include "constructor.h" +#include "meta.h" + +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +std::shared_ptr TCountMinSketchConstructor::DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { + std::set columnIds; + if (ColumnNames.empty()) { + for (const auto& [id, _] : currentSchema.GetColumns().GetColumns()) { + AFL_VERIFY(columnIds.emplace(id).second); + } + } + for (auto&& i : ColumnNames) { + auto* columnInfo = currentSchema.GetColumns().GetByName(i); + if (!columnInfo) { + errors.AddError("no column with name " + i); + return nullptr; + } + AFL_VERIFY(columnIds.emplace(columnInfo->GetId()).second); + } + return 
std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::LocalMetadataStorageId), columnIds); +} + +NKikimr::TConclusionStatus TCountMinSketchConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (!jsonInfo.Has("column_names")) { + return TConclusionStatus::Fail("column_names have to be in count min sketch features"); + } + const NJson::TJsonValue::TArray* columnNamesArray; + if (!jsonInfo["column_names"].GetArrayPointer(&columnNamesArray)) { + return TConclusionStatus::Fail("column_names have to be in count min sketch features as array ['column_name_1', ... , 'column_name_N']"); + } + for (auto&& i : *columnNamesArray) { + if (!i.IsString()) { + return TConclusionStatus::Fail("column_names have to be in count min sketch features as array of strings ['column_name_1', ... , 'column_name_N']"); + } + ColumnNames.emplace(i.GetString()); + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusionStatus TCountMinSketchConstructor::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (!proto.HasCountMinSketch()) { + const TString errorMessage = "not found CountMinSketch section in proto: \"" + proto.DebugString() + "\""; + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", errorMessage); + return TConclusionStatus::Fail(errorMessage); + } + auto& sketch = proto.GetCountMinSketch(); + for (auto&& i : sketch.GetColumnNames()) { + ColumnNames.emplace(i); + } + return TConclusionStatus::Success(); +} + +void TCountMinSketchConstructor::DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + auto* sketchProto = proto.MutableCountMinSketch(); + for (auto&& i : ColumnNames) { + sketchProto->AddColumnNames(i); + } +} + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.h b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.h new file mode 100644 index 
000000000000..86d7e34fa577 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.h @@ -0,0 +1,31 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +class TCountMinSketchConstructor: public IIndexMetaConstructor { +public: + static TString GetClassNameStatic() { + return "COUNT_MIN_SKETCH"; + } +private: + std::set ColumnNames; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const override; + + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) override; + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const override; + +public: + TCountMinSketchConstructor() = default; + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp new file mode 100644 index 000000000000..80d154a751be --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp @@ -0,0 +1,56 @@ +#include "meta.h" +#include "checker.h" +#include +#include +#include +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { + auto sketch = std::unique_ptr(TCountMinSketch::Create()); + + for (auto& colReader : reader) { + for (colReader.Start(); colReader.IsCorrect(); colReader.ReadNextChunk()) { + auto array = colReader.GetCurrentChunk(); + + 
NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TArray = typename arrow::TypeTraits::ArrayType; + + const TArray& arrTyped = static_cast(*array); + if constexpr (arrow::has_c_type()) { + for (int64_t i = 0; i < arrTyped.length(); ++i) { + auto cell = TCell::Make(arrTyped.Value(i)); + sketch->Count(cell.Data(), cell.Size()); + } + return true; + } + if constexpr (arrow::has_string_view()) { + for (int64_t i = 0; i < arrTyped.length(); ++i) { + auto view = arrTyped.GetView(i); + sketch->Count(view.data(), view.size()); + } + return true; + } + AFL_VERIFY(false)("message", "Unsupported arrow type for building an index"); + return false; + }); + } + } + + TString result(sketch->AsStringBuf()); + return result; +} + +void TIndexMeta::DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& /*schema*/) const { + for (auto&& branch : info->GetBranches()) { + branch->MutableIndexes().emplace_back(std::make_shared(GetIndexId())); + } +} + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h new file mode 100644 index 000000000000..2c23af1fefdb --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h @@ -0,0 +1,63 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +class TIndexMeta: public TIndexByColumns { +public: + static TString GetClassNameStatic() { + return "COUNT_MIN_SKETCH"; + } + +private: + using TBase = TIndexByColumns; + + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { + const auto* bMeta = dynamic_cast(&newMeta); + if (!bMeta) { + return TConclusionStatus::Fail("cannot read meta as appropriate class: " + GetClassName() + ". 
Meta said that class name is " + newMeta.GetClassName()); + } + return TBase::CheckSameColumnsForModification(newMeta); + } + + virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; + + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { + AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); + AFL_VERIFY(proto.HasCountMinSketch()); + auto& sketch = proto.GetCountMinSketch(); + for (auto&& i : sketch.GetColumnIds()) { + ColumnIds.emplace(i); + } + return true; + } + + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const override { + auto* sketchProto = proto.MutableCountMinSketch(); + for (auto&& i : ColumnIds) { + sketchProto->AddColumnIds(i); + } + } + +public: + TIndexMeta() = default; + TIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, const std::set& columnIds) + : TBase(indexId, indexName, columnIds, storageId) { + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + const std::set& GetColumnIds() const { + return ColumnIds; + } + +}; + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/ya.make new file mode 100644 index 000000000000..bcba53e477ae --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL meta.cpp + GLOBAL checker.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/formats/arrow + ydb/core/tx/columnshard/engines/storage/indexes/portions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp new file mode 
100644 index 000000000000..6c1efcc0e570 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp @@ -0,0 +1,56 @@ +#include "constructor.h" +#include "meta.h" + +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +std::shared_ptr TIndexConstructor::DoCreateIndexMeta( + const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { + ui32 columnId; + { + auto* columnInfo = currentSchema.GetColumns().GetByName(ColumnName); + if (!columnInfo) { + errors.AddError("no column with name " + ColumnName); + return nullptr; + } + if (!TIndexMeta::IsAvailableType(columnInfo->GetType())) { + errors.AddError("inappropriate type for max index"); + return nullptr; + } + columnId = columnInfo->GetId(); + } + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::LocalMetadataStorageId), columnId); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (!jsonInfo.Has("column_name")) { + return TConclusionStatus::Fail("column_name have to be in max index features"); + } + if (!jsonInfo["column_name"].GetString(&ColumnName)) { + return TConclusionStatus::Fail("column_name have to be in max index features as string"); + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (!proto.HasMaxIndex()) { + const TString errorMessage = "Not found MaxIndex section in proto: \"" + proto.DebugString() + "\""; + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", errorMessage); + return TConclusionStatus::Fail(errorMessage); + } + auto& bIndex = proto.GetMaxIndex(); + ColumnName = bIndex.GetColumnName(); + if (!ColumnName) { + return TConclusionStatus::Fail("Empty column name in MaxIndex proto"); + } + return TConclusionStatus::Success(); +} + +void 
TIndexConstructor::DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + auto* filterProto = proto.MutableMaxIndex(); + AFL_VERIFY(!!ColumnName)("problem", "not initialized max index info trying to serialize"); + filterProto->SetColumnName(ColumnName); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h new file mode 100644 index 000000000000..35faabebb220 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h @@ -0,0 +1,30 @@ +#pragma once +#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexConstructor: public IIndexMetaConstructor { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + TString ColumnName; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const override; + + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) override; + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const override; + +public: + TIndexConstructor() = default; + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp new file mode 100644 index 000000000000..b672f278e017 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp @@ -0,0 +1,52 @@ +#include "meta.h" + +#include +#include +#include 
+#include + +#include +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { + std::shared_ptr result; + AFL_VERIFY(reader.GetColumnsCount() == 1)("count", reader.GetColumnsCount()); + { + TChunkedColumnReader cReader = *reader.begin(); + for (reader.Start(); cReader.IsCorrect(); cReader.ReadNextChunk()) { + auto currentScalar = cReader.GetCurrentAccessor()->GetMaxScalar(); + AFL_VERIFY(currentScalar); + if (!result || NArrow::ScalarCompare(*result, *currentScalar) == -1) { + result = currentScalar; + } + } + } + return NArrow::NScalar::TSerializer::SerializePayloadToString(result).DetachResult(); +} + +void TIndexMeta::DoFillIndexCheckers( + const std::shared_ptr& /*info*/, const NSchemeShard::TOlapSchema& /*schema*/) const { +} + +std::shared_ptr TIndexMeta::GetMaxScalarVerified( + const std::vector& data, const std::shared_ptr& dataType) const { + AFL_VERIFY(data.size()); + std::shared_ptr result; + for (auto&& d : data) { + std::shared_ptr current = NArrow::NScalar::TSerializer::DeserializeFromStringWithPayload(d, dataType).DetachResult(); + if (!result || NArrow::ScalarCompare(*result, *current) == -1) { + result = current; + } + } + return result; +} + +NJson::TJsonValue TIndexMeta::DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const { + AFL_VERIFY(ColumnIds.size() == 1); + auto scalar = GetMaxScalarVerified({ data }, indexInfo.GetColumnFeaturesVerified(*ColumnIds.begin()).GetArrowField()->type()); + return scalar->ToString(); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h new file mode 100644 index 000000000000..ef58ede92956 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h @@ -0,0 +1,79 @@ +#pragma once +#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexMeta: public 
TIndexByColumns { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + using TBase = TIndexByColumns; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { + Y_UNUSED(newMeta); + return TConclusionStatus::Fail("max index not modifiable"); + } + virtual void DoFillIndexCheckers( + const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; + + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { + AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); + AFL_VERIFY(proto.HasMaxIndex()); + auto& bFilter = proto.GetMaxIndex(); + if (!bFilter.GetColumnId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect column id"); + return false; + }; + ColumnIds.emplace(bFilter.GetColumnId()); + return true; + } + + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const override; + + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const override { + AFL_VERIFY(ColumnIds.size() == 1); + auto* filterProto = proto.MutableMaxIndex(); + filterProto->SetColumnId(*ColumnIds.begin()); + } + +public: + TIndexMeta() = default; + TIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, const ui32& columnId) + : TBase(indexId, indexName, { columnId }, storageId) { + } + + ui32 GetColumnId() const { + AFL_VERIFY(ColumnIds.size() == 1); + return *ColumnIds.begin(); + } + + static bool IsAvailableType(const NScheme::TTypeInfo type) { + auto dataTypeResult = NArrow::GetArrowType(type); + if (!dataTypeResult.ok()) { + return false; + } + if (!NArrow::SwitchType((*dataTypeResult)->id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr 
(arrow::has_c_type()) { + return true; + } + return false; + })) { + return false; + } + + return true; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + std::shared_ptr GetMaxScalarVerified(const std::vector& data, const std::shared_ptr& type) const; +}; + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make new file mode 100644 index 000000000000..7a24787285e2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL meta.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/formats/arrow + ydb/core/tx/columnshard/engines/storage/indexes/portions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp index 0aa6e4168f7e..3f8634cac619 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp @@ -6,12 +6,8 @@ namespace NKikimr::NOlap::NIndexes { -void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const { - AFL_VERIFY(!bRange.IsValid()); - portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); -} - -std::shared_ptr TIndexByColumns::DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { +std::shared_ptr TIndexByColumns::DoBuildIndex( + const THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); AFL_VERIFY(data.size()); std::vector columnReaders; @@ -25,9 +21,8 @@ std::shared_ptr TIndexByColumns::DoBuildIndex recordsCount += i->GetRecordsCountVerified(); } TChunkedBatchReader reader(std::move(columnReaders)); - std::shared_ptr indexBatch = 
DoBuildIndexImpl(reader); - const TString indexData = Serializer->SerializeFull(indexBatch); - return std::make_shared(TChunkAddress(indexId, 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); + const TString indexData = DoBuildIndexImpl(reader); + return std::make_shared(TChunkAddress(GetIndexId(), 0), recordsCount, indexData.size(), indexData); } bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) { @@ -35,8 +30,8 @@ bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDe return true; } -TIndexByColumns::TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds) - : TBase(indexId, indexName) +TIndexByColumns::TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId) + : TBase(indexId, indexName, storageId) , ColumnIds(columnIds) { Serializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); @@ -58,4 +53,4 @@ NKikimr::TConclusionStatus TIndexByColumns::CheckSameColumnsForModification(cons return TConclusionStatus::Success(); } -} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h index fba7daa9e77c..5356d5c4302d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h @@ -1,66 +1,28 @@ #pragma once #include -#include +#include #include namespace NKikimr::NOlap::NIndexes { -class TPortionIndexChunk: public IPortionDataChunk { -private: - using TBase = IPortionDataChunk; - const ui32 RecordsCount; - const ui64 RawBytes; - const TString Data; -protected: - virtual const TString& DoGetData() const override { - return Data; - } - virtual TString DoDebugString() const override { - return 
""; - } - virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { - return {}; - } - virtual bool DoIsSplittable() const override { - return false; - } - virtual std::optional DoGetRecordsCount() const override { - return RecordsCount; - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } - virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; -public: - TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) - : TBase(address.GetColumnId(), address.GetChunkIdx()) - , RecordsCount(recordsCount) - , RawBytes(rawBytes) - , Data(data) - { - } - -}; - class TIndexByColumns: public IIndexMeta { private: using TBase = IIndexMeta; std::shared_ptr Serializer; + protected: std::set ColumnIds; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const override final; - virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) override; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; + + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const override final; + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override; TConclusionStatus CheckSameColumnsForModification(const IIndexMeta& newMeta) const; public: TIndexByColumns() = default; - TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds); + TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId); }; -} // 
namespace NKikimr::NOlap::NIndexes \ No newline at end of file +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make index 076d439d54af..0ce6d8f9987f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make @@ -7,7 +7,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ydb/core/tx/columnshard/engines/storage/chunks ydb/core/tx/columnshard/engines/scheme/indexes/abstract ydb/core/tx/columnshard/engines/portions diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make index c97b9d1ae656..0459c906d836 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make @@ -3,6 +3,8 @@ LIBRARY() PEERDIR( ydb/core/tx/columnshard/engines/storage/indexes/portions ydb/core/tx/columnshard/engines/storage/indexes/bloom + ydb/core/tx/columnshard/engines/storage/indexes/max + ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch ) END() diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h index 21647072eb60..4bd196e552d0 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h @@ -134,7 +134,7 @@ class IOptimizerPlanner { return DoDebugString(); } - virtual std::vector GetBucketPositions() const = 0; + virtual NArrow::NMerger::TIntervalPositions GetBucketPositions() const = 0; bool IsLocked(const std::shared_ptr& dataLocksManager) const { return DoIsLocked(dataLocksManager); } diff --git 
a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp index a36e976ed351..36f467a03133 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp @@ -3,8 +3,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h index 246b349f2a77..d686fc719112 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h @@ -19,8 +19,6 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { -static const ui64 SmallPortionDetectSizeLimit = 1 << 20; - TDuration GetCommonFreshnessCheckDuration(); class TSimplePortionsGroupInfo { @@ -683,7 +681,7 @@ class TPortionsBucket: public TMoveOnly { return; } MainPortion->InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Others.IsEmpty() && currentInstant > MainPortion->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60))); + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings()); } public: TTaskDescription GetTaskDescription() const { @@ -899,15 +897,15 @@ class TPortionsBucket: public TMoveOnly { auto result = 
std::make_shared(granule, portions, saverContext); if (MainPortion) { NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyStart().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } if (!nextBorder && MainPortion && !forceMergeForTests) { NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyEnd().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, true, false); + result->AddCheckPoint(pos, true); } if (stopPoint) { NArrow::NMerger::TSortableBatchPosition pos(stopPoint->ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } return result; } @@ -1104,7 +1102,7 @@ class TPortionBuckets { } void RemovePortion(const std::shared_ptr& portion) { - if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->RemovePortion(portion); } if (!RemoveBucket(portion)) { @@ -1146,7 +1144,7 @@ class TPortionBuckets { } void AddPortion(const std::shared_ptr& portion, const TInstant now) { - if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->AddPortion(portion); AddOther(portion, now); return; @@ -1181,15 +1179,15 @@ class TPortionBuckets { } } - std::vector GetBucketPositions() const { - std::vector result; + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions result; for (auto&& 
i : Buckets) { AFL_VERIFY(i.second->GetStartPos()); - result.emplace_back(*i.second->GetStartPos()); + result.AddPosition(*i.second->GetStartPos(), false); } - if (Buckets.size()) { + if (Buckets.size() && Buckets.rbegin()->second->GetPortion()->GetRecordsCount() > 1) { NArrow::NMerger::TSortableBatchPosition pos(Buckets.rbegin()->second->GetPortion()->IndexKeyEnd().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); - result.emplace_back(pos); + result.AddPosition(std::move(pos), false); } return result; } @@ -1254,7 +1252,7 @@ class TOptimizerPlanner: public IOptimizerPlanner { public: - virtual std::vector GetBucketPositions() const override { + virtual NArrow::NMerger::TIntervalPositions GetBucketPositions() const override { return Buckets.GetBucketPositions(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp index baf229b3d1fd..2fe68710d805 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp @@ -4,8 +4,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } -} +} // namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp index 9e8d21bb9357..bc007d0fff10 100644 --- 
a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp @@ -2,12 +2,11 @@ #include #include #include -#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { std::shared_ptr TOptimizerPlannerConstructor::BuildLogic() const { - const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(FreshnessCheckDuration); + const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); std::shared_ptr logic; if (LogicName == "one_head") { logic = std::make_shared(freshnessCheckDuration); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h index 8b85a25c0877..cabe72ccc2a9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h @@ -1,13 +1,16 @@ #pragma once +#include #include #include +#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { class TOptimizerPlannerConstructor: public IOptimizerPlannerConstructor { private: YDB_READONLY_DEF(TString, LogicName); - YDB_READONLY(TDuration, FreshnessCheckDuration, TDuration::Seconds(300)); + YDB_READONLY(TDuration, FreshnessCheckDuration, NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration()); + public: static TString GetClassNameStatic() { return "s-buckets"; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp index 4fd497984104..ec344a674fd7 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp +++ 
b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { void TPortionsBucket::RebuildOptimizedFeature(const TInstant currentInstant) const { for (auto&& [_, p] : Portions) { p.MutablePortionInfo().InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Portions.size() == 1 && currentInstant > p->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60)) + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings() ); } } @@ -28,7 +28,7 @@ std::shared_ptr TPortionsBucket::BuildOpti auto result = std::make_shared(granule, context.GetPortions(), saverContext); for (auto&& i : context.GetSplitRightOpenIntervalPoints()) { NArrow::NMerger::TSortableBatchPosition pos(i.ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } return result; } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h index a551fbbb8a1d..56bddb8547fb 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h @@ -220,14 +220,14 @@ class TPortionBuckets { return bucketForOptimization->BuildOptimizationTask(granule, locksManager, PrimaryKeysSchema, StoragesManager); } - std::vector GetBucketPositions() const { - std::vector result; + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions result; for (auto&& i : Buckets) { if (!i.first.HasValue()) { continue; } NArrow::NMerger::TSortableBatchPosition posStart(i.first.GetValueVerified().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); - 
result.emplace_back(posStart); + result.AddPosition(std::move(posStart), false); } return result; } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h index 3e4cef0f2970..b2d169db8698 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp index 465e19e8379b..28d2914ed392 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp @@ -67,7 +67,7 @@ NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCompactionTaskResult TTimeSliceLo NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCalcWeightResult TTimeSliceLogic::DoCalcWeight(const TInstant /*now*/, const TBucketInfo& bucket) const { ui64 size = 0; - ui32 count = 0; + ui64 count = 0; for (auto&& [maxInstant, portions] : bucket.GetSnapshotPortions()) { for (auto&& [_, p] : portions) { if (p.GetTotalBlobBytes() > compactedDetector) { diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h index 4e8595e20f1d..7d756f09deff 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h @@ -64,7 +64,7 @@ class TOptimizerPlanner: public IOptimizerPlanner { } public: - virtual std::vector GetBucketPositions() const override { + virtual 
NArrow::NMerger::TIntervalPositions GetBucketPositions() const override { return Buckets.GetBucketPositions(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp index c4aec7dd234e..420a9e5901e9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include diff --git a/ydb/core/tx/columnshard/engines/ut/helper.cpp b/ydb/core/tx/columnshard/engines/ut/helper.cpp index eee9159edea7..56a5c26ba492 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.cpp +++ b/ydb/core/tx/columnshard/engines/ut/helper.cpp @@ -1,10 +1,25 @@ #include "helper.h" +#include namespace NKikimr::NOlap::NEngines::NTest { +std::shared_ptr TLocalHelper::GetMetaSchema() { + return std::make_shared(arrow::FieldVector({ std::make_shared("1", arrow::uint64()) })); +} + NKikimrTxColumnShard::TLogicalMetadata TLocalHelper::GetMetaProto() { NKikimrTxColumnShard::TLogicalMetadata result; result.SetDirtyWriteTimeSeconds(TInstant::Now().Seconds()); + + std::vector> columns; + auto schema = GetMetaSchema(); + for (auto&& i : schema->fields()) { + columns.emplace_back(NArrow::TThreadSimpleArraysCache::Get(i->type(), NArrow::DefaultScalar(i->type()), 1)); + } + auto batch = arrow::RecordBatch::Make(schema, 1, columns); + + NArrow::TFirstLastSpecialKeys flKeys = NArrow::TFirstLastSpecialKeys(batch); + result.SetSpecialKeysPayloadData(flKeys.SerializePayloadToString()); return result; } diff --git a/ydb/core/tx/columnshard/engines/ut/helper.h b/ydb/core/tx/columnshard/engines/ut/helper.h index 8e17730a21c6..c7072c13af91 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.h +++ b/ydb/core/tx/columnshard/engines/ut/helper.h @@ -6,6 +6,7 @@ namespace NKikimr::NOlap::NEngines::NTest { class TLocalHelper { public: 
static NKikimrTxColumnShard::TLogicalMetadata GetMetaProto(); + static std::shared_ptr GetMetaSchema(); }; }; \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp index 946b657400ca..d840a5a64f37 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp @@ -18,13 +18,13 @@ class TTestInsertTableDB : public IDbWrapper { public: void Insert(const TInsertedData&) override { } - void Commit(const TInsertedData&) override { + void Commit(const TCommittedData&) override { } void Abort(const TInsertedData&) override { } void EraseInserted(const TInsertedData&) override { } - void EraseCommitted(const TInsertedData&) override { + void EraseCommitted(const TCommittedData&) override { } void EraseAborted(const TInsertedData&) override { } @@ -73,7 +73,7 @@ class TTestInsertTableDB : public IDbWrapper { Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { Y_UNIT_TEST(TestInsertCommit) { - ui64 writeId = 0; + TInsertWriteId writeId = (TInsertWriteId)0; ui64 tableId = 0; TString dedupId = "0"; TUnifiedBlobId blobId1(2222, 1, 1, 100, 2, 0, 1); @@ -81,47 +81,38 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { TTestInsertTableDB dbTable; TInsertTable insertTable; ui64 indexSnapshot = 0; - + // insert, not commited - bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); + auto userData1 = std::make_shared(tableId, TBlobRange(blobId1), TLocalHelper::GetMetaProto(), indexSnapshot, std::nullopt); + bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, userData1)); UNIT_ASSERT(ok); - // insert the same blobId1 again - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); - UNIT_ASSERT(!ok); - - // insert different blodId with the same writeId and 
dedupId - TUnifiedBlobId blobId2(2222, 1, 2, 100, 2, 0, 1); - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId2, TLocalHelper::GetMetaProto(), indexSnapshot, {})); - UNIT_ASSERT(!ok); - // read nothing - auto blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + auto blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // commit ui64 planStep = 100; ui64 txId = 42; - insertTable.Commit(dbTable, planStep, txId, {TWriteId{writeId}}, [](ui64) { + insertTable.Commit(dbTable, planStep, txId, { writeId }, [](ui64) { return true; }); - - UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); - UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); - UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); +// UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); // read old snapshot - blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // read new snapshot - blobs = insertTable.Read(tableId, TSnapshot(planStep, txId), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot(planStep, txId), TLocalHelper::GetMetaSchema(), 
nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 1); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); } } diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 4c07ce331af4..ecde3aa56673 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -48,27 +48,27 @@ class TTestDbWrapper : public IDbWrapper { } void Insert(const TInsertedData& data) override { - Inserted.emplace(TWriteId{data.WriteTxId}, data); + Inserted.emplace(data.GetInsertWriteId(), data); } - void Commit(const TInsertedData& data) override { - Committed[data.PathId].emplace(data); + void Commit(const TCommittedData& data) override { + Committed[data.GetPathId()].emplace(data); } void Abort(const TInsertedData& data) override { - Aborted.emplace(TWriteId{data.WriteTxId}, data); + Aborted.emplace(data.GetInsertWriteId(), data); } void EraseInserted(const TInsertedData& data) override { - Inserted.erase(TWriteId{data.WriteTxId}); + Inserted.erase(data.GetInsertWriteId()); } - void EraseCommitted(const TInsertedData& data) override { - Committed[data.PathId].erase(data); + void EraseCommitted(const TCommittedData& data) override { + Committed[data.GetPathId()].erase(data); } void EraseAborted(const TInsertedData& data) override { - Aborted.erase(TWriteId{data.WriteTxId}); + Aborted.erase(data.GetInsertWriteId()); } bool Load(TInsertTableAccessor& accessor, @@ -189,9 +189,9 @@ class TTestDbWrapper : public IDbWrapper { } private: - THashMap Inserted; - THashMap> Committed; - THashMap Aborted; + THashMap Inserted; + THashMap> Committed; + THashMap Aborted; THashMap Indices; }; @@ -275,23 +275,27 @@ TString MakeTestBlob(i64 start = 0, i64 end = 100, ui32 step = 1) { return 
NArrow::SerializeBatchNoCompression(batch); } -void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { +void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { for (auto& portion : portions) { - for (auto& rec : portion.GetPortionConstructor().MutableRecords()) { - rec.BlobRange.BlobIdx = portion.GetPortionConstructor().RegisterBlobId(MakeUnifiedBlobId(++step, portion.GetBlobFullSizeVerified(rec.ColumnId, rec.Chunk))); - TString data = portion.GetBlobByRangeVerified(rec.ColumnId, rec.Chunk); - blobs.Add(IStoragesManager::DefaultStorageId, portion.GetPortionConstructor().RestoreBlobRange(rec.BlobRange), std::move(data)); + THashMap blobsData; + for (auto& b : portion.GetBlobs()) { + const auto blobId = MakeUnifiedBlobId(++step, b.GetSize()); + b.RegisterBlobId(portion, blobId); + blobsData.emplace(blobId, b.GetResultBlob()); + } + for (auto&& rec : portion.GetPortionConstructor().GetRecords()) { + auto range = portion.GetPortionConstructor().RestoreBlobRange(rec.BlobRange); + auto it = blobsData.find(range.BlobId); + AFL_VERIFY(it != blobsData.end()); + const TString& data = it->second; + AFL_VERIFY(range.Offset + range.Size <= data.size()); + blobs.Add(IStoragesManager::DefaultStorageId, range, data.substr(range.Offset, range.Size)); } } } -bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, - std::vector&& dataToIndex, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { - - for (ui32 i = 0; i < dataToIndex.size(); ++i) { - // Commited data always has nonzero planstep (for WriteLoadRead tests) - dataToIndex[i].PlanStep = i + 1; - }; +bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, std::vector&& dataToIndex, + NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { std::shared_ptr changes = engine.StartInsert(std::move(dataToIndex)); if (!changes) { return false; @@ -425,6 +429,7 @@ 
std::shared_ptr CommonStoragesManager = Initia Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void WriteLoadRead(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -438,16 +443,16 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] // load - TSnapshot indexSnaphot(1, 1); - TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnaphot, TIndexInfo(tableInfo)); + TSnapshot indexSnapshot(1, 1); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); for (auto&& i : paths) { engine.RegisterTable(i); } engine.Load(db); - std::vector dataToIndex = { - TInsertedData(2, paths[0], "", blobRanges[0].BlobId, TLocalHelper::GetMetaProto(), 0, {}), - TInsertedData(1, paths[0], "", blobRanges[1].BlobId, TLocalHelper::GetMetaProto(), 0, {}) + std::vector dataToIndex = { + TCommittedData(TUserData::Build(paths[0], blobRanges[0], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(1, 2), (TInsertWriteId)2), + TCommittedData(TUserData::Build(paths[0], blobRanges[1], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(2, 1), (TInsertWriteId)1) }; // write @@ -465,12 +470,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // selects auto lastSchema = engine.GetVersionedIndex().GetLastSchema(); - UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnaphot); + UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnapshot); const TIndexInfo& indexInfo = lastSchema->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(testColumns[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; THashSet columnIds; for (auto& c : testColumns) { - columnIds.insert(indexInfo.GetColumnId(c.GetName())); + columnIds.insert(indexInfo.GetColumnIdVerified(c.GetName())); } { // select from snap before insert @@ -492,7 +497,7 @@ 
Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui64 txId = 1; auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSystemColumnNames().size()); + UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSnapshotColumnIdsSet().size()); } { // select another pathId @@ -520,6 +525,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void ReadWithPredicates(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -544,11 +550,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + std::vector dataToIndex; + TSnapshot ss(planStep, txId); dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -565,7 +572,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(key[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(key[0].GetName()) }; { // full scan ui64 txId = 1; @@ -582,7 +589,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { gt10k = MakeStrPredicate("10000", NArrow::EOperation::Greater); } NOlap::TPKRangesFilter 
pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } @@ -594,7 +601,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { lt10k = MakeStrPredicate("08999", NArrow::EOperation::Less); } NOlap::TPKRangesFilter pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 9); } @@ -616,6 +623,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexWriteOverload) { + TTestBasicRuntime runtime; TTestDbWrapper db; auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey);; @@ -641,11 +649,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); blobsAll.Merge(std::move(blobs)); UNIT_ASSERT(ok); } @@ -672,11 +680,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, 
DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -688,10 +696,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexTtl) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - csDefaultControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csDefaultControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); ui64 pathId = 1; ui32 step = 1000; @@ -718,11 +727,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + TSnapshot ss(planStep, txId); + std::vector dataToIndex; dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); blobStartTs += blobTsRange; if (txId == txCount / 2) { @@ -742,7 +752,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - 
THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = {indexInfo.GetColumnIdVerified(testColumns[0].GetName())}; { // full scan ui64 txId = 1; @@ -782,7 +792,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { engine.Load(db); const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; { // full scan ui64 txId = 1; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp index 798ba6ec505d..f957cfea5592 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp @@ -852,4 +852,64 @@ Y_UNIT_TEST_SUITE(TestProgram) { auto expected = result.BuildArrow(); UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); } + + Y_UNIT_TEST(CountWithNulls) { + TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); + ; + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); + + NKikimrSSA::TProgram programProto; + { + auto* command = programProto.AddCommand(); + auto* functionProto = command->MutableAssign()->MutableFunction(); + auto* column = command->MutableAssign()->MutableColumn(); + column->SetName("0"); + auto* funcArg = functionProto->AddArguments(); + funcArg->SetName("uid"); + functionProto->SetId(NKikimrSSA::TProgram::TAssignment::EFunction::TProgram_TAssignment_EFunction_FUNC_IS_NULL); + } + { + auto* command = programProto.AddCommand(); + auto* filter = command->MutableFilter(); + auto* predicate = filter->MutablePredicate(); + predicate->SetName("0"); + } + { + auto* command = programProto.AddCommand(); + auto* groupBy = command->MutableGroupBy(); + auto* aggregate = groupBy->AddAggregates(); + aggregate->MutableFunction()->SetId(static_cast(NArrow::EAggregate::Count)); + 
aggregate->MutableColumn()->SetName("1"); + } + { + auto* command = programProto.AddCommand(); + auto* projectionProto = command->MutableProjection(); + auto* column = projectionProto->AddColumns(); + column->SetName("1"); + } + const auto programSerialized = SerializeProgram(programProto); + + TProgramContainer program; + TString errors; + UNIT_ASSERT_C( + program.Init(columnResolver, NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS, programSerialized, errors), + errors); + + TTableUpdatesBuilder updates(NArrow::MakeArrowSchema({ std::make_pair("uid", TTypeInfo(NTypeIds::Utf8)) })); + updates.AddRow().Add("a"); + updates.AddRow().AddNull(); + updates.AddRow().Add("bbb"); + updates.AddRow().AddNull(); + updates.AddRow().AddNull(); + + auto batch = updates.BuildArrow(); + auto res = program.ApplyProgram(batch); + UNIT_ASSERT_C(res.ok(), res.ToString()); + + TTableUpdatesBuilder result(NArrow::MakeArrowSchema({ std::make_pair("1", TTypeInfo(NTypeIds::Uint64)) })); + result.AddRow().Add(3); + + auto expected = result.BuildArrow(); + UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); + } } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp index 0ffaaf3a9fee..eeb6242103d1 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp @@ -20,7 +20,7 @@ void TActor::Bootstrap() { void TActor::Flush() { if (Aggregations.size()) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "flush_writing")("size", SumSize)("count", Aggregations.size()); - auto action = Aggregations.front()->GetWriteData()->GetBlobsAction(); + auto action = Aggregations.front()->GetBlobsAction(); auto writeController = std::make_shared(ParentActorId, action, std::move(Aggregations)); if (action->NeedDraftTransaction()) { TActorContext::AsActorContext().Send(ParentActorId, std::make_unique(writeController)); @@ 
-48,7 +48,8 @@ void TActor::Handle(TEvAddInsertedDataToBuffer::TPtr& ev) { auto* evBase = ev->Get(); AFL_VERIFY(evBase->GetWriteData()->GetBlobsAction()->GetStorageId() == NOlap::IStoragesManager::DefaultStorageId); SumSize += evBase->GetWriteData()->GetSize(); - Aggregations.emplace_back(std::make_shared(evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()))); + Aggregations.emplace_back( + std::make_shared(*evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()), evBase->GetRecordBatch())); if (SumSize > 4 * 1024 * 1024 || Aggregations.size() > 750 || !FlushDuration) { Flush(); } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/events.h b/ydb/core/tx/columnshard/engines/writer/buffer/events.h index ee750ad69bcf..d2a4b4453b04 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/events.h +++ b/ydb/core/tx/columnshard/engines/writer/buffer/events.h @@ -11,11 +11,15 @@ namespace NKikimr::NColumnShard::NWriting { class TEvAddInsertedDataToBuffer: public NActors::TEventLocal { private: YDB_READONLY_DEF(std::shared_ptr, WriteData); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); YDB_ACCESSOR_DEF(std::vector, BlobsToWrite); + public: - explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs) + explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs, + const std::shared_ptr& recordBatch) : WriteData(writeData) + , RecordBatch(recordBatch) , BlobsToWrite(blobs) { } diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp index 976c2945d59a..9836a72a60cc 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp @@ -19,9 +19,9 @@ TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, T } auto* pInfo = changes.GetWritePortionInfo(i); 
Y_ABORT_UNLESS(pInfo); - TWritePortionInfoWithBlobs& portionWithBlobs = *pInfo; + TWritePortionInfoWithBlobsResult& portionWithBlobs = *pInfo; for (auto&& b : portionWithBlobs.GetBlobs()) { - auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetResultBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); b.RegisterBlobId(portionWithBlobs, task.GetBlobId()); WriteVolume += b.GetSize(); } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp index 9966dcd008c7..ceacecf155b6 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp @@ -35,19 +35,19 @@ void TWideSerializedBatch::InitBlobId(const TUnifiedBlobId& id) { void TWritingBuffer::InitReadyInstant(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle5StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle5StartInstant(instant); } } void TWritingBuffer::InitStartSending(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle4StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle4StartInstant(instant); } } void TWritingBuffer::InitReplyReceived(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle6StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle6StartInstant(instant); } } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index 49cc6efe47b6..92e59e9b197c 100644 --- 
a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -86,23 +86,56 @@ class TWritingBlob { class TWriteAggregation { private: - YDB_READONLY_DEF(std::shared_ptr, WriteData); + NEvWrite::TWriteMeta WriteMeta; + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, Size, 0); + YDB_READONLY(ui64, Rows, 0); YDB_ACCESSOR_DEF(std::vector, SplittedBlobs); - YDB_READONLY_DEF(TVector, WriteIds); + YDB_READONLY_DEF(TVector, InsertWriteIds); + YDB_READONLY_DEF(std::shared_ptr, BlobsAction); + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); + std::shared_ptr RecordBatch; + public: - void AddWriteId(const TWriteId& id) { - WriteIds.emplace_back(id); + const std::shared_ptr& GetRecordBatch() const { + AFL_VERIFY(RecordBatch); + return RecordBatch; + } + + const NEvWrite::TWriteMeta& GetWriteMeta() const { + return WriteMeta; + } + + NEvWrite::TWriteMeta& MutableWriteMeta() { + return WriteMeta; } - TWriteAggregation(const std::shared_ptr& writeData, std::vector&& splittedBlobs) - : WriteData(writeData) { + void AddInsertWriteId(const TInsertWriteId id) { + InsertWriteIds.emplace_back(id); + } + + TWriteAggregation(const NEvWrite::TWriteData& writeData, std::vector&& splittedBlobs, const std::shared_ptr& batch) + : WriteMeta(writeData.GetWriteMeta()) + , SchemaVersion(writeData.GetData()->GetSchemaVersion()) + , Size(writeData.GetSize()) + , BlobsAction(writeData.GetBlobsAction()) + , SchemaSubset(writeData.GetSchemaSubsetVerified()) + , RecordBatch(batch) + { for (auto&& s : splittedBlobs) { SplittedBlobs.emplace_back(std::move(s), *this); } + for (const auto& batch : SplittedBlobs) { + Rows += batch->GetRowsCount(); + } } - TWriteAggregation(const std::shared_ptr& writeData) - : WriteData(writeData) { + TWriteAggregation(const NEvWrite::TWriteData& writeData) + : WriteMeta(writeData.GetWriteMeta()) + , SchemaVersion(writeData.GetData()->GetSchemaVersion()) + , 
Size(writeData.GetSize()) + , BlobsAction(writeData.GetBlobsAction()) { + AFL_VERIFY(!writeData.GetSchemaSubset()); } }; @@ -120,7 +153,7 @@ class TWritingBuffer: public TMoveOnly { { AFL_VERIFY(BlobsAction); for (auto&& aggr : Aggregations) { - SumSize += aggr->GetWriteData()->GetSize(); + SumSize += aggr->GetSize(); } } diff --git a/ydb/core/tx/columnshard/engines/ya.make b/ydb/core/tx/columnshard/engines/ya.make index 4772008f14f1..66b72ec25122 100644 --- a/ydb/core/tx/columnshard/engines/ya.make +++ b/ydb/core/tx/columnshard/engines/ya.make @@ -13,6 +13,7 @@ SRCS( filter.cpp portion_info.cpp tier_info.cpp + defs.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp index a4d916545eac..b275e17f2fdb 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp @@ -1,4 +1,26 @@ #include "abstract.h" +#include + namespace NKikimr::NYDBTest { + +TDuration ICSController::GetGuaranteeIndexationInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::GuaranteeIndexationInterval; + return DoGetGuaranteeIndexationInterval(defaultValue); +} + +TDuration ICSController::GetPeriodicWakeupActivationPeriod() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultPeriodicWakeupActivationPeriod; + return DoGetPeriodicWakeupActivationPeriod(defaultValue); +} + +TDuration ICSController::GetStatsReportInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultStatsReportInterval; + return DoGetStatsReportInterval(defaultValue); +} + +ui64 ICSController::GetGuaranteeIndexationStartBytesLimit() const { + const ui64 defaultValue = NColumnShard::TSettings::GuaranteeIndexationStartBytesLimit; + return DoGetGuaranteeIndexationStartBytesLimit(defaultValue); +} } diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index 
d1ce343edf7e..c104590235d3 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -1,30 +1,33 @@ #pragma once #include +#include #include - -#include #include +#include #include -#include -#include +#include + #include +#include +#include + #include namespace NKikimr::NColumnShard { class TTiersManager; class TColumnShard; -} +} // namespace NKikimr::NColumnShard namespace NKikimr::NOlap { class TColumnEngineChanges; class IBlobsGCAction; class TPortionInfo; -namespace NStatistics { -class TOperatorContainer; -} +namespace NIndexes { +class TIndexMetaContainer; } +} // namespace NKikimr::NOlap namespace arrow { class RecordBatch; } @@ -41,7 +44,8 @@ class ILocalDBModifier { public: using TPtr = std::shared_ptr; - virtual ~ILocalDBModifier() {} + virtual ~ILocalDBModifier() { + } virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const = 0; }; @@ -55,6 +59,7 @@ class ICSController { Cleanup, GC }; + protected: virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { return; @@ -77,10 +82,75 @@ class ICSController { } virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& /*sessionId*/) { } - virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString & /*sessionId*/) { + virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& /*sessionId*/) { + } + + virtual TDuration DoGetPingCheckPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetOverridenGCPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetCompactionActualizationLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetActualizationTasksLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + 
virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReadSequentiallyBufferSize(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetReadTimeoutClean(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration defaultValue) const { + return defaultValue; + } + +private: + inline static const NKikimrConfig::TColumnShardConfig DefaultConfig = {}; + + static const NKikimrConfig::TColumnShardConfig& GetConfig() { + if (HasAppData()) { + return AppDataVerified().ColumnShardConfig; + } + return DefaultConfig; } public: + virtual void OnRequestTracingChanges( + const std::set& /*snapshotsToSave*/, const std::set& /*snapshotsToRemove*/) { + } + + TDuration GetPingCheckPeriod() const { + const TDuration defaultValue = 0.6 * GetReadTimeoutClean(); + return DoGetPingCheckPeriod(defaultValue); + } + virtual bool IsBackgroundEnabled(const EBackground /*id*/) const { return true; } @@ -88,53 +158,51 @@ class ICSController { using TPtr = std::shared_ptr; virtual ~ICSController() = default; - virtual TDuration GetOverridenGCPeriod(const TDuration def) const { - return def; + TDuration GetOverridenGCPeriod() const { + const 
TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetGCIntervalMs()); + return DoGetOverridenGCPeriod(defaultValue); } virtual void OnSelectShardingFilter() { - } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const { - return def; + TDuration GetCompactionActualizationLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetCompactionActualizationLagMs()); + return DoGetCompactionActualizationLag(defaultValue); } - virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { + virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction( + const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { return original; } - virtual TDuration GetRemovedPortionLivetime(const TDuration def) const { - return def; + TDuration GetActualizationTasksLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetActualizationTasksLagMs()); + return DoGetActualizationTasksLag(defaultValue); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const { - return d; + ui64 GetReduceMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit; + return DoGetReduceMemoryIntervalLimit(defaultValue); } - - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const { - return def; - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const { - return def; + ui64 GetRejectMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultRejectMemoryIntervalLimit; + return DoGetRejectMemoryIntervalLimit(defaultValue); } virtual bool NeedForceCompactionBacketsConstruction() const { return false; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 def) const { - return def; + ui64 GetSmallPortionSizeDetector() 
const { + const ui64 defaultValue = GetConfig().GetSmallPortionDetectSizeLimit(); + return DoGetSmallPortionSizeDetector(defaultValue); } virtual void OnExportFinished() { - } virtual void OnActualizationRefreshScheme() { - } virtual void OnActualizationRefreshTiering() { - } virtual void AddPortionForActualizer(const i32 /*portionsCount*/) { - } void OnDataSharingFinished(const ui64 tabletId, const TString& sessionId) { @@ -143,17 +211,16 @@ class ICSController { void OnDataSharingStarted(const ui64 tabletId, const TString& sessionId) { return DoOnDataSharingStarted(tabletId, sessionId); } - virtual void OnStatisticsUsage(const NOlap::NStatistics::TOperatorContainer& /*statOperator*/) { - + virtual void OnStatisticsUsage(const NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) { } virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) { - } virtual void OnMaxValueUsage() { } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const { - return def; + virtual TDuration GetLagForCompactionBeforeTierings() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetLagForCompactionBeforeTieringsMs()); + return DoGetLagForCompactionBeforeTierings(defaultValue); } void OnTabletInitCompleted(const NColumnShard::TColumnShard& shard) { @@ -182,29 +249,20 @@ class ICSController { } virtual void OnIndexSelectProcessed(const std::optional /*result*/) { } - virtual TDuration GetReadTimeoutClean(const TDuration def) { - return def; + TDuration GetReadTimeoutClean() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetMaxReadStaleness_ms()); + return DoGetReadTimeoutClean(defaultValue); } virtual EOptimizerCompactionWeightControl GetCompactionControl() const { return EOptimizerCompactionWeightControl::Force; } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetGuaranteeIndexationInterval(const TDuration 
defaultValue) const { - return defaultValue; - } - virtual TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetStatsReportInterval(const TDuration defaultValue) const { - return defaultValue; - } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { - return defaultValue; - } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { - return defaultValue; + TDuration GetGuaranteeIndexationInterval() const; + TDuration GetPeriodicWakeupActivationPeriod() const; + TDuration GetStatsReportInterval() const; + ui64 GetGuaranteeIndexationStartBytesLimit() const; + TDuration GetOptimizerFreshnessCheckDuration() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetOptimizerFreshnessCheckDurationMs()); + return DoGetOptimizerFreshnessCheckDuration(defaultValue); } virtual void OnTieringModified(const std::shared_ptr& /*tiers*/) { @@ -215,7 +273,8 @@ class ICSController { } virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const { - static std::shared_ptr result = std::make_shared(TInstant::Now()); + static std::shared_ptr result = + std::make_shared(TInstant::Now()); return result; } @@ -231,15 +290,16 @@ class ICSController { class TControllers { private: ICSController::TPtr CSController = std::make_shared(); + public: template class TGuard: TNonCopyable { private: std::shared_ptr Controller; + public: TGuard(std::shared_ptr controller) - : Controller(controller) - { + : Controller(controller) { Y_ABORT_UNLESS(Controller); } @@ -270,4 +330,4 @@ class TControllers { } }; -} +} // namespace NKikimr::NYDBTest diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.cpp b/ydb/core/tx/columnshard/hooks/testing/controller.cpp index e47dc08dcd67..9cf3a7e7e9b5 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.cpp +++ b/ydb/core/tx/columnshard/hooks/testing/controller.cpp 
@@ -12,10 +12,6 @@ namespace NKikimr::NYDBTest::NColumnShard { bool TController::DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& change, const ::NKikimr::NColumnShard::TColumnShard& shard) { TGuard g(Mutex); - if (SharingIds.empty()) { - TCheckContext context; - CheckInvariants(shard, context); - } return TBase::DoOnWriteIndexComplete(change, shard); } @@ -24,9 +20,6 @@ void TController::DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& for (auto d = action.GetBlobsToRemove().GetDirect().GetIterator(); d.IsValid(); ++d) { AFL_VERIFY(RemovedBlobIds[action.GetStorageId()][d.GetBlobId()].emplace(d.GetTabletId()).second); } -// if (SharingIds.empty()) { -// CheckInvariants(); -// } } void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& shard, TCheckContext& context) const { @@ -59,11 +52,11 @@ void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& s const NOlap::TTabletsByBlob blobs = manager->GetBlobsToDelete(); for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { Cerr << shard.TabletID() << " SHARING_REMOVE_LOCAL:" << b.GetBlobId().ToStringNew() << " FROM " << b.GetTabletId() << Endl; - i.second.RemoveSharing(b.GetTabletId(), b.GetBlobId()); + Y_UNUSED(i.second.RemoveSharing(b.GetTabletId(), b.GetBlobId())); } for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { Cerr << shard.TabletID() << " BORROWED_REMOVE_LOCAL:" << b.GetBlobId().ToStringNew() << " FROM " << b.GetTabletId() << Endl; - i.second.RemoveBorrowed(b.GetTabletId(), b.GetBlobId()); + Y_UNUSED(i.second.RemoveBorrowed(b.GetTabletId(), b.GetBlobId())); } } context.AddCategories(shard.TabletID(), std::move(shardBlobsCategories)); diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.h b/ydb/core/tx/columnshard/hooks/testing/controller.h index c8211afb5443..a8e259877fd0 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/controller.h @@ -12,20 +12,21 @@ namespace 
NKikimr::NYDBTest::NColumnShard { class TController: public TReadOnlyController { private: using TBase = TReadOnlyController; - YDB_ACCESSOR_DEF(std::optional, LagForCompactionBeforeTierings); - YDB_ACCESSOR(std::optional, GuaranteeIndexationInterval, TDuration::Zero()); - YDB_ACCESSOR(std::optional, PeriodicWakeupActivationPeriod, std::nullopt); - YDB_ACCESSOR(std::optional, StatsReportInterval, std::nullopt); - YDB_ACCESSOR(std::optional, GuaranteeIndexationStartBytesLimit, 0); - YDB_ACCESSOR(std::optional, OptimizerFreshnessCheckDuration, TDuration::Zero()); - YDB_ACCESSOR_DEF(std::optional, CompactionActualizationLag); - YDB_ACCESSOR_DEF(std::optional, TasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideRequestsTracePingCheckPeriod); + YDB_ACCESSOR_DEF(std::optional, OverrideLagForCompactionBeforeTierings); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationInterval, TDuration::Zero()); + YDB_ACCESSOR(std::optional, OverridePeriodicWakeupActivationPeriod, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideStatsReportInterval, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationStartBytesLimit, 0); + YDB_ACCESSOR(std::optional, OverrideOptimizerFreshnessCheckDuration, TDuration::Zero()); + YDB_ACCESSOR_DEF(std::optional, OverrideCompactionActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideTasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideReadTimeoutClean); EOptimizerCompactionWeightControl CompactionControl = EOptimizerCompactionWeightControl::Force; YDB_ACCESSOR(std::optional, OverrideReduceMemoryIntervalLimit, 1024); YDB_ACCESSOR_DEF(std::optional, OverrideRejectMemoryIntervalLimit); - std::optional ReadTimeoutClean; std::optional ExpectedShardsCount; THashMap ShardActuals; @@ -129,12 +130,16 @@ class TController: public TReadOnlyController { THashSet SharingIds; protected: virtual ::NKikimr::NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const 
::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const override; - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const override { - return LagForCompactionBeforeTierings.value_or(def); + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration def) const override { + return OverrideLagForCompactionBeforeTierings.value_or(def); } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const override { - return CompactionActualizationLag.value_or(def); + virtual TDuration DoGetPingCheckPeriod(const TDuration def) const override { + return OverrideRequestsTracePingCheckPeriod.value_or(def); + } + + virtual TDuration DoGetCompactionActualizationLag(const TDuration def) const override { + return OverrideCompactionActualizationLag.value_or(def); } @@ -143,8 +148,8 @@ class TController: public TReadOnlyController { return !DisabledBackgrounds.contains(id); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const override { - return TasksActualizationLag.value_or(d); + virtual TDuration DoGetActualizationTasksLag(const TDuration d) const override { + return OverrideTasksActualizationLag.value_or(d); } virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& shard) override; @@ -152,23 +157,29 @@ class TController: public TReadOnlyController { virtual void DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& shard, const NOlap::IBlobsGCAction& action) override; virtual bool DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& changes, const ::NKikimr::NColumnShard::TColumnShard& shard) override; - virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const override { - return GuaranteeIndexationInterval.value_or(defaultValue); + virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const override { + return 
OverrideGuaranteeIndexationInterval.value_or(defaultValue); } - TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { - return PeriodicWakeupActivationPeriod.value_or(defaultValue); + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { + return OverridePeriodicWakeupActivationPeriod.value_or(defaultValue); } - TDuration GetStatsReportInterval(const TDuration defaultValue) const override { - return StatsReportInterval.value_or(defaultValue); + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const override { + return OverrideStatsReportInterval.value_or(defaultValue); } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { - return GuaranteeIndexationStartBytesLimit.value_or(defaultValue); + virtual ui64 DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { + return OverrideGuaranteeIndexationStartBytesLimit.value_or(defaultValue); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { - return OptimizerFreshnessCheckDuration.value_or(defaultValue); + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { + return OverrideOptimizerFreshnessCheckDuration.value_or(defaultValue); } - virtual TDuration GetReadTimeoutClean(const TDuration def) override { - return ReadTimeoutClean.value_or(def); + virtual TDuration DoGetReadTimeoutClean(const TDuration def) const override { + return OverrideReadTimeoutClean.value_or(def); + } + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 def) const override { + return OverrideReduceMemoryIntervalLimit.value_or(def); + } + virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 def) const override { + return OverrideRejectMemoryIntervalLimit.value_or(def); } virtual EOptimizerCompactionWeightControl GetCompactionControl() const override { return 
CompactionControl; @@ -177,9 +188,6 @@ class TController: public TReadOnlyController { virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& sessionId) override { TGuard g(Mutex); AFL_VERIFY(SharingIds.erase(sessionId)); - if (SharingIds.empty()) { - CheckInvariants(); - } } virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& sessionId) override { // dont check here. on finish only @@ -188,18 +196,9 @@ class TController: public TReadOnlyController { } public: - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } const TAtomicCounter& GetIndexWriteControllerBrokeCount() const { return IndexWriteControllerBrokeCount; } - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const override { - return OverrideReduceMemoryIntervalLimit.value_or(def); - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const override { - return OverrideRejectMemoryIntervalLimit.value_or(def); - } bool IsTrivialLinks() const; TCheckContext CheckInvariants() const; @@ -235,9 +234,6 @@ class TController: public TReadOnlyController { void SetCompactionControl(const EOptimizerCompactionWeightControl value) { CompactionControl = value; } - void SetReadTimeoutClean(const TDuration d) { - ReadTimeoutClean = d; - } bool HasPKSortingOnly() const; diff --git a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h index c271878ea838..c55be9455204 100644 --- a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h @@ -31,9 +31,18 @@ class TReadOnlyController: public ICSController { YDB_READONLY(TAtomicCounter, ActualizationRefreshTieringCount, 0); YDB_READONLY(TAtomicCounter, ShardingFiltersCount, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsSave, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsRemove, 0); + YDB_ACCESSOR(TAtomicCounter, 
CompactionsLimit, 10000000); protected: + virtual void OnRequestTracingChanges( + const std::set& snapshotsToSave, const std::set& snapshotsToRemove) override { + RequestTracingSnapshotsSave.Add(snapshotsToSave.size()); + RequestTracingSnapshotsRemove.Add(snapshotsToRemove.size()); + } + virtual void OnSelectShardingFilter() override { ShardingFiltersCount.Inc(); } @@ -62,11 +71,11 @@ class TReadOnlyController: public ICSController { return EOptimizerCompactionWeightControl::Force; } -public: - virtual TDuration GetOverridenGCPeriod(const TDuration /*def*/) const override { + virtual TDuration DoGetOverridenGCPeriod(const TDuration /*def*/) const override { return TDuration::Zero(); } +public: void WaitCompactions(const TDuration d) const { TInstant start = TInstant::Now(); ui32 compactionsStart = GetCompactionStartedCounter().Val(); @@ -82,10 +91,10 @@ class TReadOnlyController: public ICSController { void WaitIndexation(const TDuration d) const { TInstant start = TInstant::Now(); - ui32 compactionsStart = GetInsertStartedCounter().Val(); + ui32 insertsStart = GetInsertStartedCounter().Val(); while (Now() - start < d) { - if (compactionsStart != GetInsertStartedCounter().Val()) { - compactionsStart = GetInsertStartedCounter().Val(); + if (insertsStart != GetInsertStartedCounter().Val()) { + insertsStart = GetInsertStartedCounter().Val(); start = TInstant::Now(); } Cerr << "WAIT_INDEXATION: " << GetInsertStartedCounter().Val() << Endl; diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.cpp b/ydb/core/tx/columnshard/inflight_request_tracker.cpp index 98ca6d7ab6da..6b7830b26cb0 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.cpp +++ b/ydb/core/tx/columnshard/inflight_request_tracker.cpp @@ -1,90 +1,155 @@ +#include "columnshard_impl.h" +#include "columnshard_schema.h" #include "inflight_request_tracker.h" + +#include "data_sharing/common/transactions/tx_extension.h" #include "engines/column_engine.h" #include 
"engines/reader/plain_reader/constructor/read_metadata.h" +#include "hooks/abstract/abstract.h" namespace NKikimr::NColumnShard { -void TInFlightReadsTracker::RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index) { - Y_ABORT_UNLESS(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); - const auto& readMetaList = RequestsMeta[cookie]; - - for (const auto& readMetaBase : readMetaList) { - NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); - - if (!readMeta) { - continue; - } - - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); - auto it = PortionUseCount.find(portionId); - Y_ABORT_UNLESS(it != PortionUseCount.end(), "Portion id %" PRIu64 " not found in request %" PRIu64, portionId, cookie); - if (it->second == 1) { - PortionUseCount.erase(it); - } else { - it->second--; +NOlap::NReader::TReadMetadataBase::TConstPtr TInFlightReadsTracker::ExtractInFlightRequest( + ui64 cookie, const NOlap::TVersionedIndex* /*index*/, const TInstant now) { + auto it = RequestsMeta.find(cookie); + AFL_VERIFY(it != RequestsMeta.end())("cookie", cookie); + const NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase = it->second; + + { + { + auto it = SnapshotsLive.find(readMetaBase->GetRequestSnapshot()); + AFL_VERIFY(it != SnapshotsLive.end()); + if (it->second.DelRequest(cookie, now)) { + SnapshotsLive.erase(it); } } - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorVerified(i.first); - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->FreeBlob(blobId); + if (NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = + std::dynamic_pointer_cast(readMetaBase)) { + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = 
insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); } } - - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); - } } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); RequestsMeta.erase(cookie); + return readMetaBase; } -TConclusionStatus TInFlightReadsTracker::AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index) { - RequestsMeta[cookie].push_back(readMetaBase); +void TInFlightReadsTracker::AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* /*index*/) { + AFL_VERIFY(RequestsMeta.emplace(cookie, readMetaBase).second); auto readMeta = std::dynamic_pointer_cast(readMetaBase); if (!readMeta) { - return TConclusionStatus::Success(); + return; } auto selectInfo = readMeta->SelectInfo; Y_ABORT_UNLESS(selectInfo); SelectStatsDelta += selectInfo->Stats(); - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - PortionUseCount[portionId]++; - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); } +} - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorOptional(i.first); - if (!storage) { - return TConclusionStatus::Fail("blobs storage info not ready for '" + i.first + "'"); +namespace { +class TTransactionSavePersistentSnapshots: 
public NOlap::NDataSharing::TExtendedTransactionBase { +private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const std::set SaveSnapshots; + const std::set RemoveSnapshots; + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : SaveSnapshots) { + db.Table().Key(i.GetPlanStep(), i.GetTxId()).Update(); } - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->UseBlob(blobId); + for (auto&& i : RemoveSnapshots) { + db.Table().Key(i.GetPlanStep(), i.GetTxId()).Delete(); } + return true; } - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); + virtual void DoComplete(const TActorContext& /*ctx*/) override { + } + +public: + TTransactionSavePersistentSnapshots( + NColumnShard::TColumnShard* self, std::set&& saveSnapshots, std::set&& removeSnapshots) + : TBase(self) + , SaveSnapshots(std::move(saveSnapshots)) + , RemoveSnapshots(std::move(removeSnapshots)) { + AFL_VERIFY(SaveSnapshots.size() || RemoveSnapshots.size()); + } +}; +} // namespace + +std::unique_ptr TInFlightReadsTracker::Ping( + TColumnShard* self, const TDuration critDuration, const TInstant now) { + std::set snapshotsToSave; + std::set snapshotsToFree; + for (auto&& i : SnapshotsLive) { + if (i.second.Ping(critDuration, now)) { + if (i.second.GetIsLock()) { + Counters->OnSnapshotLocked(); + snapshotsToSave.emplace(i.first); + } else { + Counters->OnSnapshotUnlocked(); + snapshotsToFree.emplace(i.first); + } + } + } + for (auto&& i : snapshotsToFree) { + SnapshotsLive.erase(i); + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + if (snapshotsToFree.size() || snapshotsToSave.size()) { + 
NYDBTest::TControllers::GetColumnShardController()->OnRequestTracingChanges(snapshotsToSave, snapshotsToFree); + return std::make_unique(self, std::move(snapshotsToSave), std::move(snapshotsToFree)); + } else { + return nullptr; } - return TConclusionStatus::Success(); } +bool TInFlightReadsTracker::LoadFromDatabase(NTable::TDatabase& tableDB) { + NIceDb::TNiceDb db(tableDB); + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const NOlap::TSnapshot snapshot( + rowset.GetValue(), rowset.GetValue()); + AFL_VERIFY(SnapshotsLive.emplace(snapshot, TSnapshotLiveInfo::BuildFromDatabase(snapshot)).second); + + if (!rowset.Next()) { + return false; + } + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + return true; } + +ui64 TInFlightReadsTracker::AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { + const ui64 cookie = NextCookie++; + auto it = SnapshotsLive.find(readMeta->GetRequestSnapshot()); + if (it == SnapshotsLive.end()) { + it = SnapshotsLive.emplace(readMeta->GetRequestSnapshot(), TSnapshotLiveInfo::BuildFromRequest(readMeta->GetRequestSnapshot())).first; + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + } + it->second.AddRequest(cookie); + AddToInFlightRequest(cookie, readMeta, index); + return cookie; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.h b/ydb/core/tx/columnshard/inflight_request_tracker.h index d530c11d7a4f..0aeec5acddbe 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.h +++ b/ydb/core/tx/columnshard/inflight_request_tracker.h @@ -1,6 +1,9 @@ #pragma once #include "blob.h" + +#include "counters/req_tracer.h" + #include namespace NKikimr::NOlap { @@ -8,49 +11,111 @@ class TVersionedIndex; } namespace NKikimr::NColumnShard { - +class TColumnShard; using NOlap::IBlobInUseTracker; -class 
TInFlightReadsTracker { +class TSnapshotLiveInfo { +private: + const NOlap::TSnapshot Snapshot; + std::optional LastPingInstant; + std::optional LastRequestFinishedInstant; + THashSet Requests; + YDB_READONLY(bool, IsLock, false); + + TSnapshotLiveInfo(const NOlap::TSnapshot& snapshot) + : Snapshot(snapshot) { + } + public: - // Returns a unique cookie associated with this request - [[nodiscard]] TConclusion AddInFlightRequest(NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { - const ui64 cookie = NextCookie++; - auto status = AddToInFlightRequest(cookie, readMeta, index); - if (!status) { - return status; + void AddRequest(const ui32 cookie) { + AFL_VERIFY(Requests.emplace(cookie).second); + } + + [[nodiscard]] bool DelRequest(const ui32 cookie, const TInstant now) { + AFL_VERIFY(Requests.erase(cookie)); + if (Requests.empty()) { + LastRequestFinishedInstant = now; + } + if (!IsLock && Requests.empty()) { + return true; } - return cookie; + return false; } - void RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index); + static TSnapshotLiveInfo BuildFromRequest(const NOlap::TSnapshot& reqSnapshot) { + return TSnapshotLiveInfo(reqSnapshot); + } - // Checks if the portion is in use by any in-flight request - bool IsPortionUsed(ui64 portionId) const { - return PortionUseCount.contains(portionId); + static TSnapshotLiveInfo BuildFromDatabase(const NOlap::TSnapshot& reqSnapshot) { + TSnapshotLiveInfo result(reqSnapshot); + result.LastPingInstant = TInstant::Now(); + result.LastRequestFinishedInstant = result.LastPingInstant; + result.IsLock = true; + return result; } + bool Ping(const TDuration critDuration, const TInstant now) { + LastPingInstant = now; + if (Requests.empty()) { + AFL_VERIFY(LastRequestFinishedInstant); + if (critDuration < *LastPingInstant - *LastRequestFinishedInstant && IsLock) { + IsLock = false; + return true; + } + } else { + if (critDuration < *LastPingInstant - 
Snapshot.GetPlanInstant() && !IsLock) { + IsLock = true; + return true; + } + } + return false; + } +}; + +class TInFlightReadsTracker { +private: + std::map SnapshotsLive; + std::shared_ptr Counters; + +public: + std::optional GetSnapshotToClean() const { + if (SnapshotsLive.empty()) { + return std::nullopt; + } else { + return SnapshotsLive.begin()->first; + } + } + + bool LoadFromDatabase(NTable::TDatabase& db); + + [[nodiscard]] std::unique_ptr Ping(TColumnShard* self, const TDuration critDuration, const TInstant now); + + // Returns a unique cookie associated with this request + [[nodiscard]] ui64 AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index); + + [[nodiscard]] NOlap::NReader::TReadMetadataBase::TConstPtr ExtractInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index, const TInstant now); + NOlap::TSelectInfo::TStats GetSelectStatsDelta() { auto delta = SelectStatsDelta; SelectStatsDelta = NOlap::TSelectInfo::TStats(); return delta; } - TInFlightReadsTracker(const std::shared_ptr& storagesManager) - : StoragesManager(storagesManager) - { - + TInFlightReadsTracker(const std::shared_ptr& storagesManager, const std::shared_ptr& counters) + : Counters(counters) + , StoragesManager(storagesManager) { } private: - [[nodiscard]] TConclusionStatus AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); + void AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); private: std::shared_ptr StoragesManager; - ui64 NextCookie{1}; - THashMap> RequestsMeta; - THashMap PortionUseCount; + ui64 NextCookie = 1; + THashMap RequestsMeta; NOlap::TSelectInfo::TStats SelectStatsDelta; }; -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h 
b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h index aaa6b6be1d87..e75099ecd9ba 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h +++ b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h @@ -56,6 +56,9 @@ enum class ENormalizerSequentialId: ui32 { TablesCleaner, PortionsMetadata, CleanGranuleId, + EmptyPortionsCleaner, + CleanInsertionDedup, + GCCountersNormalizer, MAX }; diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp new file mode 100644 index 000000000000..5a0934261879 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp @@ -0,0 +1,151 @@ +#include "broken_dedup.h" + +#include +#include + +namespace NKikimr::NOlap::NInsertionDedup { + +class TNormalizerRemoveChanges: public INormalizerChanges { +private: + std::vector Insertions; +public: + virtual bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_aborted_record")("write_id", i.GetInsertWriteId()); + i.Remove(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerRemoveChanges(const std::vector& insertions) + : Insertions(insertions) + { + + } +}; + +class TNormalizerCleanDedupChanges: public INormalizerChanges { +private: + mutable std::vector Insertions; + +public: + virtual bool ApplyOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_VERIFY(i.GetDedupId()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", 
"correct_record")("dedup", i.GetDedupId()); + i.Remove(db); + i.SetDedupId(""); + i.Upsert(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerCleanDedupChanges(const std::vector& insertions) + : Insertions(insertions) { + } +}; + + +class TCollectionStates { +private: + YDB_READONLY_DEF(std::optional, Inserted); + YDB_READONLY_DEF(std::optional, Aborted); +public: + void SetInserted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Inserted); + Inserted = context; + } + void SetAborted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Aborted); + Aborted = context; + } +}; + +TConclusion> TInsertionsDedupNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + THashMap insertions; + while (!rowset.EndOfSet()) { + TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Committed) { + AFL_VERIFY(constructor.GetPlanStep()); + } else { + AFL_VERIFY(!constructor.GetPlanStep()); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Aborted) { + insertions[constructor.GetInsertWriteId()].SetAborted(constructor); + } else if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Inserted) { + insertions[constructor.GetInsertWriteId()].SetInserted(constructor); + } else { + AFL_VERIFY(false); + } + } + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + } + + std::vector result; + std::vector toRemove; + std::vector toCleanDedup; + for (auto&& [id, i] 
: insertions) { + if (i.GetInserted() && i.GetAborted()) { + toRemove.emplace_back(*i.GetInserted()); + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetAborted()) { + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetInserted()) { + if (i.GetInserted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetInserted()); + } + } else { + AFL_VERIFY(false); + } + if (toCleanDedup.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + } + if (toCleanDedup.size()) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size()) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + + return result; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h new file mode 100644 index 000000000000..c9a935e24371 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap::NInsertionDedup { + +class TInsertionsDedupNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "CleanInsertionDedup"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TInsertionsDedupNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return 
ENormalizerSequentialId::CleanInsertionDedup; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/ya.make b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make new file mode 100644 index 000000000000..99cdc40cfaf4 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL broken_dedup.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/normalizer/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index 03bd39787984..f42f38061e45 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -49,7 +49,7 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { std::vector Chunks; TNormalizationContext NormContext; protected: - virtual bool DoExecute() override { + virtual TConclusionStatus DoExecute(const std::shared_ptr& /*taskPtr*/) override { for (auto&& chunkInfo : Chunks) { const auto& blobRange = chunkInfo.GetBlobRange(); @@ -59,16 +59,17 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { Y_ABORT_UNLESS(!!columnLoader); TPortionInfo::TAssembleBlobInfo assembleBlob(blobData); + assembleBlob.SetExpectedRecordsCount(chunkInfo.GetRecordsCount()); auto batch = assembleBlob.BuildRecordBatch(*columnLoader); Y_ABORT_UNLESS(!!batch); - chunkInfo.MutableUpdate().SetNumRows(batch->num_rows()); - chunkInfo.MutableUpdate().SetRawBytes(NArrow::GetBatchDataSize(batch)); + chunkInfo.MutableUpdate().SetNumRows(batch->GetRecordsCount()); + chunkInfo.MutableUpdate().SetRawBytes(batch->GetRawSizeVerified()); } auto changes = std::make_shared(std::move(Chunks)); 
TActorContext::AsActorContext().Send(NormContext.GetShardActor(), std::make_unique(changes)); - return true; + return TConclusionStatus::Success(); } public: diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.h b/ydb/core/tx/columnshard/normalizer/portion/chunks.h index 59fbfe57da0c..c8a09669c7b8 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.h +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.h @@ -74,6 +74,10 @@ namespace NKikimr::NOlap { , CLContext(rowset, dsGroupSelector) {} + ui32 GetRecordsCount() const { + return CLContext.GetMetaProto().GetNumRows(); + } + const TBlobRange& GetBlobRange() const { return CLContext.GetBlobRange(); } diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp index 435083000889..d1e00669f8b3 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp @@ -76,6 +76,7 @@ INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector + + +namespace NKikimr::NOlap { + +namespace { +std::optional> GetColumnPortionAddresses(NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + if (!Schema::Precharge(db, txc.DB.GetScheme())) { + return std::nullopt; + } + THashSet usedPortions; + auto rowset = db.Table().Select< + Schema::IndexColumns::PathId, + Schema::IndexColumns::Portion + >(); + if (!rowset.IsReady()) { + return std::nullopt; + } + while (!rowset.EndOfSet()) { + usedPortions.emplace( + rowset.GetValue(), + rowset.GetValue() + ); + if (!rowset.Next()) { + return std::nullopt; + } + } + return usedPortions; +} + +using TBatch = std::vector; + +std::optional> GetPortionsToDelete(NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + const auto usedPortions = GetColumnPortionAddresses(txc); + if (!usedPortions) { + return std::nullopt; + } + const size_t MaxBatchSize = 10000; + 
NIceDb::TNiceDb db(txc.DB); + if (!Schema::Precharge(db, txc.DB.GetScheme())) { + return std::nullopt; + } + auto rowset = db.Table().Select< + Schema::IndexPortions::PathId, + Schema::IndexPortions::PortionId + >(); + if (!rowset.IsReady()) { + return std::nullopt; + } + std::vector result; + TBatch portionsToDelete; + while (!rowset.EndOfSet()) { + TPortionAddress addr( + rowset.GetValue(), + rowset.GetValue() + ); + if (!usedPortions->contains(addr)) { + ACFL_WARN("normalizer", "TCleanEmptyPortionsNormalizer")("message", TStringBuilder() << addr.DebugString() << " marked for deletion"); + portionsToDelete.emplace_back(std::move(addr)); + if (portionsToDelete.size() == MaxBatchSize) { + result.emplace_back(std::move(portionsToDelete)); + portionsToDelete = TBatch{}; + } + } + if (!rowset.Next()) { + return std::nullopt; + } + } + if (!portionsToDelete.empty()) { + result.emplace_back(std::move(portionsToDelete)); + } + return result; +} + +class TChanges : public INormalizerChanges { +public: + TChanges(TBatch&& addresses) + : Addresses(addresses) + {} + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController&) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for(const auto& a: Addresses) { + db.Table().Key( + a.GetPathId(), + a.GetPortionId() + ).Delete(); + } + ACFL_WARN("normalizer", "TCleanEmptyPortionsNormalizer")("message", TStringBuilder() << GetSize() << " portions deleted"); + return true; + } + + ui64 GetSize() const override { + return Addresses.size(); + } +private: + const TBatch Addresses; +}; + +} //namespace + +TConclusion> TCleanEmptyPortionsNormalizer::DoInit(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + auto batchesToDelete = GetPortionsToDelete(txc); + if (!batchesToDelete) { + return TConclusionStatus::Fail("Not ready"); + } + + std::vector result; + for (auto&& b: *batchesToDelete) { + 
result.emplace_back(std::make_shared(std::make_shared(std::move(b)))); + } + return result; +} + +} //namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h b/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h new file mode 100644 index 000000000000..920b3d8c0f56 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace NKikimr::NOlap { + +class TCleanEmptyPortionsNormalizer : public TNormalizationController::INormalizerComponent { + + static TString ClassName() { + return ToString(ENormalizerSequentialId::EmptyPortionsCleaner); + } + static inline auto Registrator = INormalizerComponent::TFactory::TRegistrator(ClassName()); +public: + TCleanEmptyPortionsNormalizer(const TNormalizationController::TInitContext&) + {} + + std::optional DoGetEnumSequentialId() const override { + return ENormalizerSequentialId::EmptyPortionsCleaner; + } + + TString GetClassName() const override { + return ClassName(); + } + + TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} //namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp index fffee39865b2..63cea8b19952 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp @@ -7,7 +7,7 @@ namespace NKikimr::NOlap { TConclusion> TPortionsNormalizerBase::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { - auto initRes = DoInitImpl(controller,txc); + auto initRes = DoInitImpl(controller, txc); if (initRes.IsFail()) { return initRes; @@ -33,51 +33,24 @@ TConclusion> TPortionsNormalizerBase::DoInit( return tasks; } - auto columnsFilter = 
GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); - THashMap portions; auto schemas = std::make_shared>(); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return TConclusionStatus::Fail("Not ready"); + auto conclusion = InitColumns(tablesManager, db, portions); + if (conclusion.IsFail()) { + return conclusion; } - - TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); - auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { - auto currentSchema = schema.GetSchema(portion); - portion.SetSchemaVersion(currentSchema->GetVersion()); - - if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { - return; - } - auto it = portions.find(portion.GetPortionIdVerified()); - if (it == portions.end()) { - (*schemas)[portion.GetPortionIdVerified()] = currentSchema; - const ui64 portionId = portion.GetPortionIdVerified(); - it = portions.emplace(portionId, std::move(portion)).first; - } else { - it->second.Merge(std::move(portion)); - } - it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); - }; - - while (!rowset.EndOfSet()) { - TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); - Y_ABORT_UNLESS(rowset.GetValue() == 0); - - portion.SetMinSnapshotDeprecated(NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); - portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); - - NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); - initPortion(std::move(portion), chunkLoadContext); - - if (!rowset.Next()) { - return TConclusionStatus::Fail("Not ready"); - } + } + { + auto conclusion = InitIndexes(db, portions); + if (conclusion.IsFail()) { + return conclusion; } } + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + for (auto&& [_, p] : portions) { + (*schemas)[p.GetPortionIdVerified()] = 
schema.GetSchema(p); + } std::vector> package; package.reserve(100); @@ -110,4 +83,70 @@ TConclusion> TPortionsNormalizerBase::DoInit( return tasks; } +TConclusionStatus TPortionsNormalizerBase::InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions) { + using namespace NColumnShard; + auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { + auto currentSchema = schema.GetSchema(portion); + portion.SetSchemaVersion(currentSchema->GetVersion()); + + if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { + return; + } + auto it = portions.find(portion.GetPortionIdVerified()); + if (it == portions.end()) { + const ui64 portionId = portion.GetPortionIdVerified(); + it = portions.emplace(portionId, std::move(portion)).first; + } else { + it->second.Merge(std::move(portion)); + } + it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); + }; + + while (!rowset.EndOfSet()) { + TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); + Y_ABORT_UNLESS(rowset.GetValue() == 0); + + portion.SetMinSnapshotDeprecated( + NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); + portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); + + NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + initPortion(std::move(portion), chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + +TConclusionStatus TPortionsNormalizerBase::InitIndexes(NIceDb::TNiceDb& db, THashMap& portions) { + 
using IndexIndexes = NColumnShard::Schema::IndexIndexes; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + while (!rowset.EndOfSet()) { + NOlap::TIndexChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + + auto it = portions.find(rowset.GetValue()); + AFL_VERIFY(it != portions.end()); + it->second.LoadIndex(chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + } diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h index 574a1c212873..8c23395eba0b 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h @@ -85,7 +85,12 @@ class TPortionsNormalizerBase : public TNormalizationController::INormalizerComp : DsGroupSelector(info.GetStorageInfo()) {} - virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; + TConclusionStatus InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions); + TConclusionStatus InitIndexes(NIceDb::TNiceDb& db, THashMap& portions); + + virtual TConclusion> DoInit( + const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; protected: virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const = 0; diff --git a/ydb/core/tx/columnshard/normalizer/portion/ya.make b/ydb/core/tx/columnshard/normalizer/portion/ya.make index ec31c82f7b31..ff813694a971 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/ya.make +++ b/ydb/core/tx/columnshard/normalizer/portion/ya.make @@ -5,6 +5,7 @@ SRCS( GLOBAL portion.cpp GLOBAL chunks.cpp GLOBAL clean.cpp + GLOBAL clean_empty.cpp GLOBAL broken_blobs.cpp ) diff --git 
a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp new file mode 100644 index 000000000000..1b41c455217b --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp @@ -0,0 +1,31 @@ +#include "broken_txs.h" + +#include +#include + +namespace NKikimr::NOlap { + +TConclusion> TBrokenTxsNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = db.Table().GreaterOrEqual(0).Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + if (!rowset.HaveValue()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("event", "removed_by_normalizer")("condition", "no_kind"); + Schema::EraseTxInfo(db, txId); + } + + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + } + return std::vector(); +} + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h new file mode 100644 index 000000000000..1ff68530bf35 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap { + +class TBrokenTxsNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "BrokenTxsNormalizer"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TBrokenTxsNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return {}; + } + + virtual TString GetClassName() const override { + 
return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h index 80b56c080071..8787da559489 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h +++ b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h @@ -21,7 +21,7 @@ class TGCCountersNormalizer: public TNormalizationController::INormalizerCompone } virtual std::optional DoGetEnumSequentialId() const override { - return {}; + return ENormalizerSequentialId::GCCountersNormalizer; } virtual TString GetClassName() const override { diff --git a/ydb/core/tx/columnshard/normalizer/tablet/ya.make b/ydb/core/tx/columnshard/normalizer/tablet/ya.make index 9be6336eede5..0b39efc35b5b 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/ya.make +++ b/ydb/core/tx/columnshard/normalizer/tablet/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( GLOBAL gc_counters.cpp + GLOBAL broken_txs.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/normalizer/ya.make b/ydb/core/tx/columnshard/normalizer/ya.make index 46f7baeaea98..ced78fd812af 100644 --- a/ydb/core/tx/columnshard/normalizer/ya.make +++ b/ydb/core/tx/columnshard/normalizer/ya.make @@ -6,6 +6,7 @@ PEERDIR( ydb/core/tx/columnshard/normalizer/tablet ydb/core/tx/columnshard/normalizer/tables ydb/core/tx/columnshard/normalizer/portion + ydb/core/tx/columnshard/normalizer/insert_table ) END() diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp index 319379133f3d..39aa61a9a008 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp @@ -6,47 +6,54 @@ #include #include #include - #include namespace NKikimr::NOlap { -void TBuildBatchesTask::ReplyError(const TString& 
message) { +void TBuildBatchesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "cannot build batch for insert")("reason", message)("data", WriteData.GetWriteMeta().GetLongTxIdOptional()); auto writeDataPtr = std::make_shared(std::move(WriteData)); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); - auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); + auto result = + NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } -bool TBuildBatchesTask::DoExecute() { +TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { TConclusion> batchConclusion = WriteData.GetData()->ExtractBatch(); if (batchConclusion.IsFail()) { - ReplyError("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); - return true; + ReplyError( + "cannot extract incoming batch: " + batchConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); } + WritingCounters->OnIncomingData(NArrow::GetBatchDataSize(*batchConclusion)); - auto preparedConclusion = ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); + auto preparedConclusion = + ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); if (preparedConclusion.IsFail()) { - ReplyError("cannot prepare incoming batch: " + 
preparedConclusion.GetErrorMessage()); - return true; + ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage()); } auto batch = preparedConclusion.DetachResult(); - const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); std::shared_ptr merger; switch (WriteData.GetWriteMeta().GetModificationType()) { case NEvWrite::EModificationType::Upsert: { + const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); if (defaultFields.empty()) { - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::shared_ptr task = + std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); - return true; + return TConclusionStatus::Success(); } else { - auto conclusion = ActualSchema->BuildDefaultBatch(ActualSchema->GetIndexInfo().ArrowSchema()->fields(), 1); + auto insertionConclusion = ActualSchema->CheckColumnsDefault(defaultFields); + auto conclusion = ActualSchema->BuildDefaultBatch(ActualSchema->GetIndexInfo().ArrowSchema()->fields(), 1, true); AFL_VERIFY(!conclusion.IsFail())("error", conclusion.GetErrorMessage()); auto batchDefault = conclusion.DetachResult(); - NArrow::NMerger::TSortableBatchPosition pos(batchDefault, 0, batchDefault->schema()->field_names(), batchDefault->schema()->field_names(), false); - merger = std::make_shared(batch, ActualSchema, pos); + NArrow::NMerger::TSortableBatchPosition pos( + batchDefault, 0, batchDefault->schema()->field_names(), batchDefault->schema()->field_names(), false); + merger = std::make_shared( + batch, ActualSchema, insertionConclusion.IsSuccess() ? 
"" : insertionConclusion.GetErrorMessage(), pos); break; } } @@ -55,22 +62,22 @@ bool TBuildBatchesTask::DoExecute() { break; } case NEvWrite::EModificationType::Update: { - merger = std::make_shared(batch, ActualSchema); + merger = std::make_shared(batch, ActualSchema, ""); break; } case NEvWrite::EModificationType::Replace: case NEvWrite::EModificationType::Delete: { - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::shared_ptr task = + std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); - return true; + return TConclusionStatus::Success(); } } std::shared_ptr task = std::make_shared( TabletId, ParentActorId, BufferActorId, std::move(WriteData), merger, ActualSchema, ActualSnapshot, batch); NActors::TActivationContext::AsActorContext().Register(new NDataReader::TActor(task)); - return true; + return TConclusionStatus::Success(); } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.h b/ydb/core/tx/columnshard/operations/batch_builder/builder.h index f49648790d61..31ca0ac7ac43 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.h @@ -1,5 +1,7 @@ #pragma once +#include #include +#include #include #include #include @@ -14,24 +16,27 @@ class TBuildBatchesTask: public NConveyor::ITask { const NActors::TActorId BufferActorId; const std::shared_ptr ActualSchema; const TSnapshot ActualSnapshot; - void ReplyError(const TString& message); + const std::shared_ptr WritingCounters; + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; + public: virtual TString 
GetTaskClassIdentifier() const override { return "Write::ConstructBatches"; } - TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, - const TSnapshot& actualSnapshot) + TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, const TSnapshot& actualSnapshot, + const std::shared_ptr& writingCounters) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , ActualSchema(actualSchema) , ActualSnapshot(actualSnapshot) - { + , WritingCounters(writingCounters) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp index eb156e5c2451..823f6ac1cf3d 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp @@ -1,6 +1,6 @@ #include "merger.h" #include -#include +#include namespace NKikimr::NOlap { @@ -60,17 +60,19 @@ NKikimr::TConclusionStatus TUpdateMerger::OnEqualKeys(const NArrow::NMerger::TSo return TConclusionStatus::Success(); } -TUpdateMerger::TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, const std::optional& defaultExists /*= {}*/) +TUpdateMerger::TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, + const TString& insertDenyReason, const std::optional& defaultExists /*= {}*/) : TBase(incoming, actualSchema) , Builder(actualSchema->GetIndexInfo().ArrowSchema()->fields()) , DefaultExists(defaultExists) + , InsertDenyReason(insertDenyReason) { - for (auto&& i : actualSchema->GetIndexInfo().ArrowSchema()->field_names()) { - auto fIdx = IncomingData->schema()->GetFieldIndex(i); 
+ for (auto&& f : actualSchema->GetIndexInfo().ArrowSchema()->fields()) { + auto fIdx = IncomingData->schema()->GetFieldIndex(f->name()); if (fIdx == -1) { IncomingColumnRemap.emplace_back(); } else { - auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + i); + auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + f->name()); std::shared_ptr flagsArray; if (fExistsIdx != -1) { AFL_VERIFY(IncomingData->column(fExistsIdx)->type_id() == arrow::Type::BOOL); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/merger.h b/ydb/core/tx/columnshard/operations/batch_builder/merger.h index c9c8a986c13e..e503f742b18f 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/merger.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/merger.h @@ -79,8 +79,12 @@ class TUpdateMerger: public IMerger { std::vector> IncomingColumnRemap; std::vector> HasIncomingDataFlags; const std::optional DefaultExists; + const TString InsertDenyReason; virtual TConclusionStatus OnEqualKeys(const NArrow::NMerger::TSortableBatchPosition& exists, const NArrow::NMerger::TSortableBatchPosition& incoming) override; virtual TConclusionStatus OnIncomingOnly(const NArrow::NMerger::TSortableBatchPosition& incoming) override { + if (!!InsertDenyReason) { + return TConclusionStatus::Fail("insertion is impossible: " + InsertDenyReason); + } if (!DefaultExists) { return TConclusionStatus::Success(); } else { @@ -93,7 +97,7 @@ class TUpdateMerger: public IMerger { } TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, - const std::optional& defaultExists = {}); + const TString& insertDenyReason, const std::optional& defaultExists = {}); }; } diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp index 6ad24b426dc0..e13c7fc74eaf 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp +++ 
b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp @@ -6,16 +6,10 @@ namespace NKikimr::NOlap { std::unique_ptr TModificationRestoreTask::DoBuildRequestInitiator() const { - auto request = std::make_unique(LocalPathId); + auto request = std::make_unique(LocalPathId, WriteData.GetWriteMeta().GetLockIdOptional()); request->ReadToSnapshot = Snapshot; - request->RangesFilter = std::make_shared(false); auto pkData = NArrow::TColumnOperator().VerifyIfAbsent().Extract(IncomingData, ActualSchema->GetPKColumnNames()); - for (ui32 i = 0; i < pkData->num_rows(); ++i) { - auto batch = pkData->Slice(i, 1); - auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batch); - auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batch); - AFL_VERIFY(request->RangesFilter->Add(pFrom, pTo, &ActualSchema->GetIndexInfo())); - } + request->RangesFilter = TPKRangesFilter::BuildFromRecordBatchLines(pkData, false); for (auto&& i : ActualSchema->GetIndexInfo().GetColumnIds(false)) { request->AddColumn(i, ActualSchema->GetIndexInfo().GetColumnName(i)); } @@ -25,17 +19,19 @@ std::unique_ptr TModificationRestoreTa NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::shared_ptr& data) { auto result = Merger->AddExistsDataOrdered(data); if (result.IsFail()) { - auto writeDataPtr = std::make_shared(std::move(WriteData)); - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems") + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "merge_data_problems") ("write_id", WriteData.GetWriteMeta().GetWriteId())("tablet_id", TabletId)("message", result.GetErrorMessage()); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); - auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, - std::move(buffer), result.GetErrorMessage()); - TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); + 
SendErrorMessage(result.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); } return result; } +void TModificationRestoreTask::DoOnError(const TString& errorMessage) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems")("write_id", WriteData.GetWriteMeta().GetWriteId())( + "tablet_id", TabletId)("message", errorMessage); + SendErrorMessage(errorMessage, NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); +} + NKikimr::TConclusionStatus TModificationRestoreTask::DoOnFinished() { { auto result = Merger->Finish(); @@ -65,4 +61,11 @@ TModificationRestoreTask::TModificationRestoreTask(const ui64 tabletId, const NA } +void TModificationRestoreTask::SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { + auto writeDataPtr = std::make_shared(std::move(WriteData)); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); + auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage, errorClass); + TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); +} + } diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.h b/ydb/core/tx/columnshard/operations/batch_builder/restore.h index 7e244f1e95b4..b69a856a8a58 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.h @@ -1,6 +1,7 @@ #pragma once #include "merger.h" +#include #include #include @@ -22,10 +23,13 @@ class TModificationRestoreTask: public NDataReader::IRestoreTask { virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) override; virtual TConclusionStatus DoOnFinished() override; + virtual void DoOnError(const TString& errorMessage) override; + void SendErrorMessage(const TString& errorMessage, const 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + public: - TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, - const std::shared_ptr& actualSchema, const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); + TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, const std::shared_ptr& actualSchema, + const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/manager.cpp b/ydb/core/tx/columnshard/operations/manager.cpp new file mode 100644 index 000000000000..1527ec5d028d --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.cpp @@ -0,0 +1,296 @@ +#include "manager.h" + +#include + +namespace NKikimr::NColumnShard { + +bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const TOperationWriteId writeId = (TOperationWriteId)rowset.GetValue(); + const ui64 createdAtSec = rowset.GetValue(); + const ui64 lockId = rowset.GetValue(); + const ui64 cookie = rowset.GetValueOrDefault(0); + const TString metadata = rowset.GetValue(); + const EOperationStatus status = (EOperationStatus)rowset.GetValue(); + std::optional granuleShardingVersionId; + if (rowset.HaveValue() && + rowset.GetValue()) { + granuleShardingVersionId = rowset.GetValue(); + } + + NKikimrTxColumnShard::TInternalOperationData metaProto; + Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); + + auto operation = std::make_shared( + writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, 
NEvWrite::EModificationType::Upsert); + operation->FromProto(metaProto); + LinkInsertWriteIdToOperationWriteId(operation->GetInsertWriteIds(), operation->GetWriteId()); + AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); + + AFL_VERIFY(Operations.emplace(operation->GetWriteId(), operation).second); + auto it = LockFeatures.find(lockId); + if (it == LockFeatures.end()) { + it = LockFeatures.emplace(lockId, TLockFeatures(lockId, 0)).first; + } + it->second.MutableWriteOperations().emplace_back(operation); + LastWriteId = std::max(LastWriteId, operation->GetWriteId()); + if (!rowset.Next()) { + return false; + } + } + } + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const ui64 lockId = rowset.GetValue(); + const ui64 txId = rowset.GetValue(); + AFL_VERIFY(LockFeatures.contains(lockId))("lock_id", lockId); + AFL_VERIFY(Tx2Lock.emplace(txId, lockId).second); + if (!rowset.Next()) { + return false; + } + } + } + + return true; +} + +void TOperationsManager::CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnExecute(owner, txc, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnExecute(commited, lock, txId, txc); +} + +void TOperationsManager::CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + for (auto&& i : 
lock.GetBrokeOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("broken_lock_id", i); + lockNotify->SetBroken(); + } + } + + for (auto&& i : lock.GetNotifyOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + lockNotify->AddNotifyCommit(lock.GetLockId()); + } + } + + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnComplete(owner, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnComplete(commited, lock, txId); +} + +void TOperationsManager::AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnExecute(owner, txc); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnExecute(aborted, *lock, txId, txc); +} + +void TOperationsManager::AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnComplete(owner); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnComplete(aborted, *lock, txId); +} + +TWriteOperation::TPtr TOperationsManager::GetOperation(const TOperationWriteId writeId) const { + auto it = Operations.find(writeId); + if (it == Operations.end()) { 
+ return nullptr; + } + return it->second; +} + +void TOperationsManager::OnTransactionFinishOnExecute( + const TVector& operations, const TLockFeatures& lock, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + for (auto&& op : operations) { + RemoveOperationOnExecute(op, txc); + } + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lock.GetLockId()).Delete(); +} + +void TOperationsManager::OnTransactionFinishOnComplete( + const TVector& operations, const TLockFeatures& lock, const ui64 txId) { + { + lock.RemoveInteractions(InteractionsContext); + LockFeatures.erase(lock.GetLockId()); + } + Tx2Lock.erase(txId); + for (auto&& op : operations) { + RemoveOperationOnComplete(op); + } +} + +void TOperationsManager::RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key((ui64)op->GetWriteId()).Delete(); +} + +void TOperationsManager::RemoveOperationOnComplete(const TWriteOperation::TPtr& op) { + for (auto&& i : op->GetInsertWriteIds()) { + AFL_VERIFY(InsertWriteIdToOpWriteId.erase(i)); + } + Operations.erase(op->GetWriteId()); +} + +TOperationWriteId TOperationsManager::BuildNextOperationWriteId() { + return ++LastWriteId; +} + +std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { + auto lockIt = Tx2Lock.find(txId); + if (lockIt != Tx2Lock.end()) { + return lockIt->second; + } + return std::nullopt; +} + +void TOperationsManager::LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lockId).Update(); + Tx2Lock[txId] = lockId; +} + +void TOperationsManager::LinkTransactionOnComplete(const ui64 /*lockId*/, const ui64 /*txId*/) { +} + +TWriteOperation::TPtr TOperationsManager::RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { + auto 
writeId = BuildNextOperationWriteId(); + auto operation = std::make_shared( + writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); + Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); + GetLockVerified(operation->GetLockId()).MutableWriteOperations().emplace_back(operation); + GetLockVerified(operation->GetLockId()).AddWrite(); + return operation; +} + +TConclusion TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { + if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks()) { + if (evWrite.Record.GetLocks().GetLocks().size() < 1) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("no locks in case tx/locks"); + } + auto& baseLock = evWrite.Record.GetLocks().GetLocks()[0]; + for (auto&& i : evWrite.Record.GetLocks().GetLocks()) { + if (i.GetLockId() != baseLock.GetLockId()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock ids in operation"); + } + if (i.GetGeneration() != baseLock.GetGeneration()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generations in operation"); + } + if (i.GetCounter() != baseLock.GetCounter()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generation counters in operation"); + } + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { + return EOperationBehaviour::CommitWriteLock; + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Rollback) { + return EOperationBehaviour::AbortWriteLock; + } + } + + if 
(evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { + if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::WriteWithLock; + } + + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("mode not IMMEDIATE for LockTxId + LockNodeId"); + } + + if (!evWrite.Record.HasLockTxId() && !evWrite.Record.HasLockNodeId() && + evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::NoTxWrite; + } + + if (evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { + return EOperationBehaviour::InTxWrite; + } + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("undefined request for detect tx type"); +} + +TOperationsManager::TOperationsManager() { +} + +void TOperationsManager::AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer) { + return AddEventForLock(owner, GetLockForTxVerified(txId), writer); +} + +void TOperationsManager::AddEventForLock( + TColumnShard& /*owner*/, const ui64 lockId, const std::shared_ptr& writer) { + AFL_VERIFY(writer); + NOlap::NTxInteractions::TTxConflicts txNotifications; + NOlap::NTxInteractions::TTxConflicts txConflicts; + auto& txLock = GetLockVerified(lockId); + writer->CheckInteraction(lockId, InteractionsContext, txConflicts, txNotifications); + for (auto&& i : txConflicts) { + if (auto lock = GetLockOptional(i.first)) { + GetLockVerified(i.first).AddBrokeOnCommit(i.second); + } else if (txLock.IsCommitted(i.first)) { + txLock.SetBroken(); + } + } + for (auto&& i : txNotifications) { + GetLockVerified(i.first).AddNotificationsOnCommit(i.second); + } + if (auto txEvent = writer->BuildEvent()) { + NOlap::NTxInteractions::TTxEventContainer container(lockId, txEvent); + 
container.AddToInteraction(InteractionsContext); + txLock.MutableEvents().emplace_back(std::move(container)); + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/manager.h b/ydb/core/tx/columnshard/operations/manager.h new file mode 100644 index 000000000000..9e2651e24da0 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.h @@ -0,0 +1,221 @@ +#pragma once +#include "write.h" + +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +class TTxEventContainer; +class TInteractionsContext; +class ITxEventWriter; +} // namespace NKikimr::NOlap::NTxInteractions + +namespace NKikimr::NColumnShard { + +class TColumnShard; +class TLockFeatures; + +class TLockSharingInfo { +private: + const ui64 LockId; + const ui64 Generation; + TAtomicCounter InternalGenerationCounter = 0; + TAtomicCounter Broken = 0; + TAtomicCounter WritesCounter = 0; + friend class TLockFeatures; + +public: + ui64 GetLockId() const { + return LockId; + } + ui64 GetGeneration() const { + return Generation; + } + + TLockSharingInfo(const ui64 lockId, const ui64 generation) + : LockId(lockId) + , Generation(generation) { + } + + bool HasWrites() const { + return WritesCounter.Val(); + } + + bool IsBroken() const { + return Broken.Val(); + } + + ui64 GetCounter() const { + return InternalGenerationCounter.Val(); + } +}; + +class TLockFeatures: TMoveOnly { +private: + YDB_ACCESSOR_DEF(std::vector, WriteOperations); + YDB_ACCESSOR_DEF(std::vector, Events); + YDB_ACCESSOR(ui64, LockId, 0); + YDB_ACCESSOR(ui64, Generation, 0); + std::shared_ptr SharingInfo; + + YDB_READONLY_DEF(THashSet, BrokeOnCommit); + YDB_READONLY_DEF(THashSet, NotifyOnCommit); + YDB_READONLY_DEF(THashSet, Committed); + +public: + const std::shared_ptr& GetSharingInfo() const { + return SharingInfo; + } + + ui64 GetInternalGenerationCounter() const { + return SharingInfo->GetCounter(); + } + + void AddWrite() { + SharingInfo->WritesCounter.Inc(); + } + + void 
SetBroken() { + SharingInfo->Broken = 1; + SharingInfo->InternalGenerationCounter = (i64)TSysTables::TLocksTable::TLock::ESetErrors::ErrorBroken; + } + + bool IsBroken() const { + return SharingInfo->IsBroken(); + } + + bool IsCommitted(const ui64 lockId) const { + return Committed.contains(lockId); + } + + void AddNotifyCommit(const ui64 lockId) { + AFL_VERIFY(NotifyOnCommit.erase(lockId)); + Committed.emplace(lockId); + } + + void AddBrokeOnCommit(const THashSet& lockIds) { + BrokeOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void AddNotificationsOnCommit(const THashSet& lockIds) { + NotifyOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void RemoveInteractions(NOlap::NTxInteractions::TInteractionsContext& context) const { + for (auto&& i : Events) { + i.RemoveFromInteraction(context); + } + } + + TLockFeatures(const ui64 lockId, const ui64 gen) + : LockId(lockId) + , Generation(gen) { + SharingInfo = std::make_shared(lockId, gen); + } +}; + +class TOperationsManager { + NOlap::NTxInteractions::TInteractionsContext InteractionsContext; + + THashMap Tx2Lock; + THashMap InsertWriteIdToOpWriteId; + THashMap LockFeatures; + THashMap Operations; + TOperationWriteId LastWriteId = TOperationWriteId(0); + +public: + + TWriteOperation::TPtr GetOperationByInsertWriteIdVerified(const TInsertWriteId insertWriteId) const { + auto it = InsertWriteIdToOpWriteId.find(insertWriteId); + AFL_VERIFY(it != InsertWriteIdToOpWriteId.end()); + return GetOperationVerified(it->second); + } + + void LinkInsertWriteIdToOperationWriteId(const std::vector& insertions, const TOperationWriteId operationId) { + for (auto&& i : insertions) { + InsertWriteIdToOpWriteId.emplace(i, operationId); + } + } + bool Load(NTabletFlatExecutor::TTransactionContext& txc); + void AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer); + void AddEventForLock(TColumnShard& owner, const ui64 lockId, const std::shared_ptr& writer); + + TWriteOperation::TPtr 
GetOperation(const TOperationWriteId writeId) const; + TWriteOperation::TPtr GetOperationVerified(const TOperationWriteId writeId) const { + return TValidator::CheckNotNull(GetOperationOptional(writeId)); + } + TWriteOperation::TPtr GetOperationOptional(const TOperationWriteId writeId) const { + return GetOperation(writeId); + } + void CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); + void CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot); + void AddTemporaryTxLink(const ui64 lockId) { + AFL_VERIFY(Tx2Lock.emplace(lockId, lockId).second); + } + void LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void LinkTransactionOnComplete(const ui64 lockId, const ui64 txId); + void AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId); + + std::optional GetLockForTx(const ui64 txId) const; + std::optional GetLockForTxOptional(const ui64 txId) const { + return GetLockForTx(txId); + } + TLockFeatures* GetLockFeaturesForTxOptional(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + if (!lockId) { + return nullptr; + } + return &GetLockVerified(*lockId); + } + TLockFeatures& GetLockFeaturesForTxVerified(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + AFL_VERIFY(lockId); + return GetLockVerified(*lockId); + } + ui64 GetLockForTxVerified(const ui64 txId) const { + auto result = GetLockForTxOptional(txId); + AFL_VERIFY(result)("tx_id", txId); + return *result; + } + + TWriteOperation::TPtr RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + bool RegisterLock(const ui64 lockId, const ui64 generationId) { + if 
(LockFeatures.contains(lockId)) { + return false; + } else { + LockFeatures.emplace(lockId, TLockFeatures(lockId, generationId)); + return true; + } + } + static TConclusion GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); + TLockFeatures& GetLockVerified(const ui64 lockId) { + auto result = GetLockOptional(lockId); + AFL_VERIFY(result)("lock_id", lockId); + return *result; + } + + TLockFeatures* GetLockOptional(const ui64 lockId) { + auto it = LockFeatures.find(lockId); + if (it != LockFeatures.end()) { + return &it->second; + } else { + return nullptr; + } + } + + TOperationsManager(); + +private: + TOperationWriteId BuildNextOperationWriteId(); + void RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); + void RemoveOperationOnComplete(const TWriteOperation::TPtr& op); + void OnTransactionFinishOnExecute(const TVector& operations, const TLockFeatures& lock, const ui64 txId, + NTabletFlatExecutor::TTransactionContext& txc); + void OnTransactionFinishOnComplete(const TVector& operations, const TLockFeatures& lock, const ui64 txId); +}; +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp index ba8c51c63b2e..2a9d42b00283 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp @@ -27,49 +27,63 @@ std::optional> TBuildSlicesTask:: return result; } -void TBuildSlicesTask::ReplyError(const TString& message) { +void TBuildSlicesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); auto result = 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } -bool TBuildSlicesTask::DoExecute() { +TConclusionStatus TBuildSlicesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); if (!OriginalBatch) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "ev_write_bad_data")("write_id", WriteData.GetWriteMeta().GetWriteId())("table_id", WriteData.GetWriteMeta().GetTableId()); - ReplyError("no data in batch"); - return true; + ReplyError("no data in batch", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("no data in batch"); } const auto& indexSchema = ActualSchema->GetIndexInfo().ArrowSchema(); - auto reorderConclusion = NArrow::TColumnOperator().Reorder(OriginalBatch, indexSchema->field_names()); - if (reorderConclusion.IsFail()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())("problem", reorderConclusion.GetErrorMessage()); - ReplyError("cannot reorder schema: " + reorderConclusion.GetErrorMessage()); - return true; - } else { - OriginalBatch = reorderConclusion.DetachResult(); - } - if (!OriginalBatch->schema()->Equals(indexSchema)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unequal schemas")("batch", OriginalBatch->schema()->ToString()) - ("index", indexSchema->ToString()); - ReplyError("unequal schemas"); - return true; + auto subsetConclusion = NArrow::TColumnOperator().BuildSequentialSubset(OriginalBatch, indexSchema); + if (subsetConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())( 
+ "problem", subsetConclusion.GetErrorMessage()); + ReplyError( + "unadaptable schema: " + subsetConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot reorder schema: " + subsetConclusion.GetErrorMessage()); } + NArrow::TSchemaSubset subset = subsetConclusion.DetachResult(); + if (OriginalBatch->num_columns() != indexSchema->num_fields()) { + AFL_VERIFY(OriginalBatch->num_columns() < indexSchema->num_fields())("original", OriginalBatch->num_columns())( + "index", indexSchema->num_fields()); + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableOptionalColumnsInColumnShard()) { + subset = NArrow::TSchemaSubset::AllFieldsAccepted(); + const std::vector& columnIdsVector = ActualSchema->GetIndexInfo().GetColumnIds(false); + const std::set columnIdsSet(columnIdsVector.begin(), columnIdsVector.end()); + auto normalized = + ActualSchema->NormalizeBatch(*ActualSchema, std::make_shared(OriginalBatch), columnIdsSet).DetachResult(); + OriginalBatch = NArrow::ToBatch(normalized->BuildTableVerified(), true); + } + } WriteData.MutableWriteMeta().SetWriteMiddle2StartInstant(TMonotonic::Now()); auto batches = BuildSlices(); WriteData.MutableWriteMeta().SetWriteMiddle3StartInstant(TMonotonic::Now()); if (batches) { auto writeDataPtr = std::make_shared(std::move(WriteData)); - auto result = std::make_unique(writeDataPtr, std::move(*batches)); + writeDataPtr->SetSchemaSubset(std::move(subset)); + std::shared_ptr pkBatch; + if (!writeDataPtr->GetWriteMeta().HasLongTxId()) { + pkBatch = NArrow::TColumnOperator().Extract(OriginalBatch, ActualSchema->GetIndexInfo().GetPrimaryKey()->fields()); + } + auto result = std::make_unique(writeDataPtr, std::move(*batches), pkBatch); TActorContext::AsActorContext().Send(BufferActorId, result.release()); } else { - ReplyError("Cannot slice input to batches"); + ReplyError("Cannot slice input to batches", 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("Cannot slice input to batches"); } - return true; + return TConclusionStatus::Success(); } } diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.h b/ydb/core/tx/columnshard/operations/slice_builder/builder.h index f9eb29fbe9a4..a22b0c7d6ca7 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.h @@ -1,8 +1,9 @@ #pragma once +#include +#include #include #include #include -#include namespace NKikimr::NOlap { @@ -15,24 +16,24 @@ class TBuildSlicesTask: public NConveyor::ITask { std::shared_ptr OriginalBatch; std::optional> BuildSlices(); const std::shared_ptr ActualSchema; - void ReplyError(const TString& message); + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; + public: virtual TString GetTaskClassIdentifier() const override { return "Write::ConstructBlobs::Slices"; } - TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, - const std::shared_ptr& actualSchema) + TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, const std::shared_ptr& actualSchema) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , OriginalBatch(batch) - , ActualSchema(actualSchema) - { + , ActualSchema(actualSchema) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 
4ff54d395e73..06b7701cd7bd 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -1,6 +1,7 @@ -#include "batch_builder/builder.h" #include "write.h" +#include "batch_builder/builder.h" + #include #include #include @@ -11,272 +12,111 @@ namespace NKikimr::NColumnShard { - TWriteOperation::TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, - const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) - : Status(status) - , CreatedAt(createdAt) - , WriteId(writeId) - , LockId(lockId) - , Cookie(cookie) - , GranuleShardingVersionId(granuleShardingVersionId) - , ModificationType(mType) - { - } - - void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx) { - Y_ABORT_UNLESS(Status == EOperationStatus::Draft); - - NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); - writeMeta.SetModificationType(ModificationType); - std::shared_ptr task = std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, +TWriteOperation::TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, + const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) + : Status(status) + , CreatedAt(createdAt) + , WriteId(writeId) + , LockId(lockId) + , Cookie(cookie) + , GranuleShardingVersionId(granuleShardingVersionId) + , ModificationType(mType) { +} + +void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx) { + Y_ABORT_UNLESS(Status == EOperationStatus::Draft); + + 
NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); + writeMeta.SetLockId(LockId); + writeMeta.SetModificationType(ModificationType); + std::shared_ptr task = + std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR)), - schema, owner.GetLastTxSnapshot()); - NConveyor::TCompServiceOperator::SendTaskToExecute(task); - - Status = EOperationStatus::Started; - } - - void TWriteOperation::Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - - for (auto gWriteId : GlobalWriteIds) { - auto pathExists = [&](ui64 pathId) { - return owner.TablesManager.HasTable(pathId); - }; - - auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, - pathExists); - - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); - } - owner.UpdateInsertTableCounters(); - } - - void TWriteOperation::OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds) { - Y_ABORT_UNLESS(Status == EOperationStatus::Started); - Status = EOperationStatus::Prepared; - GlobalWriteIds = globalWriteIds; - - NIceDb::TNiceDb db(txc.DB); - NKikimrTxColumnShard::TInternalOperationData proto; - ToProto(proto); - - TString metadata; - Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - - db.Table().Key((ui64)WriteId).Update( - NIceDb::TUpdate((ui32)Status), - NIceDb::TUpdate(CreatedAt.Seconds()), - 
NIceDb::TUpdate(metadata), - NIceDb::TUpdate(LockId), - NIceDb::TUpdate(Cookie), - NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); - } + schema, owner.GetLastTxSnapshot(), owner.Counters.GetCSCounters().WritingCounters); + NConveyor::TCompServiceOperator::SendTaskToExecute(task); - void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { - for (auto&& writeId : GlobalWriteIds) { - proto.AddInternalWriteIds((ui64)writeId); - } - proto.SetModificationType((ui32)ModificationType); - } + Status = EOperationStatus::Started; +} - void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { - for (auto&& writeId : proto.GetInternalWriteIds()) { - GlobalWriteIds.push_back(TWriteId(writeId)); - } - if (proto.HasModificationType()) { - ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); - } else { - ModificationType = NEvWrite::EModificationType::Replace; - } - } +void TWriteOperation::CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - void TWriteOperation::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); + for (auto gWriteId : InsertWriteIds) { + auto pathExists = [&](ui64 pathId) { + return owner.TablesManager.HasTable(pathId); + }; - THashSet writeIds; - writeIds.insert(GlobalWriteIds.begin(), GlobalWriteIds.end()); - owner.InsertTable->Abort(dbTable, writeIds); + const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, pathExists); + 
owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); } +} - bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const TWriteId writeId = (TWriteId)rowset.GetValue(); - const ui64 createdAtSec = rowset.GetValue(); - const ui64 lockId = rowset.GetValue(); - const ui64 cookie = rowset.GetValueOrDefault(0); - const TString metadata = rowset.GetValue(); - const EOperationStatus status = (EOperationStatus)rowset.GetValue(); - std::optional granuleShardingVersionId; - if (rowset.HaveValue() && rowset.GetValue()) { - granuleShardingVersionId = rowset.GetValue(); - } +void TWriteOperation::CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& /*snapshot*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + owner.UpdateInsertTableCounters(); +} - NKikimrTxColumnShard::TInternalOperationData metaProto; - Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); +void TWriteOperation::OnWriteFinish( + NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag) { + Y_ABORT_UNLESS(Status == EOperationStatus::Started); + Status = EOperationStatus::Prepared; + InsertWriteIds = insertWriteIds; - auto operation = std::make_shared(writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, NEvWrite::EModificationType::Upsert); - operation->FromProto(metaProto); - AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); - - auto [_, isOk] = Operations.emplace(operation->GetWriteId(), operation); - if (!isOk) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "duplicated_operation")("operation", *operation); - return false; - } - Locks[lockId].push_back(operation->GetWriteId()); - LastWriteId = std::max(LastWriteId, operation->GetWriteId()); - if (!rowset.Next()) { - return false; - } - } - 
} - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const ui64 lockId = rowset.GetValue(); - const ui64 txId = rowset.GetValue(); - AFL_VERIFY(Locks.contains(lockId))("lock_id", lockId); - Tx2Lock[txId] = lockId; - if (!rowset.Next()) { - return false; - } - } - } - return true; + if (ephemeralFlag) { + return; } - bool TOperationsManager::CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); + NIceDb::TNiceDb db(txc.DB); + NKikimrTxColumnShard::TInternalOperationData proto; + ToProto(proto); - TVector commited; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Commit(owner, txc, snapshot); - commited.emplace_back(*opPtr); - } - OnTransactionFinish(commited, txId, txc); - return true; - } - - bool TOperationsManager::AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); - - TVector aborted; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Abort(owner, txc); - aborted.emplace_back(*opPtr); - } - - OnTransactionFinish(aborted, txId, txc); - return true; - } + TString metadata; + 
Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - TWriteOperation::TPtr TOperationsManager::GetOperation(const TWriteId writeId) const { - auto it = Operations.find(writeId); - if (it == Operations.end()) { - return nullptr; - } - return it->second; - } - - void TOperationsManager::OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - auto lockId = GetLockForTx(txId); - AFL_VERIFY(!!lockId)("tx_id", txId); - Locks.erase(*lockId); - Tx2Lock.erase(txId); - for (auto&& op : operations) { - RemoveOperation(op, txc); - } - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, *lockId).Delete(); - } - - void TOperationsManager::RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { - Operations.erase(op->GetWriteId()); - NIceDb::TNiceDb db(txc.DB); - db.Table().Key((ui64)op->GetWriteId()).Delete(); - } + db.Table() + .Key((ui64)WriteId) + .Update(NIceDb::TUpdate((ui32)Status), NIceDb::TUpdate(CreatedAt.Seconds()), + NIceDb::TUpdate(metadata), NIceDb::TUpdate(LockId), + NIceDb::TUpdate(Cookie), + NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); +} - TWriteId TOperationsManager::BuildNextWriteId() { - return ++LastWriteId; +void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { + for (auto&& writeId : InsertWriteIds) { + proto.AddInternalWriteIds((ui64)writeId); } + proto.SetModificationType((ui32)ModificationType); +} - std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { - auto lockIt = Tx2Lock.find(txId); - if (lockIt != Tx2Lock.end()) { - return lockIt->second; - } - return std::nullopt; +void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { + for (auto&& writeId : proto.GetInternalWriteIds()) { + InsertWriteIds.push_back(TInsertWriteId(writeId)); } - - void TOperationsManager::LinkTransaction(const ui64 lockId, const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc) { - Tx2Lock[txId] = lockId; - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, lockId).Update(); + if (proto.HasModificationType()) { + ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); + } else { + ModificationType = NEvWrite::EModificationType::Replace; } +} - TWriteOperation::TPtr TOperationsManager::RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { - auto writeId = BuildNextWriteId(); - auto operation = std::make_shared(writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); - Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); - Locks[operation->GetLockId()].push_back(operation->GetWriteId()); - return operation; - } +void TWriteOperation::AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - EOperationBehaviour TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks() && evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { - return EOperationBehaviour::CommitWriteLock; - } + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - if (evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { - if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { - return EOperationBehaviour::WriteWithLock; - } + THashSet writeIds; + writeIds.insert(InsertWriteIds.begin(), InsertWriteIds.end()); + owner.InsertTable->Abort(dbTable, writeIds); +} - return EOperationBehaviour::Undefined; - } +void TWriteOperation::AbortOnComplete(TColumnShard& /*owner*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); +} - if 
(evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { - return EOperationBehaviour::InTxWrite; - } - return EOperationBehaviour::Undefined; - } } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/write.h b/ydb/core/tx/columnshard/operations/write.h index b72827af6c58..ad22caa651d4 100644 --- a/ydb/core/tx/columnshard/operations/write.h +++ b/ydb/core/tx/columnshard/operations/write.h @@ -1,95 +1,83 @@ #pragma once -#include -#include +#include +#include #include -#include #include -#include +#include +#include +#include -#include #include #include -#include +#include namespace NKikimr::NTabletFlatExecutor { - class TTransactionContext; +class TTransactionContext; +} + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; } namespace NKikimr::NColumnShard { - class TColumnShard; - - using TWriteId = NOlap::TWriteId; - - enum class EOperationStatus : ui32 { - Draft = 1, - Started = 2, - Prepared = 3 - }; - - enum class EOperationBehaviour : ui32 { - Undefined = 1, - InTxWrite = 2, - WriteWithLock = 3, - CommitWriteLock = 4 - }; - - class TWriteOperation { - YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); - YDB_READONLY_DEF(TInstant, CreatedAt); - YDB_READONLY_DEF(TWriteId, WriteId); - YDB_READONLY(ui64, LockId, 0); - YDB_READONLY(ui64, Cookie, 0); - YDB_READONLY_DEF(TVector, GlobalWriteIds); - YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); - YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); - YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); - public: - using TPtr = std::shared_ptr; - - TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - - void Start(TColumnShard& owner, const ui64 tableId, const 
NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx); - void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds); - void Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; - void Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; - - void Out(IOutputStream& out) const { - out << "write_id=" << (ui64) WriteId << ";lock_id=" << LockId; - } - - void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; - void FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); - }; - - class TOperationsManager { - TMap> Locks; - TMap Tx2Lock; - TMap Operations; - TWriteId LastWriteId = TWriteId(0); - - public: - bool Load(NTabletFlatExecutor::TTransactionContext& txc); - - TWriteOperation::TPtr GetOperation(const TWriteId writeId) const; - bool CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); - bool AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - std::optional GetLockForTx(const ui64 lockId) const; - - TWriteOperation::TPtr RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - static EOperationBehaviour GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); - - private: - TWriteId BuildNextWriteId(); - void RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); - void OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - }; -} +class TColumnShard; + +using TOperationWriteId = NOlap::TOperationWriteId; 
+using TInsertWriteId = NOlap::TInsertWriteId; + +enum class EOperationStatus : ui32 { + Draft = 1, + Started = 2, + Prepared = 3 +}; + +enum class EOperationBehaviour : ui32 { + Undefined = 1, + InTxWrite = 2, + WriteWithLock = 3, + CommitWriteLock = 4, + AbortWriteLock = 5, + NoTxWrite = 6 +}; + +class TWriteOperation { + YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); + YDB_READONLY_DEF(TInstant, CreatedAt); + YDB_READONLY_DEF(TOperationWriteId, WriteId); + YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Cookie, 0); + YDB_READONLY_DEF(std::vector, InsertWriteIds); + YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); + YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); + YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + +public: + using TPtr = std::shared_ptr; + + TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, + const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + + void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx); + void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag); + void CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; + void CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& snapshot) const; + void AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; + void AbortOnComplete(TColumnShard& owner) const; + + void Out(IOutputStream& out) const { + out << "write_id=" << (ui64)WriteId << ";lock_id=" << LockId; + } + + void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; + void 
FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); +}; + +} // namespace NKikimr::NColumnShard template <> inline void Out(IOutputStream& o, const NKikimr::NColumnShard::TWriteOperation& x) { diff --git a/ydb/core/tx/columnshard/operations/write_data.cpp b/ydb/core/tx/columnshard/operations/write_data.cpp index a0cd6450f534..56a0ad5e16cb 100644 --- a/ydb/core/tx/columnshard/operations/write_data.cpp +++ b/ydb/core/tx/columnshard/operations/write_data.cpp @@ -31,7 +31,7 @@ bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, cons TConclusion> TArrowData::ExtractBatch() { Y_ABORT_UNLESS(!!IncomingData); - auto result = NArrow::DeserializeBatch(IncomingData, BatchSchema->GetSchema()); + auto result = NArrow::DeserializeBatch(IncomingData, std::make_shared(BatchSchema->GetSchema()->fields())); IncomingData = ""; return result; } diff --git a/ydb/core/tx/columnshard/operations/ya.make b/ydb/core/tx/columnshard/operations/ya.make index c626a22b508b..c0bd3f234b78 100644 --- a/ydb/core/tx/columnshard/operations/ya.make +++ b/ydb/core/tx/columnshard/operations/ya.make @@ -3,6 +3,7 @@ LIBRARY() SRCS( write.cpp write_data.cpp + manager.cpp ) PEERDIR( @@ -11,6 +12,7 @@ PEERDIR( ydb/services/metadata ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/data_reader + ydb/core/tx/columnshard/transactions/locks ydb/core/tx/columnshard/operations/batch_builder ydb/core/tx/columnshard/operations/slice_builder ) diff --git a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp index 785ca04ba8f9..45f785c10223 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp +++ b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp @@ -1,9 +1,12 @@ #include "counters.h" +#include + namespace NKikimr::NOlap::NResourceBroker::NSubscribe { std::shared_ptr TSubscriberCounters::GetTypeCounters(const TString& resourceType) { + TGuard lock(Mutex); auto 
it = ResourceTypeCounters.find(resourceType); if (it == ResourceTypeCounters.end()) { it = ResourceTypeCounters.emplace(resourceType, std::make_shared(*this, resourceType)).first; diff --git a/ydb/core/tx/columnshard/resource_subscriber/task.h b/ydb/core/tx/columnshard/resource_subscriber/task.h index df4b742f1ad9..46a1ebebd32c 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/task.h +++ b/ydb/core/tx/columnshard/resource_subscriber/task.h @@ -28,6 +28,10 @@ class TResourcesGuard: public NColumnShard::TMonitoringObjectsCounter& column, const bool needMax, const bool isSortedColumn) { +TSimpleChunkMeta::TSimpleChunkMeta( + const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { Y_ABORT_UNLESS(column); - Y_ABORT_UNLESS(column->length()); - NumRows = column->length(); - RawBytes = NArrow::GetArrayDataSize(column); + Y_ABORT_UNLESS(column->GetRecordsCount()); + NumRows = column->GetRecordsCount(); + RawBytes = column->GetRawSizeVerified(); if (needMax) { - std::pair minMaxPos = {0, (column->length() - 1)}; if (!isSortedColumn) { - minMaxPos = NArrow::FindMinMaxPosition(column); - Y_ABORT_UNLESS(minMaxPos.first >= 0); - Y_ABORT_UNLESS(minMaxPos.second >= 0); + Max = column->GetMaxScalar(); + } else { + Max = column->GetScalar(column->GetRecordsCount() - 1); } - - Max = NArrow::GetScalar(column, minMaxPos.second); - - Y_ABORT_UNLESS(Max); +// AFL_VERIFY(Max); } } diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h index 8f8f902e4095..526a2a037967 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h @@ -1,4 +1,6 @@ #pragma once +#include + #include #include @@ -17,7 +19,7 @@ class TSimpleChunkMeta { ui32 RawBytes = 0; TSimpleChunkMeta() = default; public: - TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); + TSimpleChunkMeta(const std::shared_ptr& 
column, const bool needMinMax, const bool isSortedColumn); ui64 GetMetadataSize() const { return sizeof(ui32) + sizeof(ui32) + 8 * 3 * 2; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunks.h b/ydb/core/tx/columnshard/splitter/abstract/chunks.h index e3be37be2bd3..d0300915f098 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunks.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.h @@ -15,14 +15,12 @@ namespace NKikimr::NOlap { class TPortionInfo; class TPortionInfoConstructor; class TSimpleColumnInfo; -class TColumnSaver; class IPortionDataChunk { private: YDB_READONLY(ui32, EntityId, 0); std::optional ChunkIdx; - protected: ui64 DoGetPackedSize() const { return GetData().size(); @@ -32,9 +30,14 @@ class IPortionDataChunk { virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; virtual bool DoIsSplittable() const = 0; virtual std::optional DoGetRecordsCount() const = 0; + virtual std::optional DoGetRawBytes() const = 0; + virtual std::shared_ptr DoGetFirstScalar() const = 0; virtual std::shared_ptr DoGetLastScalar() const = 0; virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const = 0; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& /*portionInfo*/) const { + AFL_VERIFY(false)("problem", "implemented only in index chunks"); + } virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const { AFL_VERIFY(false); return nullptr; @@ -63,6 +66,12 @@ class IPortionDataChunk { return DoGetRecordsCount(); } + ui64 GetRawBytesVerified() const { + auto result = DoGetRawBytes(); + AFL_VERIFY(result); + return *result; + } + ui32 GetRecordsCountVerified() const { auto result = DoGetRecordsCount(); AFL_VERIFY(result); @@ -121,6 +130,10 @@ class IPortionDataChunk { AFL_VERIFY(!bRange.IsValid()); return DoAddIntoPortionBeforeBlob(bRange, 
portionInfo); } + + void AddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + return DoAddInplaceIntoPortion(portionInfo); + } }; } diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 83f9f90f77f2..7f6cc05c1e7b 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -1,5 +1,4 @@ #include "batch_slice.h" -#include "simple.h" #include namespace NKikimr::NOlap { @@ -69,7 +68,6 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe chunksInProgress.AddChunks(i.GetChunks()); } InternalSplitsCount = 0; - AFL_VERIFY(chunksInProgress.size()); std::vector result; Y_ABORT_UNLESS(features.GetSplitSettings().GetMaxBlobSize() >= 2 * features.GetSplitSettings().GetMinBlobSize()); while (!chunksInProgress.IsEmpty()) { @@ -149,7 +147,8 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe return true; } -TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) +TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : Schema(schema) , Counters(counters) { std::optional recordsCount; @@ -170,60 +169,14 @@ TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap counters) +TGeneralSerializedSlice::TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : RecordsCount(recordsCount) , Schema(schema) , Counters(counters) { } -TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, - const NSplitter::TSplitSettings& settings) - : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) - , Batch(batch) -{ - Y_ABORT_UNLESS(batch); - 
Data.reserve(batch->num_columns()); - for (auto&& i : batch->schema()->fields()) { - TSplittedEntity c(schema->GetColumnId(i->name())); - Data.emplace_back(std::move(c)); - } - - ui32 idx = 0; - for (auto&& i : batch->columns()) { - auto& c = Data[idx]; - auto columnSaver = schema->GetColumnSaver(c.GetEntityId()); - auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); - TSimpleSplitter splitter(columnSaver, Counters); - splitter.SetStats(stats); - std::vector> chunks; - for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { - chunks.emplace_back(std::make_shared(c.GetEntityId(), i, Schema)); - } - c.SetChunks(chunks); - Size += c.GetSize(); - ++idx; - } -} - -std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo) { - std::vector slices; - auto stats = schemaInfo->GetBatchSerializationStats(batch); - ui32 recordsCount = settings.GetMinRecordsCount(); - if (stats) { - const ui32 recordsCountForMinSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); - const ui32 recordsCountForMaxPortionSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); - recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); - } - auto linearSplitInfo = TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); - for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { - std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TBatchSerializedSlice slice(current, schemaInfo, counters, settings); - slices.emplace_back(std::move(slice)); - } - return slices; -} - void TGeneralSerializedSlice::MergeSlice(TGeneralSerializedSlice&& slice) { Y_ABORT_UNLESS(Data.size() 
== slice.Data.size()); RecordsCount += slice.GetRecordsCount(); diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.h b/ydb/core/tx/columnshard/splitter/batch_slice.h index acbb9a0414e4..f1b019544d8c 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.h +++ b/ydb/core/tx/columnshard/splitter/batch_slice.h @@ -1,10 +1,10 @@ #pragma once #include "chunks.h" -#include "stats.h" -#include "scheme_info.h" #include "column_info.h" #include "blob_info.h" -#include "similar_packer.h" +#include +#include +#include #include #include #include @@ -14,16 +14,17 @@ namespace NKikimr::NOlap { -class TDefaultSchemaDetails: public ISchemaDetailInfo { +class TDefaultSchemaDetails: public NArrow::NSplitter::ISchemaDetailInfo { private: ISnapshotSchema::TPtr Schema; - std::shared_ptr Stats; + std::shared_ptr Stats; + protected: virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { return Schema->GetColumnSaver(columnId); } public: - TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) + TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) : Schema(schema) , Stats(stats) { @@ -39,14 +40,15 @@ class TDefaultSchemaDetails: public ISchemaDetailInfo { return Schema->GetIndexInfo().IsSortedColumn(columnId); } - virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { + virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { auto stats = Stats->GetColumnInfo(columnId); if (stats && stats->GetRecordsCount() != 0) { return stats; } return std::nullopt; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& rb) const override { return Stats->GetStatsForRecordBatch(rb); } virtual ui32 GetColumnId(const std::string& fieldName) const override { @@ -61,17 +63,17 @@ class TGeneralSerializedSlice { protected: std::vector Data; ui64 
Size = 0; - ISchemaDetailInfo::TPtr Schema; + NArrow::NSplitter::ISchemaDetailInfo::TPtr Schema; std::shared_ptr Counters; TGeneralSerializedSlice() = default; - const TSplittedEntity& GetEntityDataVerified(const ui32& entityId) const { + const TSplittedEntity& GetEntityDataVerified(const ui32 entityId) const { for (auto&& i : Data) { if (i.GetEntityId() == entityId) { return i; } } - Y_ABORT_UNLESS(false); + AFL_VERIFY(false)("id", entityId); return Data.front(); } bool GroupBlobsImpl(const NSplitter::TGroupFeatures& features, std::vector& blobs); @@ -116,15 +118,17 @@ class TGeneralSerializedSlice { return blobs; } - explicit TGeneralSerializedSlice(TVectorView&& objects) { + explicit TGeneralSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { Y_ABORT_UNLESS(objects.size()); std::swap(*this, objects.front()); for (ui32 i = 1; i < objects.size(); ++i) { MergeSlice(std::move(objects[i])); } } - TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); - TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); void MergeSlice(TGeneralSerializedSlice&& slice); @@ -135,28 +139,4 @@ class TGeneralSerializedSlice { } }; -class TBatchSerializedSlice: public TGeneralSerializedSlice { -private: - using TBase = TGeneralSerializedSlice; - YDB_READONLY_DEF(std::shared_ptr, Batch); -public: - TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const NSplitter::TSplitSettings& settings); - - explicit TBatchSerializedSlice(TVectorView&& objects) { - Y_ABORT_UNLESS(objects.size()); - std::swap(*this, objects.front()); - for (ui32 i = 1; i < objects.size(); 
++i) { - MergeSlice(std::move(objects[i])); - } - } - void MergeSlice(TBatchSerializedSlice&& slice) { - Batch = NArrow::CombineBatches({Batch, slice.Batch}); - TBase::MergeSlice(std::move(slice)); - } - - static std::vector BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, - const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo); - -}; - } diff --git a/ydb/core/tx/columnshard/splitter/chunks.h b/ydb/core/tx/columnshard/splitter/chunks.h index eae2f4d58fd0..e86806da299a 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.h +++ b/ydb/core/tx/columnshard/splitter/chunks.h @@ -15,14 +15,22 @@ class IPortionColumnChunk : public IPortionDataChunk { protected: virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const = 0; virtual ui32 DoGetRecordsCountImpl() const = 0; + virtual ui64 DoGetRawBytesImpl() const = 0; + + virtual std::optional DoGetRawBytes() const final { + return DoGetRawBytesImpl(); + } + virtual std::optional DoGetRecordsCount() const override final { return DoGetRecordsCountImpl(); } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; - virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; + virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; + virtual std::vector> DoInternalSplit(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const override; virtual bool DoIsSplittable() const override { return GetRecordsCount() > 1; } @@ -47,7 +55,8 @@ class TChunkedColumnReader { std::vector> Chunks; std::shared_ptr Loader; - std::shared_ptr CurrentChunk; + 
std::shared_ptr CurrentChunk; + std::optional CurrentChunkArray; ui32 CurrentChunkIndex = 0; ui32 CurrentRecordIndex = 0; public: @@ -62,37 +71,56 @@ class TChunkedColumnReader { CurrentChunkIndex = 0; CurrentRecordIndex = 0; if (Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks.front()->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks.front()->GetData(), Chunks.front()->GetRecordsCountVerified()); + CurrentChunkArray.reset(); + } + } + + const std::shared_ptr& GetCurrentChunk() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray, CurrentRecordIndex); } + AFL_VERIFY(CurrentChunkArray); + return CurrentChunkArray->GetArray(); } - const std::shared_ptr& GetCurrentChunk() const { + const std::shared_ptr& GetCurrentAccessor() const { + AFL_VERIFY(CurrentChunk); return CurrentChunk; } - ui32 GetCurrentRecordIndex() const { - return CurrentRecordIndex; + ui32 GetCurrentRecordIndex() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray->GetAddress(), CurrentRecordIndex); + } + return CurrentChunkArray->GetAddress().GetLocalIndex(CurrentRecordIndex); } bool IsCorrect() const { return !!CurrentChunk; } - bool ReadNext() { - AFL_VERIFY(!!CurrentChunk); - if (++CurrentRecordIndex < CurrentChunk->length()) { - return true; - } + bool ReadNextChunk() { while (++CurrentChunkIndex < Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks[CurrentChunkIndex]->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks[CurrentChunkIndex]->GetData(), Chunks[CurrentChunkIndex]->GetRecordsCountVerified()); + CurrentChunkArray.reset(); CurrentRecordIndex = 0; - if (CurrentRecordIndex < CurrentChunk->length()) { + if (CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { return true; } } + CurrentChunkArray.reset(); CurrentChunk = nullptr; 
return false; } + + bool ReadNext() { + AFL_VERIFY(!!CurrentChunk); + if (++CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { + return true; + } + return ReadNextChunk(); + } }; class TChunkedBatchReader { @@ -135,6 +163,10 @@ class TChunkedBatchReader { return *result; } + ui32 GetColumnsCount() const { + return Columns.size(); + } + std::vector::const_iterator begin() const { return Columns.begin(); } @@ -142,6 +174,14 @@ class TChunkedBatchReader { std::vector::const_iterator end() const { return Columns.end(); } + + std::vector::iterator begin() { + return Columns.begin(); + } + + std::vector::iterator end() { + return Columns.end(); + } }; } diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.cpp b/ydb/core/tx/columnshard/splitter/scheme_info.cpp deleted file mode 100644 index fe4a65604e11..000000000000 --- a/ydb/core/tx/columnshard/splitter/scheme_info.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "scheme_info.h" - -namespace NKikimr::NOlap { - -NKikimr::NOlap::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { - auto saver = DoGetColumnSaver(columnId); - if (OverrideSerializer) { - saver.ResetSerializer(*OverrideSerializer); - } - return saver; -} - -} diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.h b/ydb/core/tx/columnshard/splitter/scheme_info.h deleted file mode 100644 index 1e72e63e9d35..000000000000 --- a/ydb/core/tx/columnshard/splitter/scheme_info.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include "stats.h" - -#include - -#include - -namespace NKikimr::NOlap { - -class ISchemaDetailInfo { -private: - YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); -protected: - virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; -public: - using TPtr = std::shared_ptr; - virtual ~ISchemaDetailInfo() = default; - virtual ui32 GetColumnId(const std::string& fieldName) const = 0; - TColumnSaver GetColumnSaver(const ui32 columnId) const; - virtual std::shared_ptr GetField(const ui32 columnId) 
const = 0; - virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; - virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; - virtual bool IsSortedColumn(const ui32 columnId) const = 0; - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const = 0; -}; -} diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index 146d1147aef2..d370a5206047 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -22,6 +22,7 @@ class TSplitSettings { YDB_ACCESSOR(i64, MinBlobSize, DefaultMinBlobSize); YDB_ACCESSOR(i64, MinRecordsCount, DefaultMinRecordsCount); YDB_ACCESSOR(i64, MaxPortionSize, DefaultMaxPortionSize); + public: ui64 GetExpectedRecordsCountOnPage() const { return 1.5 * MinRecordsCount; diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.cpp b/ydb/core/tx/columnshard/splitter/similar_packer.cpp deleted file mode 100644 index 9d22b3a6b255..000000000000 --- a/ydb/core/tx/columnshard/splitter/similar_packer.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "similar_packer.h" - -namespace NKikimr::NOlap { - -} diff --git a/ydb/core/tx/columnshard/splitter/simple.cpp b/ydb/core/tx/columnshard/splitter/simple.cpp deleted file mode 100644 index 7a155eb3158a..000000000000 --- a/ydb/core/tx/columnshard/splitter/simple.cpp +++ /dev/null @@ -1,224 +0,0 @@ -#include "simple.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -std::vector> TSplittedColumnChunk::DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(Data.GetSlicedBatch(), Data.GetSerializedChunk(), splitSizes); - std::vector> newChunks; - for (auto&& i : chunks) { - newChunks.emplace_back(std::make_shared(GetColumnId(), i, SchemaInfo)); - } - return newChunks; -} - -TString 
TSplittedColumnChunk::DoDebugString() const { - return TStringBuilder() << "records_count=" << GetRecordsCount() << ";data=" << NArrow::DebugJson(Data.GetSlicedBatch(), 3, 3) << ";"; -} - -std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { - AFL_VERIFY(data); - AFL_VERIFY(field); - auto schema = std::make_shared(arrow::FieldVector{field}); - auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); - return Split(batch, maxBlobSize); -} - -class TSplitChunk { -private: - std::shared_ptr Data; - YDB_READONLY_DEF(std::optional, Result); - ui32 SplitFactor = 0; - ui32 Iterations = 0; - ui32 MaxBlobSize = 8 * 1024 * 1024; - TColumnSaver ColumnSaver; - std::shared_ptr Counters; -public: - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, const TColumnSaver& columnSaver, const std::shared_ptr& counters) - : Data(data) - , SplitFactor(baseSplitFactor) - , MaxBlobSize(maxBlobSize) - , ColumnSaver(columnSaver) - , Counters(counters) - { - AFL_VERIFY(Data && Data->num_rows()); - AFL_VERIFY(SplitFactor); - } - - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, const TColumnSaver& columnSaver, const std::shared_ptr& counters) - : Data(data) - , Result(TSaverSplittedChunk(data, std::move(serializedData))) - , SplitFactor(baseSplitFactor) - , MaxBlobSize(maxBlobSize) - , ColumnSaver(columnSaver) - , Counters(counters) - { - AFL_VERIFY(Data && Data->num_rows()); - AFL_VERIFY(SplitFactor); - } - - std::vector Split() { - while (true) { - AFL_VERIFY(!Result); - AFL_VERIFY(++Iterations < 100); - AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)("size", NArrow::GetBatchDataSize(Data)); - bool found = false; - std::vector result; - if (SplitFactor == 1) { - TString blob = ColumnSaver.Apply(Data); - if (blob.size() < 
MaxBlobSize) { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); - Result = TSaverSplittedChunk(Data, std::move(blob)); - found = true; - result.emplace_back(*this); - } else { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); - TBatchSerializationStat stats(blob.size(), Data->num_rows(), NArrow::GetBatchDataSize(Data)); - SplitFactor = stats.PredictOptimalSplitFactor(Data->num_rows(), MaxBlobSize).value_or(1); - if (SplitFactor == 1) { - SplitFactor = 2; - } - AFL_VERIFY(Data->num_rows() > 1); - } - } else { - TLinearSplitInfo linearSplitting = TSimpleSplitter::GetLinearSplittingByMax(Data->num_rows(), Data->num_rows() / SplitFactor); - TStringBuilder sb; - std::optional badStartPosition; - ui32 badBatchRecordsCount = 0; - ui64 badBatchSerializedSize = 0; - ui32 badBatchCount = 0; - for (auto it = linearSplitting.StartIterator(); it.IsValid(); it.Next()) { - auto slice = Data->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TString blob = ColumnSaver.Apply(slice); - if (blob.size() >= MaxBlobSize) { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); - if (!badStartPosition) { - badStartPosition = it.GetPosition(); - } - badBatchSerializedSize += blob.size(); - badBatchRecordsCount += it.GetCurrentPackSize(); - ++badBatchCount; - Y_ABORT_UNLESS(!linearSplitting.IsMinimalGranularity()); - } else { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); - if (badStartPosition) { - AFL_VERIFY(badBatchRecordsCount && badBatchCount)("count", badBatchCount)("records", badBatchRecordsCount); - auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); - TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); - badStartPosition = {}; - badBatchRecordsCount = 0; - badBatchCount = 0; - badBatchSerializedSize = 0; - } - found 
= true; - result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver, Counters); - } - } - if (badStartPosition) { - auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); - TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); - } - ++SplitFactor; - } - if (found) { - return result; - } - } - AFL_VERIFY(false); - return {}; - } -}; - -std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const ui32 maxBlobSize) const { - AFL_VERIFY(data->num_columns() == 1); - AFL_VERIFY(data->num_rows()); - TSplitChunk baseChunk(Stats ? Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver, Counters); - std::vector chunks = {baseChunk}; - for (auto it = chunks.begin(); it != chunks.end(); ) { - AFL_VERIFY(chunks.size() < 100); - if (!!it->GetResult()) { - ++it; - continue; - } - std::vector splitted = it->Split(); - if (splitted.size() == 1) { - *it = splitted.front(); - } else { - it = chunks.insert(it, splitted.begin(), splitted.end()); - chunks.erase(it + splitted.size()); - } - } - std::vector result; - for (auto&& i : chunks) { - AFL_VERIFY(i.GetResult()); - result.emplace_back(*i.GetResult()); - } - return result; -} - -std::vector TSimpleSplitter::SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const { - std::vector result; - ui64 position = 0; - for (auto&& i : recordsCount) { - auto subData = data->Slice(position, i); - result.emplace_back(subData, ColumnSaver.Apply(subData)); - position += i; - } - Y_ABORT_UNLESS(position == (ui64)data->num_rows()); - return result; -} - -std::vector TSimpleSplitter::SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const { - auto splitPartSizesLocal = 
splitPartSizesExt; - Y_ABORT_UNLESS(data); - { - ui32 sumSizes = 0; - for (auto&& i : splitPartSizesExt) { - sumSizes += i; - } - Y_ABORT_UNLESS(sumSizes <= dataSerialization.size()); - - if (sumSizes < dataSerialization.size()) { - splitPartSizesLocal.emplace_back(dataSerialization.size() - sumSizes); - } - } - std::vector recordsCount; - i64 remainedRecordsCount = data->num_rows(); - const double rowsPerByte = 1.0 * data->num_rows() / dataSerialization.size(); - i32 remainedParts = splitPartSizesLocal.size(); - for (ui32 idx = 0; idx < splitPartSizesLocal.size(); ++idx) { - AFL_VERIFY(remainedRecordsCount >= remainedParts)("remained_records_count", remainedRecordsCount) - ("remained_parts", remainedParts)("idx", idx)("size", splitPartSizesLocal.size())("sizes", JoinSeq(",", splitPartSizesLocal))("data_size", dataSerialization.size()); - --remainedParts; - i64 expectedRecordsCount = rowsPerByte * splitPartSizesLocal[idx]; - if (expectedRecordsCount < 1) { - expectedRecordsCount = 1; - } else if (remainedRecordsCount < expectedRecordsCount + remainedParts) { - expectedRecordsCount = remainedRecordsCount - remainedParts; - } - if (idx + 1 == splitPartSizesLocal.size()) { - expectedRecordsCount = remainedRecordsCount; - } - Y_ABORT_UNLESS(expectedRecordsCount); - recordsCount.emplace_back(expectedRecordsCount); - remainedRecordsCount -= expectedRecordsCount; - Y_ABORT_UNLESS(remainedRecordsCount >= 0); - } - Y_ABORT_UNLESS(remainedRecordsCount == 0); - return SplitByRecordsCount(data, recordsCount); -} - -std::shared_ptr TSaverSplittedChunk::GetFirstScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(0)); -} - -std::shared_ptr TSaverSplittedChunk::GetLastScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(GetRecordsCount() - 1)); -} - -} diff --git a/ydb/core/tx/columnshard/splitter/simple.h b/ydb/core/tx/columnshard/splitter/simple.h deleted file mode 100644 index 
48c7b9efa009..000000000000 --- a/ydb/core/tx/columnshard/splitter/simple.h +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once -#include -#include -#include -#include "stats.h" -#include "chunks.h" -#include "scheme_info.h" - -namespace NKikimr::NOlap { - -class TSaverSplittedChunk { -private: - YDB_READONLY_DEF(std::shared_ptr, SlicedBatch); - YDB_READONLY_DEF(TString, SerializedChunk); -public: - std::shared_ptr GetColumn() const { - return SlicedBatch->column(0); - } - - ui32 GetRecordsCount() const { - return SlicedBatch->num_rows(); - } - - std::shared_ptr GetFirstScalar() const; - std::shared_ptr GetLastScalar() const; - - TSaverSplittedChunk(const std::shared_ptr& batch, TString&& serializedChunk) - : SlicedBatch(batch) - , SerializedChunk(std::move(serializedChunk)) { - Y_ABORT_UNLESS(SlicedBatch); - Y_ABORT_UNLESS(SlicedBatch->num_columns() == 1); - Y_ABORT_UNLESS(SlicedBatch->num_rows()); - } - - bool IsCompatibleColumn(const std::shared_ptr& f) const { - if (!SlicedBatch) { - return false; - } - if (SlicedBatch->num_columns() != 1) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected columns count")("expectation", 1)("actual", SlicedBatch->num_columns()); - return false; - } - if (!SlicedBatch->schema()->fields().front()->Equals(f)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected column type")("expectation", f->ToString())("actual", SlicedBatch->schema()->fields().front()->ToString()); - return false; - } - return true; - } -}; - -class TLinearSplitInfo { -private: - YDB_READONLY(ui64, PacksCount, 0); - YDB_READONLY(ui64, PackSize, 0); - YDB_READONLY(ui64, ObjectsCount, 0); -public: - bool IsMinimalGranularity() const { - return PackSize == 1; - } - - TLinearSplitInfo(const ui64 packsCount, const ui64 packSize, const ui64 objectsCount) - : PacksCount(packsCount) - , PackSize(packSize) - , ObjectsCount(objectsCount) - { - AFL_VERIFY(objectsCount >= packsCount)("objects_count", objectsCount)("packs_count", packsCount); - 
AFL_VERIFY(PackSize); - AFL_VERIFY(PacksCount); - } - - class TIterator { - private: - const TLinearSplitInfo& Owner; - YDB_READONLY(ui64, Position, 0); - YDB_READONLY(ui64, CurrentPackSize, 0); - ui64 PackIdx = 0; - void InitPack() { - CurrentPackSize = (PackIdx + 1 == Owner.GetPacksCount()) ? Owner.ObjectsCount - Position : Owner.GetPackSize(); - } - public: - explicit TIterator(const TLinearSplitInfo& owner) - : Owner(owner) - { - InitPack(); - } - - bool IsValid() const { - if (Position < Owner.GetObjectsCount() && PackIdx < Owner.GetPacksCount()) { - return true; - } else { - Y_ABORT_UNLESS(Position == Owner.GetObjectsCount() && PackIdx == Owner.GetPacksCount()); - return false; - } - } - - bool Next() { - Y_ABORT_UNLESS(IsValid()); - Position += CurrentPackSize; - ++PackIdx; - InitPack(); - return IsValid(); - } - }; - - TIterator StartIterator() const { - return TIterator(*this); - } -}; - -class TSimpleSplitter { -private: - TColumnSaver ColumnSaver; - YDB_ACCESSOR_DEF(std::optional, Stats); - std::shared_ptr Counters; -public: - explicit TSimpleSplitter(const TColumnSaver& columnSaver, std::shared_ptr counters) - : ColumnSaver(columnSaver) - , Counters(counters) - { - - } - - static TLinearSplitInfo GetOptimalLinearSplitting(const ui64 objectsCount, const i64 optimalPackSizeExt) { - const i64 optimalPackSize = optimalPackSizeExt ? 
optimalPackSizeExt : 1; - const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / optimalPackSize)); - const ui32 countPacksMin = std::max(1, (ui32)ceil(1.0 * objectsCount / optimalPackSize)); - const ui32 stepPackMax = objectsCount / countPacksMin; - const ui32 stepPackMin = objectsCount / countPacksMax; - if (std::abs(optimalPackSize - stepPackMax) > std::abs(optimalPackSize - stepPackMin)) { - return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); - } else { - return TLinearSplitInfo(countPacksMin, stepPackMax, objectsCount); - } - } - - static TLinearSplitInfo GetLinearSplittingByMax(const ui64 objectsCount, const ui64 maxPackSizeExt) { - const ui64 maxPackSize = maxPackSizeExt ? maxPackSizeExt : 1; - const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / maxPackSize)); - const ui32 stepPackMin = objectsCount / countPacksMax; - return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); - } - - std::vector Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const; - std::vector Split(const std::shared_ptr& data, const ui32 maxBlobSize) const; - std::vector SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const; - std::vector SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const; -}; - -class TSplittedColumnChunk: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - TSaverSplittedChunk Data; - ISchemaDetailInfo::TPtr SchemaInfo; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; - virtual const TString& DoGetData() const override { - return Data.GetSerializedChunk(); - } - virtual ui32 DoGetRecordsCountImpl() const override { - return Data.GetRecordsCount(); - } - - virtual TString DoDebugString() const override; - - virtual TSimpleChunkMeta 
DoBuildSimpleChunkMeta() const override { - return TSimpleChunkMeta(Data.GetColumn(), SchemaInfo->NeedMinMaxForColumn(GetColumnId()), SchemaInfo->IsSortedColumn(GetColumnId())); - } - - virtual std::shared_ptr DoGetFirstScalar() const override { - return Data.GetFirstScalar(); - } - virtual std::shared_ptr DoGetLastScalar() const override { - return Data.GetLastScalar(); - } - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const override { - AFL_VERIFY(false); - return nullptr; - } - -public: - i64 GetSize() const { - return Data.GetSerializedChunk().size(); - } - - const TSaverSplittedChunk& GetData() const { - return Data; - } - - TSplittedColumnChunk() = default; - - TSplittedColumnChunk(const ui32 columnId, const TSaverSplittedChunk& data, ISchemaDetailInfo::TPtr schemaInfo) - : TBase(columnId) - , Data(data) - , SchemaInfo(schemaInfo) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp new file mode 100644 index 000000000000..880881238fa1 --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp @@ -0,0 +1,65 @@ +#include "batch_slice.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap { + +TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings) + : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) + , Batch(batch) { + Y_ABORT_UNLESS(batch); + Data.reserve(batch->num_columns()); + for (auto&& i : batch->schema()->fields()) { + TSplittedEntity c(schema->GetColumnId(i->name())); + Data.emplace_back(std::move(c)); + } + + ui32 idx = 0; + for (auto&& i : batch->columns()) { + auto& c = Data[idx]; + auto columnSaver = schema->GetColumnSaver(c.GetEntityId()); + auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); + 
NKikimr::NArrow::NSplitter::TSimpleSplitter splitter(columnSaver); + splitter.SetStats(stats); + std::vector> chunks; + for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { + NOlap::TSimpleColumnInfo columnInfo(c.GetEntityId(), Schema->GetField(c.GetEntityId()), + Schema->GetColumnSaver(c.GetEntityId()).GetSerializer(), true, false, true, nullptr); + chunks.emplace_back(std::make_shared(i.GetSerializedChunk(), + std::make_shared(i.GetSlicedBatch()->column(0)), TChunkAddress(c.GetEntityId(), 0), + columnInfo)); + } + c.SetChunks(chunks); + Size += c.GetSize(); + ++idx; + } +} + +std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo) { + std::vector slices; + auto stats = schemaInfo->GetBatchSerializationStats(batch); + ui32 recordsCount = settings.GetMinRecordsCount(); + if (stats) { + const ui32 recordsCountForMinSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); + const ui32 recordsCountForMaxPortionSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); + recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); + } + auto linearSplitInfo = NKikimr::NArrow::NSplitter::TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); + for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { + std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TBatchSerializedSlice slice(current, schemaInfo, counters, settings); + slices.emplace_back(std::move(slice)); + } + return slices; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.h 
b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h new file mode 100644 index 000000000000..c085eb1568ce --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h @@ -0,0 +1,31 @@ +#pragma once +#include + +namespace NKikimr::NOlap { + +class TBatchSerializedSlice: public TGeneralSerializedSlice { +private: + using TBase = TGeneralSerializedSlice; + YDB_READONLY_DEF(std::shared_ptr, Batch); + +public: + TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings); + + explicit TBatchSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { + Y_ABORT_UNLESS(objects.size()); + std::swap(*this, objects.front()); + for (ui32 i = 1; i < objects.size(); ++i) { + MergeSlice(std::move(objects[i])); + } + } + void MergeSlice(TBatchSerializedSlice&& slice) { + Batch = NArrow::CombineBatches({ Batch, slice.Batch }); + TBase::MergeSlice(std::move(slice)); + } + + static std::vector BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo); +}; +} diff --git a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp index f2f942dbbab3..7ca04ee36933 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp +++ b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp @@ -1,29 +1,32 @@ -#include -#include -#include -#include -#include -#include +#include "batch_slice.h" -#include -#include +#include +#include +#include #include -#include - -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include Y_UNIT_TEST_SUITE(Splitter) { - using namespace NKikimr::NArrow; - class TTestSnapshotSchema: public NKikimr::NOlap::ISchemaDetailInfo { + class TTestSnapshotSchema: public NKikimr::NArrow::NSplitter::ISchemaDetailInfo { private: 
mutable std::map Decoder; + protected: - virtual NKikimr::NOlap::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { - return NKikimr::NOlap::TColumnSaver(nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); + virtual NKikimr::NArrow::NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { + return NKikimr::NArrow::NAccessor::TColumnSaver( + nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); } public: @@ -34,17 +37,18 @@ Y_UNIT_TEST_SUITE(Splitter) { return false; } - virtual std::optional GetColumnSerializationStats(const ui32 /*columnId*/) const override { + virtual std::optional GetColumnSerializationStats( + const ui32 /*columnId*/) const override { return {}; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& /*rb*/) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& /*rb*/) const override { return {}; } - NKikimr::NOlap::TColumnLoader GetColumnLoader(const ui32 columnId) const { - arrow::FieldVector v = {std::make_shared(GetColumnName(columnId), std::make_shared())}; - auto schema = std::make_shared(v); - return NKikimr::NOlap::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), schema, nullptr, columnId); + NKikimr::NArrow::NAccessor::TColumnLoader GetColumnLoader(const ui32 columnId) const { + return NKikimr::NArrow::NAccessor::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), + NKikimr::NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor(), GetField(columnId), nullptr, columnId); } virtual std::shared_ptr GetField(const ui32 columnId) const override { @@ -80,9 +84,8 @@ Y_UNIT_TEST_SUITE(Splitter) { YDB_ACCESSOR(std::optional, ExpectedInternalSplitsCount, 0); public: - void Execute(std::shared_ptr batch, - const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings() - ) { + void Execute(std::shared_ptr 
batch, + const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings()) { using namespace NKikimr::NOlap; NKikimr::NColumnShard::TIndexationCounters counters("test"); std::vector generalSlices; @@ -93,9 +96,9 @@ Y_UNIT_TEST_SUITE(Splitter) { } } - TSimilarPacker packer(settings.GetExpectedPortionSize()); + NKikimr::NArrow::NSplitter::TSimilarPacker packer(settings.GetExpectedPortionSize()); auto packs = packer.Split(generalSlices); - const NSplitter::TEntityGroups groups(settings, "default"); + const NKikimr::NOlap::NSplitter::TEntityGroups groups(settings, "default"); const ui32 portionsCount = packs.size(); ui32 blobsCount = 0; ui32 chunksCount = 0; @@ -129,9 +132,11 @@ Y_UNIT_TEST_SUITE(Splitter) { } portionSize += bSize; AFL_VERIFY(bSize < (ui64)settings.GetMaxBlobSize()); - AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))("blob_size", bSize); + AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))( + "blob_size", bSize); } - AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)("limit", settings.GetMaxPortionSize()); + AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)( + "limit", settings.GetMaxPortionSize()); THashMap> entitiesByRecordsCount; ui32 pagesRestore = 0; @@ -141,7 +146,7 @@ Y_UNIT_TEST_SUITE(Splitter) { ui32 count = 0; for (auto&& c : e.second) { auto slice = arr->Slice(count + portionShift, c->GetRecordsCountVerified()); - auto readBatch = *Schema->GetColumnLoader(e.first).Apply(c->GetData()); + auto readBatch = Schema->GetColumnLoader(e.first).ApplyRawVerified(c->GetData()); AFL_VERIFY(slice->length() == readBatch->num_rows()); Y_ABORT_UNLESS(readBatch->column(0)->RangeEquals(*slice, 0, readBatch->num_rows(), 0, arrow::EqualOptions::Defaults())); count += c->GetRecordsCountVerified(); @@ 
-161,91 +166,107 @@ Y_UNIT_TEST_SUITE(Splitter) { } AFL_VERIFY(portionShift = batch->num_rows()); AFL_VERIFY(pagesSum == generalSlices.size())("sum", pagesSum)("general_slices", generalSlices.size()); - AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))("expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); + AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))( + "expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); AFL_VERIFY(blobsCount == ExpectBlobsCount.value_or(blobsCount))("blobs_count", blobsCount)("expected", *ExpectBlobsCount); AFL_VERIFY(pagesSum == ExpectSlicesCount.value_or(pagesSum))("sum", pagesSum)("expected", *ExpectSlicesCount); - AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)("expected", *ExpectPortionsCount); + AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)( + "expected", *ExpectPortionsCount); AFL_VERIFY(chunksCount == ExpectChunksCount.value_or(chunksCount))("chunks_count", chunksCount)("expected", *ExpectChunksCount); - } }; Y_UNIT_TEST(Simple) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Small) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", 
NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Minimal) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(4048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(4048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(Trivial) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(10048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(10048); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(BigAndSmall) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - 
NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(CritSmallPortions) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40) - .Execute(batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); + 
TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40).Execute( + batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); } Y_UNIT_TEST(Crit) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute( + batch); } Y_UNIT_TEST(CritSimple) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig }).BuildBatch(80048); 
NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute( + batch); } - }; diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 24d266bffa8e..c7a6a0be4c0c 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -18,6 +18,8 @@ PEERDIR( ydb/core/kqp/session_actor ydb/core/tx/tx_proxy ydb/core/tx/columnshard/engines/storage/chunks + ydb/core/tx/columnshard/engines/storage/indexes/max + ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch ydb/core/tx ydb/core/mind ydb/library/yql/minikql/comp_nodes/llvm14 @@ -40,6 +42,7 @@ CFLAGS( SRCS( ut_splitter.cpp + batch_slice.cpp ) END() diff --git a/ydb/core/tx/columnshard/splitter/ya.make b/ydb/core/tx/columnshard/splitter/ya.make index 5f6c60cdf1ff..380d51bca325 100644 --- a/ydb/core/tx/columnshard/splitter/ya.make +++ b/ydb/core/tx/columnshard/splitter/ya.make @@ -3,12 +3,8 @@ LIBRARY() SRCS( batch_slice.cpp chunks.cpp - simple.cpp - similar_packer.cpp - stats.cpp column_info.cpp settings.cpp - scheme_info.cpp blob_info.cpp chunk_meta.cpp ) @@ -17,6 +13,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tx/columnshard/splitter/abstract ydb/core/tx/columnshard/engines/scheme + ydb/core/formats/arrow/splitter ) END() diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index bee19d729f86..31de6ffef8a5 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -1,8 +1,9 @@ #include "columnshard_ut_common.h" +#include "shard_reader.h" -#include #include #include 
+#include #include #include @@ -124,22 +125,25 @@ bool WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shar } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - return WriteDataImpl(runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); - + return WriteDataImpl( + runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, + const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); if (writeIds) { - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } std::vector ids; - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), waitResult ? 
&ids : nullptr, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), waitResult ? &ids : nullptr, mType); } std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, @@ -396,30 +400,54 @@ NMetadata::NFetcher::ISnapshot::TPtr TTestSchema::BuildSnapshot(const TTableSpec return cs; } +void TTestSchema::InitSchema(const std::vector& columns, const std::vector& pk, + const TTableSpecials& specials, NKikimrSchemeOp::TColumnTableSchema* schema) { + schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); + + for (ui32 i = 0; i < columns.size(); ++i) { + *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); + if (!specials.NeedTestStatistics()) { + continue; + } + if (NOlap::NIndexes::NMax::TIndexMeta::IsAvailableType(columns[i].GetType())) { + *schema->AddIndexes() = NOlap::NIndexes::TIndexMetaContainer( + std::make_shared(1000 + i, "MAX::INDEX::" + columns[i].GetName(), "__LOCAL_METADATA", i + 1)) + .SerializeToProto(); + } + } + + Y_ABORT_UNLESS(pk.size() > 0); + for (auto& column : ExtractNames(pk)) { + schema->AddKeyColumnNames(column); + } + + if (specials.HasCodec()) { + schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); + } + if (specials.CompressionLevel) { + schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); + } +} + } namespace NKikimr::NColumnShard { NOlap::TIndexInfo BuildTableInfo(const std::vector& ydbSchema, const std::vector& key) { - NOlap::TIndexInfo indexInfo = NOlap::TIndexInfo::BuildDefault(); - + THashMap columns; for (ui32 i = 0; i < ydbSchema.size(); ++i) { ui32 id = i + 1; auto& name = ydbSchema[i].GetName(); auto& type = ydbSchema[i].GetType(); - indexInfo.Columns[id] = NTable::TColumn(name, id, type, ""); - indexInfo.ColumnNames[name] = id; + columns[id] = NTable::TColumn(name, id, type, ""); } + std::vector pkNames; for (const 
auto& c : key) { - indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[c.GetName()]); + pkNames.push_back(c.GetName()); } - - auto storage = std::make_shared(); - storage->Initialize(TInstant::Now().Seconds()); - indexInfo.SetAllKeys(NOlap::TTestStoragesManager::GetInstance()); - return indexInfo; + return NOlap::TIndexInfo::BuildDefault(NOlap::TTestStoragesManager::GetInstance(), columns, pkNames); } void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, const NOlap::TSnapshot& snapshot, bool succeed) { @@ -489,7 +517,7 @@ namespace NKikimr::NColumnShard { fields.emplace_back(f.GetName()); } - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); + NTxUT::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); reader.SetReplyColumns(fields); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 12b056b6ba6a..7594be5da952 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -1,21 +1,21 @@ #pragma once -#include -#include -#include - #include -#include +#include #include #include #include #include -#include +#include +#include +#include +#include +#include + +#include #include #include -#include -#include namespace NKikimr::NOlap { struct TIndexInfo; @@ -168,7 +168,7 @@ struct TTestSchema { // PK firstKeyItem, TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), @@ -184,7 +184,7 @@ struct 
TTestSchema { std::vector schema = { // PK TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), // @@ -193,7 +193,7 @@ struct TTestSchema { TTestColumn("json_payload", TTypeInfo(NTypeIds::JsonDocument) ), TTestColumn("ingested_at", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("saved_at", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("request_id", TTypeInfo(NTypeIds::Yson) ) + TTestColumn("request_id", TTypeInfo(NTypeIds::Yson)).SetAccessorClassName("SPARSED") }; return schema; }; @@ -202,7 +202,7 @@ struct TTestSchema { std::vector schema = { TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY") }; return schema; @@ -244,32 +244,7 @@ struct TTestSchema { static void InitSchema(const std::vector& columns, const std::vector& pk, const TTableSpecials& specials, - NKikimrSchemeOp::TColumnTableSchema* schema) - { - schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); - - for (ui32 i = 0; i < columns.size(); ++i) { - *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); - if (!specials.NeedTestStatistics()) { - continue; - } - if (NOlap::NStatistics::NMax::TOperator::IsAvailableType(columns[i].GetType())) { - *schema->AddStatistics() = NOlap::NStatistics::TOperatorContainer("MAX::" + columns[i].GetName(), std::make_shared(i + 1)).SerializeToProto(); - } - } - - Y_ABORT_UNLESS(pk.size() > 0); - for (auto& column : ExtractNames(pk)) { - 
schema->AddKeyColumnNames(column); - } - - if (specials.HasCodec()) { - schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); - } - if (specials.CompressionLevel) { - schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); - } - } + NKikimrSchemeOp::TColumnTableSchema* schema); static void InitTtl(const TTableSpecials& specials, NKikimrSchemeOp::TColumnDataLifeCycle::TTtl* ttl) { Y_ABORT_UNLESS(specials.HasTtl()); @@ -432,10 +407,12 @@ void PlanSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot void PlanWriteTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap, bool waitResult = true); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, std::vector* writeIds, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, @@ -500,11 +477,13 @@ namespace NKikimr::NColumnShard { auto& builder = Owner.Builders[Index]; auto type = builder->type(); - NArrow::SwitchType(type->id(), [&](const auto& t) { + Y_ABORT_UNLESS(NArrow::SwitchType(type->id(), [&](const auto& t) { using TWrap = std::decay_t; using T = 
typename TWrap::T; using TBuilder = typename arrow::TypeTraits::BuilderType; + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("T", typeid(T).name()); + auto& typedBuilder = static_cast(*builder); if constexpr (std::is_arithmetic::value) { if constexpr (arrow::has_c_type::value) { @@ -519,9 +498,16 @@ namespace NKikimr::NColumnShard { return true; } } + + if constexpr (std::is_same::value) { + if constexpr (arrow::is_decimal128_type::value) { + Y_ABORT_UNLESS(typedBuilder.Append(arrow::Decimal128(data.Hi_, data.Low_)).ok()); + return true; + } + } Y_ABORT("Unknown type combination"); return false; - }); + })); return TRowBuilder(Index + 1, Owner); } diff --git a/ydb/core/tx/columnshard/test_helper/controllers.h b/ydb/core/tx/columnshard/test_helper/controllers.h index b18c2bc34e8d..68cd6a1dc4ed 100644 --- a/ydb/core/tx/columnshard/test_helper/controllers.h +++ b/ydb/core/tx/columnshard/test_helper/controllers.h @@ -21,31 +21,28 @@ class TWaitCompactionController: public NYDBTest::NColumnShard::TController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return SmallSizeDetector.value_or(0); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetCompactionActualizationLag(const TDuration /*def*/) const override { + virtual TDuration DoGetCompactionActualizationLag(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual 
TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } public: TWaitCompactionController() { - SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); } ui32 GetFinishedExportsCount() const { return ExportsFinishedCount.Val(); } - virtual void OnStatisticsUsage(const NKikimr::NOlap::NStatistics::TOperatorContainer& /*statOperator*/) override { + virtual void OnStatisticsUsage(const NKikimr::NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) override { StatisticsUsageCount.Inc(); } virtual void OnMaxValueUsage() override { diff --git a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp index 8b35442b7abb..d64003c5d525 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.cpp +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -1,12 +1,17 @@ #include "helper.h" -#include + #include +#include #include #include #include -#include -#include +#include #include +#include + +#include + +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif @@ -21,6 +26,9 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) col.SetStorageId(StorageId); } auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(Type, ""); + if (AccessorClassName) { + col.MutableDataAccessorConstructor()->SetClassName(AccessorClassName); + } col.SetTypeId(columnType.TypeId); if (columnType.TypeInfo) { *col.MutableTypeInfo() = *columnType.TypeInfo; @@ -28,6 +36,16 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) return col; } +std::set TTestColumn::GetNullableSet(const std::vector& columns) { + std::set result; + for (auto&& i : columns) { + if (!i.GetNullable()) { + result.emplace(i.GetName()); + } + } + return result; +} + std::vector> TTestColumn::ConvertToPairs(const std::vector& columns) { std::vector> result; for (auto&& i : columns) { @@ -36,7 
+54,8 @@ std::vector> TTestColumn::Conver return result; } -std::vector TTestColumn::BuildFromPairs(const std::vector>& columns) { +std::vector TTestColumn::BuildFromPairs( + const std::vector>& columns) { std::vector result; for (auto&& i : columns) { result.emplace_back(i.first, i.second); @@ -57,38 +76,44 @@ std::vector TTestColumn::CropSchema(const s return std::vector(input.begin(), input.begin() + size); } -} +} // namespace NKikimr::NArrow::NTest namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::vector> MakeArrowFields(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowFields(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::shared_ptr MakeArrowSchema(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowSchema(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -} +} // namespace NKikimr::NArrow namespace NKikimr::NOlap { std::shared_ptr TTestStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, NActors::TActorId(), TabletInfo, - GetGeneration(), SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + return std::make_shared(storageId, NActors::TActorId(), TabletInfo, GetGeneration(), + SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared( + storageId, 
SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS Singleton()->SetSecretKey("fakeSecret"); - return std::make_shared(storageId, NActors::TActorId(), std::make_shared("fakeBucket", "fakeSecret"), + return std::make_shared(storageId, NActors::TActorId(), + std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), GetGeneration()); #endif } return nullptr; } -} \ No newline at end of file +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/test_helper/helper.h b/ydb/core/tx/columnshard/test_helper/helper.h index cf71657cf97c..98f9a09b526c 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.h +++ b/ydb/core/tx/columnshard/test_helper/helper.h @@ -51,6 +51,9 @@ class TTestColumn { YDB_ACCESSOR_DEF(TString, Name); YDB_ACCESSOR_DEF(NScheme::TTypeInfo, Type); YDB_ACCESSOR_DEF(TString, StorageId); + YDB_ACCESSOR_DEF(TString, AccessorClassName); + YDB_ACCESSOR(bool, Nullable, true); + public: explicit TTestColumn(const TString& name, const NScheme::TTypeInfo& type) : Name(name) @@ -63,13 +66,14 @@ class TTestColumn { static THashMap ConvertToHash(const std::vector& columns); static std::vector BuildFromPairs(const std::vector>& columns); static std::vector CropSchema(const std::vector& input, const ui32 size); + static std::set GetNullableSet(const std::vector& columns); }; } namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns = {}); -std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns = {}); +std::vector> MakeArrowFields(const std::vector& columns); +std::shared_ptr MakeArrowSchema(const std::vector& columns); } diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.h b/ydb/core/tx/columnshard/test_helper/shard_reader.h similarity index 99% rename from 
ydb/core/tx/columnshard/common/tests/shard_reader.h rename to ydb/core/tx/columnshard/test_helper/shard_reader.h index 1bb3ad353835..2beaa5a782d9 100644 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.h +++ b/ydb/core/tx/columnshard/test_helper/shard_reader.h @@ -10,7 +10,7 @@ #include #include -namespace NKikimr::NOlap::NTests { +namespace NKikimr::NTxUT { class TShardReader { private: @@ -267,4 +267,4 @@ class TShardReader { } }; -} +} //namespace NKikimr::NTxUT diff --git a/ydb/core/tx/columnshard/test_helper/ya.make b/ydb/core/tx/columnshard/test_helper/ya.make index a900deb266db..cab4937293dd 100644 --- a/ydb/core/tx/columnshard/test_helper/ya.make +++ b/ydb/core/tx/columnshard/test_helper/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( ydb/core/protos + ydb/core/formats/arrow contrib/libs/apache/arrow ydb/library/actors/core ydb/core/tx/columnshard/blobs_action/bs diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.cpp b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp new file mode 100644 index 000000000000..927b73dcdd23 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp @@ -0,0 +1,6 @@ +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.h b/ydb/core/tx/columnshard/transactions/locks/abstract.h new file mode 100644 index 000000000000..5699d13d9705 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.h @@ -0,0 +1,105 @@ +#pragma once +#include "dependencies.h" +#include "interaction.h" + +#include + +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimrColumnShardTxProto { +class TEvent; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class ITxEvent { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + using TProto = NKikimrColumnShardTxProto::TEvent; + +protected: + virtual void DoAddToInteraction(const ui64 txId, 
TInteractionsContext& context) const = 0; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const = 0; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const = 0; + +public: + ITxEvent() = default; + virtual ~ITxEvent() = default; + + virtual TString GetClassName() const = 0; + + bool DeserializeFromProto(const TProto& proto) { + return DoDeserializeFromProto(proto); + } + + void SerializeToProto(TProto& proto) const { + DoSerializeToProto(proto); + } + + void AddToInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoAddToInteraction(txId, context); + } + + void RemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoRemoveFromInteraction(txId, context); + } +}; + +class TTxEventContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + YDB_READONLY(ui64, TxId, 0); + +public: + void AddToInteraction(TInteractionsContext& context) const { + return GetObjectVerified().AddToInteraction(TxId, context); + } + + void RemoveFromInteraction(TInteractionsContext& context) const { + return GetObjectVerified().RemoveFromInteraction(TxId, context); + } + + TTxEventContainer(const ui64 txId, const std::shared_ptr& txEvent) + : TBase(txEvent) + , TxId(txId) { + } + + TTxEventContainer(const ui64 txId) + : TxId(txId) { + } + + bool operator<(const TTxEventContainer& item) const { + return TxId < item.TxId; + } +}; + +class ITxEventWriter { +protected: + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& notifications) const = 0; + virtual std::shared_ptr DoBuildEvent() = 0; + +public: + ITxEventWriter() = default; + virtual ~ITxEventWriter() = default; + + bool CheckInteraction(const ui64 selfTxId, TInteractionsContext& 
context, TTxConflicts& conflicts, TTxConflicts& notifications) const { + TTxConflicts conflictsResult; + TTxConflicts notificationsResult; + const bool result = DoCheckInteraction(selfTxId, context, conflictsResult, notificationsResult); + std::swap(conflictsResult, conflicts); + std::swap(notificationsResult, notifications); + return result; + } + + std::shared_ptr BuildEvent() { + return DoBuildEvent(); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp new file mode 100644 index 000000000000..921a024ce651 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp @@ -0,0 +1,48 @@ +#include "dependencies.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +bool TTxConflicts::LoadFromDatabase(NIceDb::TNiceDb& db) { + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + THashMap> local; + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + local[txId].emplace(rowset.GetValue()); + if (!rowset.Next()) { + return false; + } + } + std::swap(local, TxIdsFromCommitToBroken); + return true; +} + +void TTxConflicts::AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (auto&& [commitTxId, brokeTxIds] : TxIdsFromCommitToBroken) { + for (auto&& brokeTxId : brokeTxIds) { + db.Table().Key(commitTxId, brokeTxId).Update(); + } + } +} + +bool TTxConflicts::RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return false; + } + for (auto&& brokeTxId : it->second) { + db.Table().Key(txId, brokeTxId).Delete(); + } + return true; +} + +} diff --git 
a/ydb/core/tx/columnshard/transactions/locks/dependencies.h b/ydb/core/tx/columnshard/transactions/locks/dependencies.h new file mode 100644 index 000000000000..d749ed1db87e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/dependencies.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TTxConflicts { +private: + THashMap> TxIdsFromCommitToBroken; + +public: + THashMap>::const_iterator begin() const { + return TxIdsFromCommitToBroken.begin(); + } + + THashMap>::const_iterator end() const { + return TxIdsFromCommitToBroken.end(); + } + + bool Add(const ui64 commitTxId, const ui64 brokenTxId) { + return TxIdsFromCommitToBroken[commitTxId].emplace(brokenTxId).second; + } + + THashSet GetBrokenTxIds(const ui64 txId) const { + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return Default>(); + } + return it->second; + } + + bool LoadFromDatabase(NIceDb::TNiceDb& db); + + bool RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const; + + [[nodiscard]] bool RemoveOnComplete(const ui64 txId) { + return TxIdsFromCommitToBroken.erase(txId); + } + + void AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const; + + void MergeTo(TTxConflicts& dest) const { + for (auto&& i : TxIdsFromCommitToBroken) { + auto it = dest.TxIdsFromCommitToBroken.find(i.first); + if (it == dest.TxIdsFromCommitToBroken.end()) { + dest.TxIdsFromCommitToBroken.emplace(i.first, i.second); + } else { + it->second.insert(i.second.begin(), i.second.end()); + } + } + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/interaction.cpp b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp new file mode 100644 index 000000000000..0dd52a2a79ba --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp @@ -0,0 +1,26 @@ +#include "interaction.h" +#include + 
+namespace NKikimr::NOlap::NTxInteractions { +TIntervalPoint TIntervalPoint::From( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 0 : 1; + if (!container.GetReplaceKey()) { + shift = -1; + } else if (container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = 1; + } + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +TIntervalPoint TIntervalPoint::To( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 0 : -1; + if (!container.GetReplaceKey() || container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = Max(); + } + + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/interaction.h b/ydb/core/tx/columnshard/transactions/locks/interaction.h new file mode 100644 index 000000000000..abd9ef92f6d5 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.h @@ -0,0 +1,459 @@ +#pragma once +#include +#include + +#include +#include + +#include + +namespace NKikimr::NOlap { +class TPredicateContainer; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class TPointTxCounters { +private: + YDB_READONLY(ui32, CountIncludes, 0); + YDB_READONLY(ui32, CountNotIncludes, 0); + +public: + void Inc(const bool include) { + if (include) { + IncInclude(); + } else { + IncNotInclude(); + } + } + bool Dec(const bool include) { + if (include) { + return DecInclude(); + } else { + return DecNotInclude(); + } + } + void IncInclude() { + ++CountIncludes; + } + [[nodiscard]] bool DecInclude() { + AFL_VERIFY(CountIncludes); + return --CountIncludes == 0; + } + void IncNotInclude() { + ++CountNotIncludes; + } + [[nodiscard]] bool DecNotInclude() { + AFL_VERIFY(CountNotIncludes); + return --CountNotIncludes == 0; + } + bool IsEmpty() const { + return !CountIncludes && !CountNotIncludes; + } 
+ NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + if (CountIncludes) { + result.InsertValue("count_include", CountIncludes); + } + if (CountNotIncludes) { + result.InsertValue("count_not_include", CountNotIncludes); + } + return result; + } + ui32 GetCountSum() const { + return CountIncludes + CountNotIncludes; + } +}; + +class TIntervalTxCounters { +private: + YDB_READONLY(ui32, Count, 0); + +public: + void Inc(const ui32 count = 1) { + Count += count; + } + [[nodiscard]] bool Dec(const ui32 count = 1) { + AFL_VERIFY(Count); + Count -= count; + return Count == 0; + } + bool IsEmpty() const { + return !Count; + } + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("count", Count); + return result; + } + + void ProvideFrom(const TIntervalTxCounters& counters) { + Count += counters.Count; + AFL_VERIFY(counters.Count); + } +}; + +class TPointInfo { +private: + THashMap StartTxIds; + THashMap FinishTxIds; + THashMap IntervalTxIds; + +public: + void InsertCurrentTxs(THashSet& txIds, const bool includePoint) const { + for (auto&& i : IntervalTxIds) { + txIds.emplace(i.first); + } + if (includePoint) { + for (auto&& i : FinishTxIds) { + if (!i.second.GetCountIncludes()) { + continue; + } + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end() && it->second.GetCountIncludes()) { + txIds.emplace(i.first); + } + } + } + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& starts = result.InsertValue("starts", NJson::JSON_ARRAY); + for (auto&& i : StartTxIds) { + auto& data = starts.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + auto& finish = result.InsertValue("finishes", NJson::JSON_ARRAY); + for (auto&& i : FinishTxIds) { + auto& data = finish.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", 
i.second.DebugJson()); + } + auto& txs = result.InsertValue("txs", NJson::JSON_ARRAY); + for (auto&& i : IntervalTxIds) { + auto& data = txs.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + return result; + } + + void AddStart(const ui64 txId, const bool include) { + StartTxIds[txId].Inc(include); + } + void RemoveStart(const ui64 txId, const bool include) { + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + } + } + void AddFinish(const ui64 txId, const bool include) { + FinishTxIds[txId].Inc(include); + } + void RemoveFinish(const ui64 txId, const bool include) { + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + } + } + void AddIntervalTx(const ui64 txId) { + IntervalTxIds[txId].Inc(); + } + void RemoveIntervalTx(const ui64 txId) { + if (IntervalTxIds[txId].Dec()) { + IntervalTxIds.erase(txId); + } + } + bool TryRemoveTx(const ui64 txId, const bool include) { + bool result = false; + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + result = true; + } + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + result = true; + } + if (IntervalTxIds[txId].Dec(txId)) { + IntervalTxIds.erase(txId); + result = true; + } + return result; + } + + bool IsEmpty() const { + return StartTxIds.empty() && FinishTxIds.empty() && IntervalTxIds.empty(); + } + + void ProvideTxIdsFrom(const TPointInfo& previouse) { + for (auto&& i : previouse.IntervalTxIds) { + auto provided = i.second; + { + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end()) { + provided.Inc(it->second.GetCountSum()); + } + } + { + auto it = FinishTxIds.find(i.first); + if (it != FinishTxIds.end()) { + if (provided.Dec(it->second.GetCountSum())) { + return; + } + } + } + IntervalTxIds[i.first].ProvideFrom(provided); + } + } +}; + +class TIntervalPoint { +private: + i32 IncludeState = 0; + std::optional PrimaryKey; + + TIntervalPoint(const NArrow::TReplaceKey& primaryKey, 
const int includeState) + : IncludeState(includeState) + , PrimaryKey(primaryKey) { + } + + TIntervalPoint(const std::shared_ptr& primaryKey, const int includeState) + : IncludeState(includeState) { + if (primaryKey) { + PrimaryKey = *primaryKey; + } + } + +public: + static TIntervalPoint Equal(const NArrow::TReplaceKey& replaceKey) { + return TIntervalPoint(replaceKey, 0); + } + static TIntervalPoint From(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + static TIntervalPoint To(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("include", IncludeState); + if (PrimaryKey) { + result.InsertValue("pk", PrimaryKey->DebugString()); + } + return result; + } + + bool IsIncluded() const { + return IncludeState == 0; + } + + bool operator==(const TIntervalPoint& item) const { + if (!PrimaryKey && !item.PrimaryKey) { + return IncludeState == item.IncludeState; + } else if (!PrimaryKey && item.PrimaryKey) { + return false; + } else if (PrimaryKey && !item.PrimaryKey) { + return false; + } else if (IncludeState == item.IncludeState) { + if (PrimaryKey->Size() != item.PrimaryKey->Size()) { + return false; + } + return *PrimaryKey == *item.PrimaryKey; + } else { + return false; + } + } + + bool operator<=(const TIntervalPoint& point) const { + return !(point < *this); + } + + bool operator<(const TIntervalPoint& point) const { + if (!PrimaryKey && !point.PrimaryKey) { + return IncludeState < point.IncludeState; + } else if (!PrimaryKey && point.PrimaryKey) { + return IncludeState < 0; + } else if (PrimaryKey && !point.PrimaryKey) { + return 0 < point.IncludeState; + } else { + const ui32 sizeMin = std::min(PrimaryKey->Size(), point.PrimaryKey->Size()); + const std::partial_ordering compareResult = PrimaryKey->ComparePartNotNull(*point.PrimaryKey, sizeMin); + if (compareResult == std::partial_ordering::less) { + return true; + 
} else if (compareResult == std::partial_ordering::greater) { + return false; + } else { + AFL_VERIFY(compareResult == std::partial_ordering::equivalent); + if (PrimaryKey->Size() == point.PrimaryKey->Size()) { + return IncludeState < point.IncludeState; + } else if (PrimaryKey->Size() < point.PrimaryKey->Size()) { + if (IncludeState <= 1) { + return true; + } else { + return false; + } + } else { + if (point.IncludeState <= 1) { + return false; + } else { + return true; + } + } + return false; + } + } + } +}; + +class TReadIntervals { +private: + std::map IntervalsInfo; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& jsonIntervals = result.InsertValue("intervals", NJson::JSON_ARRAY); + for (auto&& i : IntervalsInfo) { + auto& pointInfo = jsonIntervals.AppendValue(NJson::JSON_MAP); + pointInfo.InsertValue("p", i.first.DebugJson()); + pointInfo.InsertValue("i", i.second.DebugJson()); + } + return result; + } + + bool IsEmpty() const { + return IntervalsInfo.empty(); + } + + std::map::iterator Erase(const std::map::iterator& it) { + return IntervalsInfo.erase(it); + } + + std::map::iterator GetPointIterator(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.find(intervalPoint); + AFL_VERIFY(it != IntervalsInfo.end()); + return it; + } + + std::map::iterator InsertPoint(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.lower_bound(intervalPoint); + if (it == IntervalsInfo.end() || it == IntervalsInfo.begin()) { + return IntervalsInfo.emplace(intervalPoint, TPointInfo()).first; + } else if (it->first == intervalPoint) { + return it; + } else { + --it; + auto result = IntervalsInfo.emplace(intervalPoint, TPointInfo()).first; + result->second.ProvideTxIdsFrom(it->second); + return result; + } + } + + THashSet GetAffectedTxIds(const std::shared_ptr& writtenPrimaryKeys) const { + AFL_VERIFY(writtenPrimaryKeys); + auto it = IntervalsInfo.begin(); + THashSet affectedTxIds; + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("batch", writtenPrimaryKeys->ToString())("info", DebugJson().GetStringRobust()); + for (ui32 i = 0; i < writtenPrimaryKeys->num_rows();) { + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + auto pkIntervalPoint = TIntervalPoint::Equal(rKey); + while (it != IntervalsInfo.end() && it->first < pkIntervalPoint) { + ++it; + } + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + auto itPred = it; + bool equal = false; + if (pkIntervalPoint < it->first) { + if (it == IntervalsInfo.begin()) { + ++i; + continue; + } + if (pkIntervalPoint < it->first) { + --itPred; + } + } else { + equal = true; + ++it; + } + + itPred->second.InsertCurrentTxs(affectedTxIds, equal); + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + while (i < writtenPrimaryKeys->num_rows()) { + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + if (TIntervalPoint::Equal(rKey) < it->first) { + ++i; + } else { + break; + } + } + } + return affectedTxIds; + } +}; + +class TInteractionsContext { +private: + THashMap ReadIntervalsByPathId; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + for (auto&& i : ReadIntervalsByPathId) { + result.InsertValue(::ToString(i.first), i.second.DebugJson()); + } + return result; + } + + THashSet GetAffectedTxIds(const ui64 pathId, const std::shared_ptr& batch) const { + auto it = ReadIntervalsByPathId.find(pathId); + if (it == ReadIntervalsByPathId.end()) { + return {}; + } + return it->second.GetAffectedTxIds(batch); + } + + void AddInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto& intervals = ReadIntervalsByPathId[pathId]; + auto itFrom = intervals.InsertPoint(from); + auto itTo = intervals.InsertPoint(to); + itFrom->second.AddStart(txId, 
from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.AddIntervalTx(txId); + } + itTo->second.AddFinish(txId, to.IsIncluded()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "add_interval")("interactions_info", DebugJson().GetStringRobust()); + } + + void RemoveInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto itIntervals = ReadIntervalsByPathId.find(pathId); + AFL_VERIFY(itIntervals != ReadIntervalsByPathId.end())("path_id", pathId); + auto& intervals = itIntervals->second; + auto itFrom = intervals.GetPointIterator(from); + auto itTo = intervals.GetPointIterator(to); + itFrom->second.RemoveStart(txId, from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.RemoveIntervalTx(txId); + } + itTo->second.RemoveFinish(txId, to.IsIncluded()); + for (auto&& it = itFrom; it != itTo;) { + if (it->second.IsEmpty()) { + it = intervals.Erase(it); + } else { + ++it; + } + } + if (itTo->second.IsEmpty()) { + intervals.Erase(itTo); + } + if (intervals.IsEmpty()) { + ReadIntervalsByPathId.erase(itIntervals); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_interval")("interactions_info", DebugJson().GetStringRobust()); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp new file mode 100644 index 000000000000..159b5d269533 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp @@ -0,0 +1,5 @@ +#include "read_finished.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/read_finished.h b/ydb/core/tx/columnshard/transactions/locks/read_finished.h new file mode 100644 index 000000000000..895123e5097b --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.h @@ -0,0 +1,30 @@ +#pragma once +#include "abstract.h" + 
+namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadFinishedWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + TTxConflicts Conflicts; + + virtual bool DoCheckInteraction( + const ui64 /*selfTxId*/, TInteractionsContext& /*context*/, TTxConflicts& conflicts, TTxConflicts& /*notifications*/) const override { + conflicts = Conflicts; + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvReadFinishedWriter(const ui64 pathId, const TTxConflicts& conflicts) + : PathId(pathId) + , Conflicts(conflicts) + { + AFL_VERIFY(PathId); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.cpp b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp new file mode 100644 index 000000000000..963c47b068a9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp @@ -0,0 +1,47 @@ +#include "read_start.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +std::shared_ptr TEvReadStartWriter::DoBuildEvent() { + return std::make_shared(PathId, Schema, Filter); +} + +bool TEvReadStart::DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) { + if (!proto.HasRead()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "have not 'read' in proto"); + return false; + } + Schema = NArrow::DeserializeSchema(proto.GetRead().GetSchema()); + if (!Schema) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_schema"); + return false; + } + Filter = TPKRangesFilter::BuildFromString(proto.GetRead().GetFilter(), Schema, false); + if (!Filter) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_filter"); + return false; + } + return true; +} + +void TEvReadStart::DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const { + 
AFL_VERIFY(!!Filter); + AFL_VERIFY(!!Schema); + *proto.MutableRead()->MutableFilter() = Filter->SerializeToString(Schema); + *proto.MutableRead()->MutableSchema() = NArrow::SerializeSchema(*Schema); +} + +void TEvReadStart::DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.AddInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +void TEvReadStart::DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.RemoveInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.h b/ydb/core/tx/columnshard/transactions/locks/read_start.h new file mode 100644 index 000000000000..6587dfea5bda --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.h @@ -0,0 +1,71 @@ +#pragma once +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadStartWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + YDB_READONLY_DEF(THashSet, LockIdsForCheck); + + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& /*context*/, TTxConflicts& /*conflicts*/, TTxConflicts& notifications) const override { + for (auto&& i : LockIdsForCheck) { + notifications.Add(i, selfTxId); + } + return true; + } + + virtual std::shared_ptr DoBuildEvent() override; + +public: + TEvReadStartWriter(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter, + const THashSet& lockIdsForCheck) + : PathId(pathId) + , Schema(schema) + , Filter(filter) + , LockIdsForCheck(lockIdsForCheck) + { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + 
+class TEvReadStart: public ITxEvent { +public: + static TString GetClassNameStatic() { + return "READ_START"; + } + +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const override; + virtual void DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const override; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const override; + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + TEvReadStart() = default; + TEvReadStart(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter) + : PathId(pathId) + , Schema(schema) + , Filter(filter) { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/write.cpp b/ydb/core/tx/columnshard/transactions/locks/write.cpp new file mode 100644 index 000000000000..19ff258fe7c6 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.cpp @@ -0,0 +1,5 @@ +#include "write.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/transactions/locks/write.h b/ydb/core/tx/columnshard/transactions/locks/write.h new file mode 100644 index 000000000000..78ba8baf7e0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.h @@ -0,0 +1,36 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvWriteWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); 
+ + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& /*notifications*/) const override { + THashSet txIds = context.GetAffectedTxIds(PathId, RecordBatch); + txIds.erase(selfTxId); + TTxConflicts result; + for (auto&& i : txIds) { + result.Add(selfTxId, i); + } + std::swap(result, conflicts); + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvWriteWriter(const ui64 pathId, const std::shared_ptr& batch, const std::shared_ptr& pkSchema) + : PathId(pathId) + , RecordBatch(NArrow::TColumnOperator().Extract(batch, pkSchema->field_names())) { + AFL_VERIFY(PathId); + AFL_VERIFY(RecordBatch); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/ya.make b/ydb/core/tx/columnshard/transactions/locks/ya.make new file mode 100644 index 000000000000..a7ad6b27ab0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + dependencies.cpp + interaction.cpp + abstract.cpp + GLOBAL read_start.cpp + GLOBAL read_finished.cpp + GLOBAL write.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/transactions/protos + ydb/core/tx/columnshard/engines/predicate + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.cpp b/ydb/core/tx/columnshard/transactions/operators/backup.cpp index 5ea7cdafb3da..8427cd482901 100644 --- a/ydb/core/tx/columnshard/transactions/operators/backup.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/backup.cpp @@ -59,11 +59,12 @@ void TBackupTransactionOperator::DoStartProposeOnComplete(TColumnShard& /*owner* } } -bool TBackupTransactionOperator::ExecuteOnProgress(TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { +bool 
TBackupTransactionOperator::ProgressOnExecute( + TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { return true; } -bool TBackupTransactionOperator::CompleteOnProgress(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) { +bool TBackupTransactionOperator::ProgressOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) { return true; } diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.h b/ydb/core/tx/columnshard/transactions/operators/backup.h index c941d773acc6..cd9d5785dd9c 100644 --- a/ydb/core/tx/columnshard/transactions/operators/backup.h +++ b/ydb/core/tx/columnshard/transactions/operators/backup.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { -class TBackupTransactionOperator: public IProposeTxOperator { +class TBackupTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -24,6 +24,9 @@ class TBackupTransactionOperator: public IProposeTxOperator { } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Backup"; + } virtual bool DoIsAsync() const override { return true; } @@ -35,9 +38,9 @@ class TBackupTransactionOperator: public IProposeTxOperator { public: using TBase::TBase; - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual bool 
CompleteOnAbort(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp deleted file mode 100644 index 3aa27316237f..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "ev_write.h" - -namespace NKikimr::NColumnShard { -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write.h b/ydb/core/tx/columnshard/transactions/operators/ev_write.h deleted file mode 100644 index 2f33bec4ccbe..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include - -namespace NKikimr::NColumnShard { - - class TEvWriteTransactionOperator : public TTxController::ITransactionOperator { - using TBase = TTxController::ITransactionOperator; - using TProposeResult = TTxController::TProposeResult; - static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE); - private: - virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - owner.OperationsManager->LinkTransaction(LockId, GetTxId(), txc); - return TProposeResult(); - } - virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - - } - virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { - } - virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - } - virtual bool DoIsAsync() const override { - return false; - } - virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { - return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); - } - virtual TString DoDebugString() const override { - return "EV_WRITE"; - } 
- virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { - const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult; - if (IsFail()) { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); - } else { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); - } - ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); - } - - virtual bool DoParse(TColumnShard& /*owner*/, const TString& data) override { - NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; - if (!commitTxBody.ParseFromString(data)) { - return false; - } - LockId = commitTxBody.GetLockId(); - return !!LockId; - } - - public: - using TBase::TBase; - - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->CommitTransaction(owner, GetTxId(), txc, version); - } - - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { - auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); - ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); - return true; - } - - virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->AbortTransaction(owner, GetTxId(), txc); - } - virtual bool CompleteOnAbort(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - return true; - } - - private: - ui64 LockId = 0; - }; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp new file mode 100644 index 000000000000..1217abd09701 --- /dev/null 
+++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp @@ -0,0 +1,5 @@ +#include "abstract.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h new file mode 100644 index 000000000000..5be836f1d520 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h @@ -0,0 +1,113 @@ +#pragma once + +#include + +namespace NKikimr::NColumnShard { + +class TBaseEvWriteTransactionOperator: public TTxController::ITransactionOperator { +private: + using TBase = TTxController::ITransactionOperator; + using TProposeResult = TTxController::TProposeResult; + +protected: + ui64 LockId = 0; + +private: + virtual bool DoParseImpl(TColumnShard& owner, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) = 0; + virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override final { + owner.GetOperationsManager().LinkTransactionOnExecute(LockId, GetTxId(), txc); + return TProposeResult(); + } + virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& /*ctx*/) override final { + owner.GetOperationsManager().LinkTransactionOnComplete(LockId, GetTxId()); + } + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override final { + } + virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override final { + } + virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override final { + return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); + } + virtual bool DoParse(TColumnShard& owner, const TString& data) override final { + NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; + if (!commitTxBody.ParseFromString(data)) { + 
return false; + } + LockId = commitTxBody.GetLockId(); + return DoParseImpl(owner, commitTxBody); + } + + virtual bool DoIsAsync() const override final { + return false; + } + + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { + const auto& txInfo = GetTxInfo(); + std::unique_ptr evResult; + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("send_reply_tx_id", GetTxId())("send_reply_lock_id", LockId)); + if (IsFail()) { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), + NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); + } else { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared( + owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); + } + ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); + } + std::optional Version; + +public: + using TBase::TBase; + TBaseEvWriteTransactionOperator(const TFullTxInfo& txInfo, const ui64 lockId) + : TBase(txInfo) + , LockId(lockId) { + } + + virtual bool IsTxBroken() const { + return false; + } + + ui64 GetLockId() const { + return LockId; + } + + virtual bool ProgressOnExecute( + TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + Version = version; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", LockId)("broken", IsTxBroken()); + if (IsTxBroken()) { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + } else { + owner.GetOperationsManager().CommitTransactionOnExecute(owner, GetTxId(), txc, version); + } + return true; + } + + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", LockId)("broken", IsTxBroken()); + 
AFL_VERIFY(Version); + if (IsTxBroken()) { + owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + owner.TabletID(), GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN, "lock invalidated"); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } else { + owner.GetOperationsManager().CommitTransactionOnComplete(owner, GetTxId(), *Version); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } + return true; + } + + virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + return true; + } + virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& /*ctx*/) override { + owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + return true; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp new file mode 100644 index 000000000000..9e33c29d2053 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp @@ -0,0 +1,5 @@ +#include "primary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h new file mode 100644 index 000000000000..9073e7458ae3 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h @@ -0,0 +1,287 @@ +#pragma once + +#include "sync.h" + +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitPrimaryTransactionOperator: public 
TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY); + +private: + std::set ReceivingShards; + std::set SendingShards; + std::set WaitShardsBrokenFlags; + std::set WaitShardsResultAck; + std::optional TxBroken; + mutable TAtomicCounter ControlCounter = 0; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutablePrimaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + for (auto&& i : ReceivingShards) { + data.AddReceivingShards(i); + } + for (auto&& i : SendingShards) { + data.AddSendingShards(i); + } + for (auto&& i : WaitShardsBrokenFlags) { + data.AddWaitShardsBrokenFlags(i); + } + for (auto&& i : WaitShardsResultAck) { + data.AddWaitShardsResultAck(i); + } + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) override { + if (!commitTxBody.HasPrimaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetPrimaryTabletData(); + for (auto&& i : protoData.GetReceivingShards()) { + ReceivingShards.emplace(i); + } + for (auto&& i : protoData.GetSendingShards()) { + SendingShards.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsBrokenFlags()) { + WaitShardsBrokenFlags.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsResultAck()) { + WaitShardsResultAck.emplace(i); + } + AFL_VERIFY(ReceivingShards.empty() == SendingShards.empty()); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString 
DoGetOpType() const override { + return "EvWritePrimary"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_PRIMARY"; + } + class TTxWriteReceivedBrokenFlag: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + if (copy.WaitShardsBrokenFlags.erase(TabletId)) { + copy.TxBroken = copy.TxBroken.value_or(false) || BrokenFlag; + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.erase(TabletId)) { + op->TxBroken = op->TxBroken.value_or(false) || BrokenFlag; + op->SendBrokenFlagAck(*Self, TabletId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", TabletId); + op->InitializeRequests(*Self); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard& owner, const ui64 txId, const ui64 tabletId, const bool broken) + : TBase(&owner, ::ToString(txId)) + , TxId(txId) + , TabletId(tabletId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + return std::make_unique(owner, GetTxId(), 
sendTabletId, broken); + } + + class TTxWriteReceivedResultAck: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))("receive", TabletId); + AFL_VERIFY(copy.WaitShardsResultAck.erase(TabletId)); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))( + "receive", TabletId); + AFL_VERIFY(op->WaitShardsResultAck.erase(TabletId)); + op->CheckFinished(*Self); + } + + public: + TTxWriteReceivedResultAck(TColumnShard& owner, const ui64 txId, const ui64 tabletId) + : TBase(&owner) + , TxId(txId) + , TabletId(tabletId) { + } + }; + + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + void InitializeRequests(TColumnShard& owner) { + if (WaitShardsBrokenFlags.empty()) { + WaitShardsResultAck.erase(owner.TabletID()); + if (WaitShardsResultAck.size()) { + SendResult(owner); + } else { + CheckFinished(owner); + } + } + } + + void CheckFinished(TColumnShard& owner) { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "finished"); + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } + } + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 
recvTabletId) const override + { + return std::make_unique(owner, GetTxId(), recvTabletId); + } + + void SendBrokenFlagAck(TColumnShard& owner, const std::optional tabletId = {}) { + for (auto&& i : SendingShards) { + if (!WaitShardsBrokenFlags.contains(i)) { + if (tabletId && *tabletId != i) { + continue; + } + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), 0), i, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + void SendResult(TColumnShard& owner) { + AFL_VERIFY(!!TxBroken); + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(*TxBroken ? NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + for (auto&& i : ReceivingShards) { + if (WaitShardsResultAck.contains(i)) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSet(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), readSetData.SerializeAsString()), i, + true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + InitializeRequests(owner); + CheckFinished(owner); + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + auto copy = *op; + copy.TxBroken = lock.IsBroken(); + 
AFL_VERIFY(copy.WaitShardsBrokenFlags.erase(Self->TabletID())); + if (copy.WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(copy.WaitShardsResultAck.erase(Self->TabletID())); + } + + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + op->TxBroken = lock.IsBroken(); + AFL_VERIFY(op->WaitShardsBrokenFlags.erase(Self->TabletID())); + if (op->WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(op->WaitShardsResultAck.erase(Self->TabletID())); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", Self->TabletID()); + op->CheckFinished(*Self); + } + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual void OnTimeout(TColumnShard& owner) override { + InitializeRequests(owner); + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + +public: + using TBase::TBase; + TEvWriteCommitPrimaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const std::set& receivingShards, const std::set& sendingShards) + : TBase(txInfo, lockId) + , ReceivingShards(receivingShards) + , SendingShards(sendingShards) { + 
WaitShardsBrokenFlags = SendingShards; + WaitShardsResultAck = ReceivingShards; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp new file mode 100644 index 000000000000..30f6078321e9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp @@ -0,0 +1,5 @@ +#include "secondary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h new file mode 100644 index 000000000000..ae0224057b46 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h @@ -0,0 +1,219 @@ +#pragma once + +#include "sync.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitSecondaryTransactionOperator: public TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY); + +private: + ui64 ArbiterTabletId; + bool NeedReceiveBroken = false; + bool ReceiveAck = false; + bool SelfBroken = false; + mutable TAtomicCounter ControlCounter = 0; + std::optional TxBroken; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutableSecondaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + data.SetSelfBroken(SelfBroken); + data.SetNeedReceiveBroken(NeedReceiveBroken); + data.SetReceiveAck(ReceiveAck); + data.SetArbiterTabletId(ArbiterTabletId); + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, 
const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) override { + if (!commitTxBody.HasSecondaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetSecondaryTabletData(); + SelfBroken = protoData.GetSelfBroken(); + ArbiterTabletId = protoData.GetArbiterTabletId(); + NeedReceiveBroken = protoData.GetNeedReceiveBroken(); + ReceiveAck = protoData.GetReceiveAck(); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString DoGetOpType() const override { + return "EvWriteSecondary"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_SECONDARY"; + } + class TTxWriteReceivedAck: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.ReceiveAck = true; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->ReceiveAck = true; + if (!op->NeedReceiveBroken) { + op->TxBroken = false; + Self->EnqueueProgressTx(ctx, TxId); + } + } + + public: + TTxWriteReceivedAck(TColumnShard& owner, const ui64 txId) + : TBase(&owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 recvTabletId) const override { + AFL_VERIFY(recvTabletId == ArbiterTabletId)("recv", recvTabletId)("arbiter", ArbiterTabletId); + return std::make_unique(owner, 
GetTxId()); + } + + class TTxWriteReceivedBrokenFlag: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.TxBroken = BrokenFlag; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + if (BrokenFlag) { + Self->GetProgressTxController().ExecuteOnCancel(TxId, txc); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->TxBroken = BrokenFlag; + op->SendBrokenFlagAck(*Self); + if (BrokenFlag) { + Self->GetProgressTxController().CompleteOnCancel(TxId, ctx); + } + Self->EnqueueProgressTx(ctx, TxId); + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard* owner, const ui64 txId, const bool broken) + : TBase(owner) + , TxId(txId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + AFL_VERIFY(ArbiterTabletId == sendTabletId); + return std::make_unique(&owner, GetTxId(), broken); + } + + void SendBrokenFlagAck(TColumnShard& owner) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), 0), ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + void SendResult(TColumnShard& owner) { + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(SelfBroken ? 
NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward(new TEvTxProcessing::TEvReadSet( + 0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), readSetData.SerializeAsString()), + ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + if (TxBroken || (ReceiveAck && !NeedReceiveBroken)) { + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } else if (!ReceiveAck) { + SendResult(owner); + } + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.SelfBroken = lock.IsBroken(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->SelfBroken = lock.IsBroken(); + op->SendResult(*Self); + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (TxBroken || (!NeedReceiveBroken && ReceiveAck)) { + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + + virtual void OnTimeout(TColumnShard& owner) override { + SendResult(owner); + } + +public: + using TBase::TBase; + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + TEvWriteCommitSecondaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const ui64 arbiterTabletId, const bool needReceiveBroken) + : TBase(txInfo, lockId) + , ArbiterTabletId(arbiterTabletId) + , NeedReceiveBroken(needReceiveBroken) { + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp new file mode 100644 index 000000000000..a6c51118a30e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp @@ -0,0 +1,5 @@ +#include "simple.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h new file mode 100644 index 000000000000..e3301117bb48 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h @@ -0,0 +1,28 @@ +#pragma once + +#include "abstract.h" + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteSimpleCommitTransactionOperator: public TBaseEvWriteTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TBaseEvWriteTransactionOperator; + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& /*commitTxBody*/) override { + return true; + } + static inline auto Registrator = 
TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE); + +public: + using TBase::TBase; + virtual TString DoGetOpType() const override { + return "EvWriteSimple"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_SIMPLE"; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp new file mode 100644 index 000000000000..a8a75f586177 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp @@ -0,0 +1,5 @@ +#include "sync.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h new file mode 100644 index 000000000000..8d149cca8100 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h @@ -0,0 +1,37 @@ +#pragma once + +#include "abstract.h" + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitSyncTransactionOperator: public TBaseEvWriteTransactionOperator { +private: + using TBase = TBaseEvWriteTransactionOperator; + mutable std::optional DeadlockControlInstant; + virtual void OnTimeout(TColumnShard& owner) = 0; + + virtual bool DoPingTimeout(TColumnShard& owner, const TMonotonic now) override final { + if (!DeadlockControlInstant) { + DeadlockControlInstant = now; + } else if (now - *DeadlockControlInstant > TDuration::Seconds(2)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "tx_timeout")("lock", LockId)("tx_id", GetTxId())( + "d", now - *DeadlockControlInstant); + DeadlockControlInstant = now; + OnTimeout(owner); + return true; + } + return false; + } + +public: + using TBase::TBase; + + virtual std::unique_ptr CreateReceiveResultAckTx(TColumnShard& owner, const ui64 recvTabletId) const = 0; + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + 
TColumnShard& owner, const ui64 sendTabletId, const bool broken) const = 0; + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const = 0; +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make new file mode 100644 index 000000000000..c7283c74fa3d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + GLOBAL secondary.cpp + GLOBAL simple.cpp + GLOBAL primary.cpp + abstract.cpp + sync.cpp +) + +PEERDIR( + ydb/services/metadata/abstract + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/locks +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp index 3c04dcdff64a..99298434b88b 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp @@ -20,9 +20,9 @@ TLongTxTransactionOperator::TProposeResult TLongTxTransactionOperator::DoStartPr TStringBuilder() << "Commit TxId# " << GetTxId() << " references WriteId# " << (ui64)writeId << " that is already locked by TxId# " << lw.PreparedTxId); } - auto it = owner.InsertTable->GetInserted().find(writeId); - if (it != owner.InsertTable->GetInserted().end()) { - auto granuleShardingInfo = owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(it->second.PathId); + if (auto* inserted = owner.InsertTable->GetInserted().GetOptional(writeId)) { + auto granuleShardingInfo = + owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(inserted->GetPathId()); if (granuleShardingInfo && lw.GranuleShardingVersionId && *lw.GranuleShardingVersionId != granuleShardingInfo->GetSnapshotVersion()) { return 
TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Commit TxId# " << GetTxId() << " references WriteId# " << (ui64)writeId << " declined through sharding deprecated"); @@ -43,9 +43,14 @@ bool TLongTxTransactionOperator::DoParse(TColumnShard& /*owner*/, const TString& } for (auto& id : commitTxBody.GetWriteIds()) { - WriteIds.insert(TWriteId{ id }); + WriteIds.insert(TInsertWriteId{ id }); } return true; } +void TLongTxTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + const auto& txInfo = GetTxInfo(); + ctx.Send(txInfo.Source, BuildProposeResultEvent(owner).release()); +} + } diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h index 5a56077ea470..45b642c8e98a 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { - class TLongTxTransactionOperator: public IProposeTxOperator { + class TLongTxTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { using TBase = IProposeTxOperator; using TProposeResult = TTxController::TProposeResult; static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT); @@ -16,14 +16,23 @@ namespace NKikimr::NColumnShard { return "LONG_TX_WRITE"; } + bool TxWithDeadline() const override { + return true; + } + virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { } virtual void DoFinishProposeOnComplete(TColumnShard& 
/*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "LongTxWrite"; + } virtual bool DoIsAsync() const override { return false; } @@ -37,14 +46,14 @@ namespace NKikimr::NColumnShard { public: using TBase::TBase; - void OnTabletInit(TColumnShard& owner) override { + virtual void DoOnTabletInit(TColumnShard& owner) override { for (auto&& writeId : WriteIds) { AFL_VERIFY(owner.LongTxWrites.contains(writeId))("problem", "ltx_not_exists_for_write_id")("txId", GetTxId())("writeId", (ui64)writeId); owner.AddLongTxWrite(writeId, GetTxId()); } } - bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); @@ -54,19 +63,19 @@ namespace NKikimr::NColumnShard { auto counters = owner.InsertTable->Commit(dbTable, version.GetPlanStep(), version.GetTxId(), WriteIds, pathExists); - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } owner.UpdateInsertTableCounters(); return true; } - bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { + bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) 
override { auto result = std::make_unique(owner.TabletID(), TxInfo.TxKind, GetTxId(), NKikimrTxColumnShard::SUCCESS); result->Record.SetStep(TxInfo.PlanStep); ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); @@ -75,7 +84,7 @@ namespace NKikimr::NColumnShard { virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } TBlobGroupSelector dsGroupSelector(owner.Info()); @@ -88,7 +97,7 @@ namespace NKikimr::NColumnShard { } private: - THashSet WriteIds; + THashSet WriteIds; }; } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp index c7d39da52740..2a48ca49a279 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp @@ -3,21 +3,36 @@ namespace NKikimr::NColumnShard { void IProposeTxOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + if (owner.CurrentSchemeShardId) { + AFL_VERIFY(owner.CurrentSchemeShardId); + ctx.Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(BuildProposeResultEvent(owner).release(), (ui64)owner.CurrentSchemeShardId, true)); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "scheme_shard_tablet_not_initialized")("source", GetTxInfo().Source); + ctx.Send(GetTxInfo().Source, BuildProposeResultEvent(owner).release()); + } +} + +std::unique_ptr IProposeTxOperator::BuildProposeResultEvent(const TColumnShard& owner) const { const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); + std::unique_ptr evResult = 
+ std::make_unique(owner.TabletID(), txInfo.TxKind, txInfo.TxId, + GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_ERROR); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } else { evResult->Record.SetMinStep(txInfo.MinStep); evResult->Record.SetMaxStep(txInfo.MaxStep); if (owner.ProcessingParams) { evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_SUCCESS); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } - ctx.Send(txInfo.Source, evResult.release()); + return evResult; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h index 84b2f7e8db66..d867e71bad9b 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h @@ -12,6 +12,7 @@ class IProposeTxOperator: public TTxController::ITransactionOperator { virtual bool DoCheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const override { return GetTxInfo() == originalTxInfo; } + std::unique_ptr BuildProposeResultEvent(const TColumnShard& owner) const; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { if (!currentTxInfo.SeqNo || 
!GetTxInfo().SeqNo) { diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.cpp b/ydb/core/tx/columnshard/transactions/operators/schema.cpp index 13f854e277b5..d4019542bf1e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/schema.cpp @@ -40,7 +40,17 @@ class TWaitEraseTablesTxSubscriber: public NSubscriber::ISubscriber { } }; -NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { + auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); + auto lastSeqNo = owner.LastSchemaSeqNo; + + // Check if proposal is outdated + if (seqNo < lastSeqNo) { + auto errorMessage = TStringBuilder() << "Ignoring outdated schema tx proposal at tablet " << owner.TabletID() << " txId " << GetTxId() + << " ssId " << owner.CurrentSchemeShardId << " seqNo " << seqNo << " lastSeqNo " << lastSeqNo; + return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); + } + switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: { @@ -67,21 +77,6 @@ NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator: break; } - auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); - auto lastSeqNo = owner.LastSchemaSeqNo; - - // Check if proposal is outdated - if (seqNo < lastSeqNo) { - auto errorMessage = TStringBuilder() - << "Ignoring outdated schema tx proposal at tablet " - << owner.TabletID() - << " txId " << GetTxId() - << " ssId " << owner.CurrentSchemeShardId - << " seqNo " << seqNo - << " lastSeqNo " << lastSeqNo; - return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); - } - owner.UpdateSchemaSeqNo(seqNo, txc); return 
TProposeResult(); } @@ -166,7 +161,7 @@ NKikimr::TConclusionStatus TSchemaTransactionOperator::ValidateTables(::google:: } return TConclusionStatus::Success(); } -bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { +void TSchemaTransactionOperator::DoOnTabletInit(TColumnShard& owner) { AFL_VERIFY(WaitPathIdsToErase.empty()); switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: @@ -190,11 +185,9 @@ bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { if (WaitPathIdsToErase.size()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "wait_remove_path_id")("pathes", JoinSeq(",", WaitPathIdsToErase))("tx_id", GetTxId()); owner.Subscribers->RegisterSubscriber(std::make_shared(WaitPathIdsToErase, GetTxId())); - return true; } else { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_pathes_cleaned")("tx_id", GetTxId()); owner.Execute(new TTxFinishAsyncTransaction(owner, GetTxId())); - return false; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.h b/ydb/core/tx/columnshard/transactions/operators/schema.h index f59e92bc2d6d..8b7575cc3cc4 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.h +++ b/ydb/core/tx/columnshard/transactions/operators/schema.h @@ -7,7 +7,7 @@ namespace NKikimr::NColumnShard { -class TSchemaTransactionOperator: public IProposeTxOperator { +class TSchemaTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -18,7 +18,7 @@ class TSchemaTransactionOperator: public IProposeTxOperator { THashSet NotifySubscribers; THashSet WaitPathIdsToErase; - virtual bool DoOnStartAsync(TColumnShard& owner) override; + virtual void DoOnTabletInit(TColumnShard& owner) override; template THashSet GetNotErasedTableIds(const TColumnShard& owner, const TInfoProto& tables) const { @@ -43,6 +43,22 @@ class TSchemaTransactionOperator: public IProposeTxOperator { } virtual void 
DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + switch (SchemaTxBody.TxBody_case()) { + case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: + return "Scheme:InitShard"; + case NKikimrTxColumnShard::TSchemaTxBody::kEnsureTables: + return "Scheme:EnsureTables"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterTable: + return "Scheme:AlterTable"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterStore: + return "Scheme:AlterStore"; + case NKikimrTxColumnShard::TSchemaTxBody::kDropTable: + return "Scheme:DropTable"; + case NKikimrTxColumnShard::TSchemaTxBody::TXBODY_NOT_SET: + return "Scheme:TXBODY_NOT_SET"; + } + } virtual bool DoIsAsync() const override { return WaitPathIdsToErase.size(); } @@ -65,7 +81,8 @@ class TSchemaTransactionOperator: public IProposeTxOperator { public: using TBase::TBase; - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + virtual bool ProgressOnExecute( + TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { if (!!TxAddSharding) { auto* tx = dynamic_cast(TxAddSharding.get()); AFL_VERIFY(tx); @@ -79,7 +96,7 @@ class TSchemaTransactionOperator: public IProposeTxOperator { return true; } - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { if (!!TxAddSharding) { TxAddSharding->Complete(ctx); } diff --git a/ydb/core/tx/columnshard/transactions/operators/sharing.cpp b/ydb/core/tx/columnshard/transactions/operators/sharing.cpp index b7851d77b75c..ec90f07c16eb 100644 --- a/ydb/core/tx/columnshard/transactions/operators/sharing.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/sharing.cpp @@ -28,9 +28,9 @@ bool TSharingTransactionOperator::DoParse(TColumnShard& 
owner, const TString& da AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "session_exists")("session_id", SharingTask->GetSessionId())("info", SharingTask->DebugString()); } else { SharingTask->Confirm(); + TxPropose = SharingSessionsManager->ProposeDestSession(&owner, SharingTask); } - TxPropose = SharingSessionsManager->ProposeDestSession(&owner, SharingTask); return true; } @@ -47,15 +47,16 @@ void TSharingTransactionOperator::DoStartProposeOnComplete(TColumnShard& /*owner if (!SessionExistsFlag) { AFL_VERIFY(!!TxPropose); TxPropose->Complete(ctx); + TxPropose.reset(); } - TxPropose.release(); } -bool TSharingTransactionOperator::ExecuteOnProgress(TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { +bool TSharingTransactionOperator::ProgressOnExecute( + TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { return true; } -bool TSharingTransactionOperator::CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) { +bool TSharingTransactionOperator::ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) { for (TActorId subscriber : NotifySubscribers) { auto event = MakeHolder(owner.TabletID(), GetTxId()); ctx.Send(subscriber, event.Release(), 0, 0); diff --git a/ydb/core/tx/columnshard/transactions/operators/sharing.h b/ydb/core/tx/columnshard/transactions/operators/sharing.h index 4173b5e11c9a..13c7df7cad0e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/sharing.h +++ b/ydb/core/tx/columnshard/transactions/operators/sharing.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { -class TSharingTransactionOperator: public IProposeTxOperator { +class TSharingTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -25,6 +25,9 @@ class TSharingTransactionOperator: public IProposeTxOperator { } virtual void 
DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Sharing"; + } virtual bool DoIsAsync() const override { AFL_VERIFY(SharingTask); return !SharingTask->IsFinished(); @@ -40,9 +43,9 @@ class TSharingTransactionOperator: public IProposeTxOperator { NotifySubscribers.insert(actorId); } - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& ctx) override; diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp b/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp deleted file mode 100644 index 1bf60d44d373..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "ss_operation.h" - -namespace NKikimr::NColumnShard { - -void ISSTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { - const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); - if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); - } else { - 
evResult->Record.SetMinStep(txInfo.MinStep); - evResult->Record.SetMaxStep(txInfo.MaxStep); - if (owner.ProcessingParams) { - evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); - } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); - } - ctx.Send(txInfo.Source, evResult.release()); -} - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h b/ydb/core/tx/columnshard/transactions/operators/ss_operation.h deleted file mode 100644 index feff6af77225..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -namespace NKikimr::NColumnShard { - -class ISSTransactionOperator: public TTxController::ITransactionOperator { -private: - using TBase = TTxController::ITransactionOperator; -protected: - virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; -public: - using TBase::TBase; -}; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ya.make b/ydb/core/tx/columnshard/transactions/operators/ya.make index 579b0d07679c..e96323b51b44 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ya.make +++ b/ydb/core/tx/columnshard/transactions/operators/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( GLOBAL schema.cpp GLOBAL long_tx_write.cpp - GLOBAL ev_write.cpp GLOBAL backup.cpp GLOBAL sharing.cpp propose_tx.cpp @@ -11,6 +10,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/operators/ev_write ydb/core/tx/columnshard/export/session ) diff --git a/ydb/core/tx/columnshard/transactions/protos/tx_event.proto b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto new file mode 100644 index 000000000000..6e64dde46e4d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto @@ -0,0 +1,19 @@ +package NKikimrColumnShardTxProto; + +message TEvent { + optional uint64 PathId = 1; + optional string ClassName 
= 2; + + message TReadEvent { + optional string Filter = 1; + optional string Schema = 2; + } + + message TWriteEvent { + } + + oneof Implementation { + TReadEvent Read = 20; + TWriteEvent Write = 21; + } +} diff --git a/ydb/core/tx/columnshard/transactions/protos/ya.make b/ydb/core/tx/columnshard/transactions/protos/ya.make new file mode 100644 index 000000000000..7a54fdc3404d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/ya.make @@ -0,0 +1,12 @@ +PROTO_LIBRARY() + +SRCS( + tx_event.proto +) + +PEERDIR( + ydb/core/tx/columnshard/common/protos + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.cpp b/ydb/core/tx/columnshard/transactions/tx_controller.cpp index aeebb5c78277..afb1e8a33d50 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.cpp +++ b/ydb/core/tx/columnshard/transactions/tx_controller.cpp @@ -1,4 +1,5 @@ #include "tx_controller.h" + #include "transactions/tx_finish_async.h" #include @@ -6,7 +7,8 @@ namespace NKikimr::NColumnShard { TTxController::TTxController(TColumnShard& owner) - : Owner(owner) { + : Owner(owner) + , Counters(owner.Counters.GetCSCounters().TxProgress) { } bool TTxController::HaveOutdatedTxs() const { @@ -45,18 +47,28 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return false; } + ui32 countWithDeadline = 0; + ui32 countOverrideDeadline = 0; + ui32 countNoDeadline = 0; while (!rowset.EndOfSet()) { const ui64 txId = rowset.GetValue(); const NKikimrTxColumnShard::ETransactionKind txKind = rowset.GetValue(); ITransactionOperator::TPtr txOperator(ITransactionOperator::TFactory::Construct(txKind, TTxInfo(txKind, txId))); - Y_ABORT_UNLESS(!!txOperator); + AFL_VERIFY(!!txOperator)("kind", txKind); const TString txBody = rowset.GetValue(); - Y_ABORT_UNLESS(txOperator->Parse(Owner, txBody, true)); + AFL_VERIFY(txOperator->Parse(Owner, txBody, true)); auto& txInfo = txOperator->MutableTxInfo(); txInfo.MaxStep = rowset.GetValue(); if 
(txInfo.MaxStep != Max()) { txInfo.MinStep = txInfo.MaxStep - MaxCommitTxDelay.MilliSeconds(); + ++countWithDeadline; + } else if (txOperator->TxWithDeadline()) { + txInfo.MinStep = GetAllowedStep(); + txInfo.MaxStep = txInfo.MinStep + MaxCommitTxDelay.MilliSeconds(); + ++countOverrideDeadline; + } else { + ++countNoDeadline; } txInfo.PlanStep = rowset.GetValueOrDefault(0); txInfo.Source = rowset.GetValue(); @@ -74,25 +86,14 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return false; } } + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("override", countOverrideDeadline)("no_dl", countNoDeadline)("dl", countWithDeadline)( + "operators", Operators.size())("plan", PlanQueue.size())("dl_queue", DeadlineQueue.size()); return true; } -TTxController::ITransactionOperator::TPtr TTxController::GetTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - if (it == Operators.end()) { - return nullptr; - } - return it->second; -} - -TTxController::ITransactionOperator::TPtr TTxController::GetVerifiedTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - AFL_VERIFY(it != Operators.end())("tx_id", txId); - return it->second; -} - -std::shared_ptr TTxController::UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { - auto op = GetVerifiedTxOperator(tx.GetTxId()); +std::shared_ptr TTxController::UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { + auto op = GetTxOperatorVerified(tx.GetTxId()); op->ResetStatusOnUpdate(); auto& txInfo = op->MutableTxInfo(); txInfo.Source = tx.Source; @@ -104,17 +105,20 @@ std::shared_ptr TTxController::UpdateTxSour return op; } -TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + 
NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->GetTxInfo(); AFL_VERIFY(txInfo.MaxStep == Max()); AFL_VERIFY(Operators.emplace(txInfo.TxId, txOperator).second); Schema::SaveTxInfo(db, txInfo, txBody); + Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, + const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->MutableTxInfo(); @@ -125,22 +129,22 @@ TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_p Schema::SaveTxInfo(db, txInfo, txBody); DeadlineQueue.emplace(txInfo.MaxStep, txOperator->GetTxId()); + Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -bool TTxController::AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - auto opIt = Operators.find(txId); +bool TTxController::AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc) { + auto opIt = Operators.find(planQueueItem.TxId); Y_ABORT_UNLESS(opIt != Operators.end()); Y_ABORT_UNLESS(opIt->second->GetTxInfo().PlanStep == 0); opIt->second->ExecuteOnAbort(Owner, txc); opIt->second->CompleteOnAbort(Owner, NActors::TActivationContext::AsActorContext()); + Counters.OnAbortTx(opIt->second->GetOpType()); - if (opIt->second->GetTxInfo().MaxStep != Max()) { - DeadlineQueue.erase(TPlanQueueItem(opIt->second->GetTxInfo().MaxStep, txId)); - } - Operators.erase(txId); + AFL_VERIFY(Operators.erase(planQueueItem.TxId)); + AFL_VERIFY(DeadlineQueue.erase(planQueueItem)); NIceDb::TNiceDb db(txc.DB); - Schema::EraseTxInfo(db, txId); + Schema::EraseTxInfo(db, planQueueItem.TxId); return true; } @@ -179,7 +183,14 @@ bool 
TTxController::ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTrans return true; } -std::optional TTxController::StartPlannedTx() { +std::optional TTxController::GetFirstPlannedTx() const { + if (!PlanQueue.empty()) { + return GetTxInfoVerified(PlanQueue.begin()->TxId); + } + return std::nullopt; +} + +std::optional TTxController::PopFirstPlannedTx() { if (!PlanQueue.empty()) { auto node = PlanQueue.extract(PlanQueue.begin()); auto& item = node.value(); @@ -190,13 +201,16 @@ std::optional TTxController::StartPlannedTx() { return std::nullopt; } -void TTxController::FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { +void TTxController::ProgressOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); + auto opIt = Operators.find(txId); + AFL_VERIFY(opIt != Operators.end())("tx_id", txId); + Counters.OnFinishPlannedTx(opIt->second->GetOpType()); + AFL_VERIFY(Operators.erase(txId)); Schema::EraseTxInfo(db, txId); } -void TTxController::CompleteRunningTx(const TPlanQueueItem& txItem) { - AFL_VERIFY(Operators.erase(txItem.TxId)); +void TTxController::ProgressOnComplete(const TPlanQueueItem& txItem) { AFL_VERIFY(RunningQueue.erase(txItem))("info", txItem.DebugString()); } @@ -240,7 +254,7 @@ size_t TTxController::CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& } ui64 txId = it->TxId; LOG_S_DEBUG(TStringBuilder() << "Removing outdated txId " << txId << " max step " << it->Step << " outdated step "); - AbortTx(txId, txc); + AbortTx(*it, txc); ++removedCount; } } @@ -263,7 +277,10 @@ TDuration TTxController::GetTxCompleteLag(ui64 timecastStep) const { TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { auto it = Operators.find(txId); if (it == Operators.end()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_plan_tx")("tx_id", txId); return EPlanResult::Skipped; + } else { + 
AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "plan_tx")("tx_id", txId)("plan_step", it->second->MutableTxInfo().PlanStep); } auto& txInfo = it->second->MutableTxInfo(); if (txInfo.PlanStep == 0) { @@ -280,6 +297,8 @@ TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 } void TTxController::OnTabletInit() { + AFL_VERIFY(!StartedFlag); + StartedFlag = true; for (auto&& txOperator : Operators) { txOperator.second->OnTabletInit(Owner); } @@ -287,22 +306,24 @@ void TTxController::OnTabletInit() { std::shared_ptr TTxController::StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")( - "tx_info", txInfo.DebugString())("tx_info", txInfo.DebugString()); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")("tx_info", txInfo.DebugString()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - std::shared_ptr txOperator(TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); + std::shared_ptr txOperator( + TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); AFL_VERIFY(!!txOperator); if (!txOperator->Parse(Owner, txBody)) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse txOperator"); return txOperator; } + Counters.OnStartProposeOnExecute(txOperator->GetOpType()); auto txInfoPtr = GetTxInfo(txInfo.TxId); if (!!txInfoPtr) { if (!txOperator->CheckAllowUpdate(*txInfoPtr)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "incorrect duplication")("actual_tx", txInfoPtr->DebugString()); - TTxController::TProposeResult proposeResult( - NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); + 
TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); txOperator->SetProposeStartInfo(proposeResult); return txOperator; } else { @@ -325,52 +346,48 @@ std::shared_ptr TTxController::StartPropose } } -void TTxController::StartProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - txOperator->StartProposeOnComplete(Owner, ctx); - } +void TTxController::StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); + txOperator.StartProposeOnComplete(Owner, ctx); + Counters.OnStartProposeOnComplete(txOperator.GetOpType()); } void TTxController::FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnExecute")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { + if (auto txOperator = GetTxOperatorOptional(txId)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); txOperator->FinishProposeOnExecute(Owner, txc); + Counters.OnFinishProposeOnExecute(txOperator->GetOpType()); + } else { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); } } +void TTxController::FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator.GetTxInfo().DebugString()); + TTxController::TProposeResult proposeResult = txOperator.GetProposeStartInfoVerified(); + AFL_VERIFY(!txOperator.IsFail()); + txOperator.FinishProposeOnComplete(Owner, ctx); + txOperator.SendReply(Owner, ctx); + Counters.OnFinishProposeOnComplete(txOperator.GetOpType()); +} + void TTxController::FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); + auto txOperator = GetTxOperatorOptional(txId); if (!txOperator) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction finish")("tx_id", txId); return; } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator->GetTxInfo().DebugString()); - TTxController::TProposeResult proposeResult = txOperator->GetProposeStartInfoVerified(); - AFL_VERIFY(!txOperator->IsFail()); - txOperator->FinishProposeOnComplete(Owner, ctx); - txOperator->SendReply(Owner, ctx); -} - -void TTxController::StartOperators() { - AFL_VERIFY(!StartedFlag); - StartedFlag = true; - for (auto&& i : Operators) { - Y_UNUSED(i.second->OnStartAsync(Owner)); - } + return FinishProposeOnComplete(*txOperator, ctx); } void TTxController::ITransactionOperator::SwitchStateVerified(const EStatus from, const EStatus to) { - AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected 
status")("real_state", *Status)("expected", from)("details", DebugString()); + AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected status")("real_state", *Status)("expected", from)( + "details", DebugString()); Status = to; } diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.h b/ydb/core/tx/columnshard/transactions/tx_controller.h index 3e9dfcfb735e..e48f10d3796d 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.h +++ b/ydb/core/tx/columnshard/transactions/tx_controller.h @@ -1,11 +1,14 @@ #pragma once -#include - #include +#include +#include #include #include +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +} namespace NKikimr::NColumnShard { @@ -14,10 +17,12 @@ class TColumnShard; struct TBasicTxInfo { const NKikimrTxColumnShard::ETransactionKind TxKind; const ui64 TxId; + public: TBasicTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId) : TxKind(txKind) , TxId(txId) { + AFL_VERIFY(txKind != NKikimrTxColumnShard::TX_KIND_NONE); } bool operator==(const TBasicTxInfo& item) const = default; @@ -42,12 +47,18 @@ struct TFullTxInfo: public TBasicTxInfo { TActorId Source; ui64 Cookie = 0; std::optional SeqNo; + public: + static TFullTxInfo BuildFake(const NKikimrTxColumnShard::ETransactionKind kind) { + return TFullTxInfo(kind, 0, NActors::TActorId(), 0, {}); + } + bool operator==(const TFullTxInfo& item) const = default; TString DebugString() const { TStringBuilder sb; - sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source << ";cookie=" << Cookie; + sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source + << ";cookie=" << Cookie; if (SeqNo) { sb << *SeqNo << ";"; } @@ -75,12 +86,12 @@ struct TFullTxInfo: public TBasicTxInfo { : TBasicTxInfo(txKind, txId) { } - TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, 
const ui64 cookie, const std::optional& seqNo) + TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, const ui64 cookie, + const std::optional& seqNo) : TBasicTxInfo(txKind, txId) , Source(source) , Cookie(cookie) - , SeqNo(seqNo) - { + , SeqNo(seqNo) { } }; @@ -89,6 +100,7 @@ class TTxProposeResult { class TProposeResult { YDB_READONLY(NKikimrTxColumnShard::EResultStatus, Status, NKikimrTxColumnShard::EResultStatus::PREPARED); YDB_READONLY_DEF(TString, StatusMessage); + public: TProposeResult() = default; TProposeResult(NKikimrTxColumnShard::EResultStatus status, const TString& statusMessage) @@ -109,16 +121,15 @@ class TTxProposeResult { std::optional BaseTxInfo; std::optional FullTxInfo; TProposeResult ProposeResult; + public: TTxProposeResult(const TBasicTxInfo& txInfo, TProposeResult&& result) : BaseTxInfo(txInfo) , ProposeResult(std::move(result)) { - } TTxProposeResult(const TFullTxInfo& txInfo, TProposeResult&& result) : FullTxInfo(txInfo) , ProposeResult(std::move(result)) { - } ui64 GetTxId() const noexcept { @@ -152,8 +163,8 @@ class TTxController { TPlanQueueItem(const ui64 step, const ui64 txId) : Step(step) - , TxId(txId) - {} + , TxId(txId) { + } inline bool operator<(const TPlanQueueItem& rhs) const { return Step < rhs.Step || (Step == rhs.Step && TxId < rhs.TxId); @@ -180,10 +191,12 @@ class TTxController { ReplySent, Failed }; + protected: TTxInfo TxInfo; YDB_READONLY_DEF(std::optional, ProposeStartInfo); std::optional Status = EStatus::Created; + private: friend class TTxController; virtual bool DoParse(TColumnShard& owner, const TString& data) = 0; @@ -191,41 +204,48 @@ class TTxController { virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; virtual void DoFinishProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) = 0; virtual void DoFinishProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; + virtual 
TString DoGetOpType() const = 0; virtual bool DoIsAsync() const = 0; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) = 0; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const = 0; virtual bool DoCheckTxInfoForReply(const TFullTxInfo& /*originalTxInfo*/) const { return true; } + virtual bool DoPingTimeout(TColumnShard& /*owner*/, const TMonotonic /*now*/) { + return false; + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* /*owner*/) const { + return nullptr; + } void SwitchStateVerified(const EStatus from, const EStatus to); TTxInfo& MutableTxInfo() { return TxInfo; } + virtual void DoOnTabletInit(TColumnShard& /*owner*/) { + } + void ResetStatusOnUpdate() { Status = {}; } virtual TString DoDebugString() const = 0; - virtual bool DoOnStartAsync(TColumnShard& /*owner*/) { - return false; - } std::optional StartedAsync; public: using TPtr = std::shared_ptr; using TFactory = NObjectFactory::TParametrizedObjectFactory; + using OpType = TString; - bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { - return DoCheckTxInfoForReply(originalTxInfo); + bool PingTimeout(TColumnShard& owner, const TMonotonic now) { + return DoPingTimeout(owner, now); } - [[nodiscard]] bool OnStartAsync(TColumnShard& owner) { - AFL_VERIFY(!StartedAsync); - StartedAsync = DoOnStartAsync(owner); - return *StartedAsync; + bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { + return DoCheckTxInfoForReply(originalTxInfo); } TString DebugString() const { @@ -236,6 +256,10 @@ class TTxController { return DoCheckAllowUpdate(currentTxInfo); } + std::unique_ptr BuildTxPrepareForProgress(TColumnShard* owner) const { + return DoBuildTxPrepareForProgress(owner); + } + bool IsFail() const { return ProposeStartInfo && ProposeStartInfo->IsFail(); } @@ -258,18 +282,23 @@ class TTxController { } ITransactionOperator(const TTxInfo& txInfo) - : TxInfo(txInfo) - {} + : TxInfo(txInfo) { + } ui64 GetTxId() const { return 
TxInfo.TxId; } + OpType GetOpType() const { + return DoGetOpType(); + } + bool IsAsync() const { return DoIsAsync() && Status != EStatus::Failed && Status != EStatus::ReplySent; } - virtual ~ITransactionOperator() {} + virtual ~ITransactionOperator() { + } virtual bool TxWithDeadline() const { return true; @@ -279,14 +308,15 @@ class TTxController { const bool result = DoParse(owner, data); if (!result) { AFL_VERIFY(!onLoad); - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId - << ". Parsing error"); + ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId << ". Parsing error"); SwitchStateVerified(EStatus::Created, EStatus::Failed); } else { SwitchStateVerified(EStatus::Created, EStatus::Parsed); } if (onLoad) { - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); + ProposeStartInfo = + TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); Status = {}; } return result; @@ -336,8 +366,8 @@ class TTxController { return DoFinishProposeOnComplete(owner, ctx); } - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) = 0; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) = 0; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) = 0; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) = 0; virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& ctx) = 0; @@ -345,8 +375,15 @@ class 
TTxController { virtual void RegisterSubscriber(const TActorId&) { AFL_VERIFY(false)("message", "Not implemented"); }; - virtual void OnTabletInit(TColumnShard& /*owner*/) {} + void OnTabletInit(TColumnShard& owner) { + AFL_VERIFY(!StartedAsync); + StartedAsync = true; + DoOnTabletInit(owner); + } }; + TTxProgressCounters& GetCounters() { + return Counters; + } private: const TDuration MaxCommitTxDelay = TDuration::Seconds(30); @@ -354,44 +391,75 @@ class TTxController { std::set DeadlineQueue; std::set PlanQueue; std::set RunningQueue; + TTxProgressCounters Counters; THashMap Operators; - private: ui64 GetAllowedStep() const; - bool AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + bool AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc); - TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); bool StartedFlag = false; + public: TTxController(TColumnShard& owner); - ITransactionOperator::TPtr GetTxOperator(const ui64 txId) const; - ITransactionOperator::TPtr GetVerifiedTxOperator(const ui64 txId) const; - void StartOperators(); + ITransactionOperator::TPtr GetTxOperatorOptional(const ui64 txId) const { + auto it = Operators.find(txId); + if (it == Operators.end()) { + return nullptr; + } + return it->second; + } + ITransactionOperator::TPtr GetTxOperatorVerified(const ui64 txId) const { + return TValidator::CheckNotNull(GetTxOperatorOptional(txId)); + } + template + std::shared_ptr GetTxOperatorVerifiedAs(const ui64 txId) const { 
+ auto result = GetTxOperatorOptional(txId); + AFL_VERIFY(result); + auto resultClass = dynamic_pointer_cast(result); + AFL_VERIFY(resultClass); + return resultClass; + } + + void PingTimeouts(const TMonotonic now) { + auto txInfo = GetFirstPlannedTx(); + if (!txInfo) { + return; + } + GetTxOperatorVerified(txInfo->GetTxId())->PingTimeout(Owner, now); + } ui64 GetMemoryUsage() const; bool HaveOutdatedTxs() const; bool Load(NTabletFlatExecutor::TTransactionContext& txc); - [[nodiscard]] std::shared_ptr UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); + [[nodiscard]] std::shared_ptr UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); [[nodiscard]] std::shared_ptr StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - void StartProposeOnComplete(const ui64 txId, const TActorContext& ctx); - + void StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - + void FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx); + void WriteTxOperatorInfo(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId, const TString& data) { + NIceDb::TNiceDb db(txc.DB); + NColumnShard::Schema::UpdateTxInfoBody(db, txId, data); + } bool ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); bool CompleteOnCancel(const ui64 txId, const TActorContext& ctx); - std::optional StartPlannedTx(); - void FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void CompleteRunningTx(const TPlanQueueItem& tx); + std::optional GetFirstPlannedTx() const; + std::optional PopFirstPlannedTx(); + void ProgressOnExecute(const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc); + void ProgressOnComplete(const TPlanQueueItem& tx); std::optional GetPlannedTx() const; TPlanQueueItem GetFrontTx() const; @@ -412,5 +480,4 @@ class TTxController { void OnTabletInit(); }; -} - +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/ya.make b/ydb/core/tx/columnshard/transactions/ya.make index 8479005c4d67..c6509a2a6473 100644 --- a/ydb/core/tx/columnshard/transactions/ya.make +++ b/ydb/core/tx/columnshard/transactions/ya.make @@ -11,6 +11,7 @@ PEERDIR( ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/transactions/operators ydb/core/tx/columnshard/transactions/transactions + ydb/core/tx/columnshard/transactions/locks ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 700ed7157f15..ad5ec1f688fd 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -14,11 +14,11 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include #include namespace NKikimr { @@ -35,12 +35,6 @@ using TTypeId = NScheme::TTypeId; using TTypeInfo = NScheme::TTypeInfo; using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; -class TDisableCompactionController: public NKikimr::NYDBTest::NColumnShard::TController { -public: - TDisableCompactionController() { - DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - } -}; template bool DataHas(const std::vector>& batches, std::pair range, @@ -538,7 +532,7 @@ void TestWriteReadDup(const TestTableDescription& table = {}) { // read if (planStep != initPlanStep) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, 
TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -597,7 +591,7 @@ void TestWriteReadLongTxDup() { // read TAutoPtr handle; { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -612,7 +606,9 @@ void TestWriteReadLongTxDup() { } void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString codec = "") { - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csControllerGuard->SetOverrideReadTimeoutClean(TDuration::Max()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -681,7 +677,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 1); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -698,7 +694,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 2); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -708,7 +704,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 3 (committed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 3); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -723,7 +719,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 4 (column by id) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 4); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumnIds({1}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -738,7 +734,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 5); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -776,7 +772,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 6, planstep 0 { 
NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 6); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(!rb); @@ -786,7 +782,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 7, planstep 21 (part of index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 7); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -803,7 +799,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 8, planstep 22 (full index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 8); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -833,7 +829,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 9 (committed, indexed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 9); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(23, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(23, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -858,7 +854,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 10 { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 10); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -878,8 +874,8 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString const ui64 committedBytes = reader.GetReadStat("committed_bytes"); Cerr << codec << "/" << compactedBytes << "/" << insertedBytes << "/" << committedBytes << Endl; if (insertedBytes) { - UNIT_ASSERT_GE(insertedBytes / 100000, 40); - UNIT_ASSERT_LE(insertedBytes / 100000, 50); + UNIT_ASSERT_GE(insertedBytes / 100000, 50); + UNIT_ASSERT_LE(insertedBytes / 100000, 60); } if (committedBytes) { UNIT_ASSERT_LE(committedBytes / 100000, 1); @@ -904,7 +900,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 11 (range predicate: closed interval) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, true, true, testYdbPk)); auto rb = reader.ReadAll(); @@ -921,7 +917,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 12 (range predicate: open interval) { 
NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, false, false, testYdbPk)); auto rb = reader.ReadAll(); @@ -1034,7 +1030,7 @@ void TestCompactionInGranuleImpl(bool reboots, const TestTableDescription& table --txId; for (ui32 i = 0; i < 2; ++i) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -1279,7 +1275,7 @@ void TestReadWithProgram(const TestTableDescription& table = {}) ui32 i = 0; for (auto& programText : programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto rb = reader.ReadAll(); if (i < numWrong) { @@ -1348,7 +1344,7 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) { ui32 i = 0; for (auto& ssa : ssas) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssa); auto rb = reader.ReadAll(); @@ -1420,7 +1416,7 @@ void TestSomePrograms(const TestTableDescription& table) { // TODO: add programs with bugs here for (auto& ssaText : programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssaText); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -1530,7 +1526,7 @@ void TestReadAggregate(const std::vector& ydbSchema, for (auto& programText : programs) { Cerr << "-- select program: " << prog << " is filtered: " << (int)isFiltered.count(prog) << "\n"; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto batch = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1890,7 +1886,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1905,7 +1901,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1921,7 +1917,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, 
tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1937,7 +1933,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1958,7 +1954,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2239,7 +2235,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { const ui64 tableId = 1; std::set useFields = {"timestamp", "message"}; { // read with predicate (FROM) - NOlap::NTests::TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); + TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); reader.SetReplyColumns({"timestamp", "message"}); reader.AddRange(MakeRange(Owner.YdbPk)); auto rb = reader.ReadAll(); @@ -2334,7 +2330,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { for (ui32 i = 0; i < 2; ++i) { { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", 
"message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2440,7 +2436,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ui64 numRows = static_cast(*rows).Value(i); ui64 numBytes = static_cast(*bytes).Value(i); ui64 numRawBytes = static_cast(*rawBytes).Value(i); - bool activity = static_cast(*activities).Value(i); + bool activity = static_cast(*activities).Value(i); if (!activity) { continue; } @@ -2584,7 +2580,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Try to read snapshot that is too old { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); reader.SetReplyColumns({"timestamp", "message"}); reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -2594,8 +2590,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { void TestCompactionGC() { TTestBasicRuntime runtime; - TTester::Setup(runtime); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csDefaultControllerGuard->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + TTester::Setup(runtime); runtime.SetLogPriority(NKikimrServices::BLOB_CACHE, NActors::NLog::PRI_INFO); @@ -2753,7 +2751,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { --planStep; --txId; Cerr << compactionsHappened << Endl; - UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action +// UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action ui64 previousCompactionsHappened = compactionsHappened; ui64 previousCleanupsHappened = cleanupsHappened; @@ -2761,12 +2759,13 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Send a request that reads the latest version // This request is 
expected to read at least 1 committed blob and several index portions // These committed blob and portions must not be deleted by the BlobManager until the read request finishes - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); UNIT_ASSERT(CheckOrdered(rb)); UNIT_ASSERT(reader.GetIterationsCount() < 10); + csDefaultControllerGuard->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); // We captured EvReadFinished event and dropped is so the columnshard still thinks that // read request is in progress and keeps the portions @@ -2782,15 +2781,21 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; + Cerr << "delayedBlobs: " << JoinStrings(delayedBlobs.begin(), delayedBlobs.end(), " ") << Endl; // Check that GC happened but it didn't collect some old portions UNIT_ASSERT_GT(compactionsHappened, 
previousCompactionsHappened); - UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); UNIT_ASSERT_GT_C(oldPortions.size(), deletedPortions.size(), "Some old portions must not be deleted because the are in use by read"); UNIT_ASSERT_GT_C(delayedBlobs.size(), 0, "Read request is expected to have at least one committed blob, which deletion must be delayed"); previousCompactionsHappened = compactionsHappened; @@ -2805,9 +2810,25 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } // Advance the time and trigger some more cleanups withno compactions - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - planStep += 2 * delay.MilliSeconds(); - numWrites = 2; + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } + planStep += (2 * delay).MilliSeconds(); + for (ui32 i = 0; i < numWrites; ++i, ++writeId, ++planStep, ++txId) { + std::vector writeIds; + UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); + + ProposeCommit(runtime, sender, txId, writeIds); + PlanCommit(runtime, sender, planStep, txId); + } + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); + csDefaultControllerGuard->SetOverrideRequestsTracePingCheckPeriod(TDuration::Zero()); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } for (ui32 i = 0; i < numWrites; ++i, ++writeId, ++planStep, ++txId) { std::vector writeIds; UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); @@ -2815,9 +2836,12 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsSave().Val() 
== 1); + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsRemove().Val() == 1); - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; @@ -2825,7 +2849,6 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { UNIT_ASSERT_GE(compactionsHappened, previousCompactionsHappened); UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); UNIT_ASSERT_VALUES_EQUAL_C(oldPortions.size(), deletedPortions.size(), "All old portions must be deleted after read has finished"); - UNIT_ASSERT_VALUES_EQUAL_C(delayedBlobs.size(), 0, "All previously delayed deletions must now happen " + JoinSeq(",", delayedBlobs)); } Y_UNIT_TEST(CompactionGC) { diff --git a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp index 50f305bf4fc6..734047952707 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp @@ -3,12 +3,13 @@ #include #include +#include #include -#include -#include -#include +#include +#include +#include namespace NKikimr { @@ -161,7 +162,7 @@ class TColumnChunksCleaner : public NYDBTest::ILocalDBModifier { } }; -class TPortinosCleaner : public NYDBTest::ILocalDBModifier { +class TPortionsCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { using namespace NColumnShard; @@ -185,6 +186,21 @@ class 
TPortinosCleaner : public NYDBTest::ILocalDBModifier { } }; + +class TEmptyPortionsCleaner : public NYDBTest::ILocalDBModifier { +public: + virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (size_t pathId = 100; pathId != 299; ++pathId) { + for (size_t portionId = 1000; portionId != 1199; ++portionId) { + db.Table().Key(pathId, portionId).Update(); + } + } + } +}; + + class TTablesCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { @@ -317,7 +333,11 @@ Y_UNIT_TEST_SUITE(Normalizers) { } Y_UNIT_TEST(PortionsNormalizer) { - TestNormalizerImpl(); + TestNormalizerImpl(); + } + + Y_UNIT_TEST(CleanEmptyPortionsNormalizer) { + TestNormalizerImpl(); } Y_UNIT_TEST(EmptyTablesNormalizer) { diff --git a/ydb/core/tx/columnshard/ut_rw/ya.make b/ydb/core/tx/columnshard/ut_rw/ya.make index d03099069b75..7cf343aad4c6 100644 --- a/ydb/core/tx/columnshard/ut_rw/ya.make +++ b/ydb/core/tx/columnshard/ut_rw/ya.make @@ -22,8 +22,6 @@ PEERDIR( ydb/core/tx/columnshard/test_helper ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing - ydb/core/tx/columnshard/common/tests - ydb/core/tx/columnshard/test_helper ydb/services/metadata ydb/core/tx ydb/public/lib/yson_value diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index cfc9ea9526b5..deb7be3d89e9 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,9 @@ #include #include +#include + +#include namespace NKikimr { @@ -32,6 +35,16 @@ enum class EInitialEviction { namespace { +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); 
+} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + static const std::vector testYdbSchema = TTestSchema::YdbSchema(); static const std::vector testYdbPk = TTestSchema::YdbPkSchema(); @@ -161,7 +174,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); std::vector ts = {1600000000, 1620000000}; ui32 ttlIncSeconds = 1; @@ -246,7 +259,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -278,7 +291,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -312,7 +325,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); 
reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -513,7 +526,7 @@ std::vector> TestTiers(bool reboots, const std::vector(); csControllerGuard->DisableBackground(NYDBTest::ICSController::EBackground::TTL); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -621,9 +634,9 @@ std::vector> TestTiers(bool reboots, const std::vector reader; + std::unique_ptr reader; if (!misconfig) { - reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader->SetReplyColumns({specs[i].TtlColumn}); counter.CaptureReadEvents = specs[i].WaitEmptyAfter ? 0 : 1; // TODO: we need affected by tiering blob here counter.WaitReadsCaptured(runtime); @@ -662,7 +675,7 @@ std::vector> TestTiers(bool reboots, const std::vector())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({columnToRead}); auto rb = reader.ReadAll(); if (expectedReadResult == EExpectedResult::ERROR) { @@ -981,7 +994,7 @@ void TestDrop(bool reboots) { TAutoPtr handle; { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({TTestSchema::DefaultTtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/ut_schema/ya.make b/ydb/core/tx/columnshard/ut_schema/ya.make index 35d906ee2055..d67c0d2ad5b8 100644 --- a/ydb/core/tx/columnshard/ut_schema/ya.make +++ b/ydb/core/tx/columnshard/ut_schema/ya.make @@ -18,6 +18,7 @@ 
PEERDIR( library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index 716b6eae6174..f1f4df107ffe 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -16,7 +16,6 @@ SRCS( columnshard__write_index.cpp columnshard.cpp columnshard_impl.cpp - columnshard_common.cpp columnshard_private_events.cpp columnshard_schema.cpp columnshard_view.cpp diff --git a/ydb/core/tx/conveyor/service/service.cpp b/ydb/core/tx/conveyor/service/service.cpp index cf95fde86778..68900c3e25ec 100644 --- a/ydb/core/tx/conveyor/service/service.cpp +++ b/ydb/core/tx/conveyor/service/service.cpp @@ -7,14 +7,13 @@ namespace NKikimr::NConveyor { TDistributor::TDistributor(const TConfig& config, const TString& conveyorName, TIntrusivePtr<::NMonitoring::TDynamicCounters> conveyorSignals) : Config(config) , ConveyorName(conveyorName) - , Counters(ConveyorName, conveyorSignals) -{ + , Counters(ConveyorName, conveyorSignals) { } void TDistributor::Bootstrap() { const ui32 workersCount = Config.GetWorkersCountForConveyor(NKqp::TStagePredictor::GetUsableThreads()); - AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("action", "conveyor_registered")("actor_id", SelfId())("workers_count", workersCount)("config", Config.DebugString()); + AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("name", ConveyorName)("action", "conveyor_registered")("config", Config.DebugString()); for (ui32 i = 0; i < workersCount; ++i) { const double usage = Config.GetWorkerCPUUsage(i); Workers.emplace_back(Register(new TWorker(ConveyorName, usage, SelfId()))); @@ -36,27 +35,18 @@ void TDistributor::HandleMain(TEvInternal::TEvTaskProcessedResult::TPtr& ev) { Counters.ExecuteHistogram->Collect(dExecution.MilliSeconds()); if (Waiting.size()) { auto task = Waiting.pop(); - 
Counters.WaitingHistogram->Collect((ev->Get()->GetStartInstant() - task.GetCreateInstant()).MilliSeconds()); + Counters.WaitingHistogram->Collect((now - task.GetCreateInstant()).MilliSeconds()); task.OnBeforeStart(); Send(ev->Sender, new TEvInternal::TEvNewTask(task)); } else { Workers.emplace_back(ev->Sender); } - if (ev->Get()->GetOwnerId()) { - if (ev->Get()->IsFail()) { - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << "action=on_error;owner=" << *ev->Get()->GetOwnerId() << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); - Send(*ev->Get()->GetOwnerId(), new TEvExecution::TEvTaskProcessedResult(ev->Get()->GetError())); - } else { - Send(*ev->Get()->GetOwnerId(), new TEvExecution::TEvTaskProcessedResult(ev->Get()->GetResult())); - } - } Counters.WaitingQueueSize->Set(Waiting.size()); Counters.AvailableWorkersCount->Set(Workers.size()); - ALS_DEBUG(NKikimrServices::TX_CONVEYOR) << "action=processed;owner=" << ev->Get()->GetOwnerId().value_or(NActors::TActorId()) << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); } void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { - ALS_DEBUG(NKikimrServices::TX_CONVEYOR) << "action=add_task;owner=" << ev->Sender << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); + AFL_DEBUG(NKikimrServices::TX_CONVEYOR)("action", "add_task")("sender", ev->Sender); Counters.IncomingRate->Inc(); const TString taskClass = ev->Get()->GetTask()->GetTaskClassIdentifier(); @@ -65,7 +55,7 @@ void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { itSignal = Signals.emplace(taskClass, std::make_shared("Conveyor/" + ConveyorName, taskClass)).first; } - TWorkerTask wTask(ev->Get()->GetTask(), ev->Get()->GetTask()->GetOwnerId(), itSignal->second); + TWorkerTask wTask(ev->Get()->GetTask(), itSignal->second); if (Workers.size()) { Counters.WaitingHistogram->Collect(0); @@ -83,11 +73,9 @@ void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { Waiting.push(wTask); 
Counters.WaitWorkerRate->Inc(); } else { - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << "action=overlimit;sender=" << ev->Sender << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); Counters.OverlimitRate->Inc(); - Send(ev->Sender, new TEvExecution::TEvTaskProcessedResult( - TConclusionStatus::Fail("scan conveyor overloaded (" + ::ToString(Waiting.size()) + " >= " + ::ToString(Config.GetQueueSizeLimit()) + ")") - )); + AFL_ERROR(NKikimrServices::TX_CONVEYOR)("action", "queue_overlimit")("sender", ev->Sender)("limit", Config.GetQueueSizeLimit()); + ev->Get()->GetTask()->OnCannotExecute("scan conveyor overloaded (" + ::ToString(Waiting.size()) + " >= " + ::ToString(Config.GetQueueSizeLimit()) + ")"); } Counters.WaitingQueueSize->Set(Waiting.size()); Counters.AvailableWorkersCount->Set(Workers.size()); diff --git a/ydb/core/tx/conveyor/service/service.h b/ydb/core/tx/conveyor/service/service.h index 833952090905..f13629f967ee 100644 --- a/ydb/core/tx/conveyor/service/service.h +++ b/ydb/core/tx/conveyor/service/service.h @@ -4,7 +4,10 @@ #include #include #include +#include + #include + #include namespace NKikimr::NConveyor { @@ -86,11 +89,13 @@ class TDistributor: public TActorBootstrapped { public: STATEFN(StateMain) { + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("name", ConveyorName) + ("workers", Workers.size())("waiting", Waiting.size())("actor_id", SelfId()); switch (ev->GetTypeRewrite()) { hFunc(TEvExecution::TEvNewTask, HandleMain); hFunc(TEvInternal::TEvTaskProcessedResult, HandleMain); default: - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << ConveyorName << ": unexpected event for task executor: " << ev->GetTypeRewrite(); + AFL_ERROR(NKikimrServices::TX_CONVEYOR)("problem", "unexpected event for task executor")("ev_type", ev->GetTypeName()); break; } } diff --git a/ydb/core/tx/conveyor/service/worker.cpp b/ydb/core/tx/conveyor/service/worker.cpp index 42c0b4e239d7..8861de7e7a67 100644 --- 
a/ydb/core/tx/conveyor/service/worker.cpp +++ b/ydb/core/tx/conveyor/service/worker.cpp @@ -7,11 +7,8 @@ void TWorker::ExecuteTask(const TWorkerTask& workerTask) { if (CPUUsage < 1) { start = TMonotonic::Now(); } - if (workerTask.GetTask()->Execute(workerTask.GetTaskSignals())) { - TBase::Sender(workerTask, workerTask.GetTask()).SendTo(DistributorId); - } else { - TBase::Sender(workerTask, workerTask.GetTask()->GetErrorMessage()).SendTo(DistributorId); - } + Y_UNUSED(workerTask.GetTask()->Execute(workerTask.GetTaskSignals(), workerTask.GetTask())); + TBase::Sender(workerTask).SendTo(DistributorId); if (CPUUsage < 1) { Schedule((TMonotonic::Now() - *start) * (1 - CPUUsage), new NActors::TEvents::TEvWakeup); WaitWakeUp = true; diff --git a/ydb/core/tx/conveyor/service/worker.h b/ydb/core/tx/conveyor/service/worker.h index 662a6b2e00d6..34904557a57e 100644 --- a/ydb/core/tx/conveyor/service/worker.h +++ b/ydb/core/tx/conveyor/service/worker.h @@ -14,7 +14,6 @@ namespace NKikimr::NConveyor { class TWorkerTask { private: YDB_READONLY_DEF(ITask::TPtr, Task); - YDB_READONLY_DEF(std::optional, OwnerId); YDB_READONLY(TMonotonic, CreateInstant, TMonotonic::Now()); YDB_READONLY_DEF(std::shared_ptr, TaskSignals); std::optional StartInstant; @@ -28,9 +27,8 @@ class TWorkerTask { return *StartInstant; } - TWorkerTask(ITask::TPtr task, const std::optional& ownerId, std::shared_ptr taskSignals) + TWorkerTask(ITask::TPtr task, std::shared_ptr taskSignals) : Task(task) - , OwnerId(ownerId) , TaskSignals(taskSignals) { Y_ABORT_UNLESS(task); @@ -66,23 +64,13 @@ struct TEvInternal { }; class TEvTaskProcessedResult: - public NActors::TEventLocal, - public TConclusion { + public NActors::TEventLocal { private: using TBase = TConclusion; YDB_READONLY_DEF(TMonotonic, StartInstant); - YDB_READONLY_DEF(std::optional, OwnerId); public: - TEvTaskProcessedResult(const TWorkerTask& originalTask, const TString& errorMessage) - : TBase(TConclusionStatus::Fail(errorMessage)) - , 
StartInstant(originalTask.GetStartInstant()) - , OwnerId(originalTask.GetOwnerId()) { - - } - TEvTaskProcessedResult(const TWorkerTask& originalTask, ITask::TPtr result) - : TBase(result) - , StartInstant(originalTask.GetStartInstant()) - , OwnerId(originalTask.GetOwnerId()) { + TEvTaskProcessedResult(const TWorkerTask& originalTask) + : StartInstant(originalTask.GetStartInstant()) { } }; diff --git a/ydb/core/tx/conveyor/usage/abstract.cpp b/ydb/core/tx/conveyor/usage/abstract.cpp index 4d423bed7caa..1bfa3357f530 100644 --- a/ydb/core/tx/conveyor/usage/abstract.cpp +++ b/ydb/core/tx/conveyor/usage/abstract.cpp @@ -5,41 +5,35 @@ #include namespace NKikimr::NConveyor { -bool ITask::Execute(std::shared_ptr signals) { +TConclusionStatus ITask::Execute(std::shared_ptr signals, const std::shared_ptr& taskPtr) { AFL_VERIFY(!ExecutedFlag); ExecutedFlag = true; - bool result = false; const TMonotonic start = TMonotonic::Now(); try { - result = DoExecute(); - if (!result) { + TConclusionStatus result = DoExecute(taskPtr); + if (result.IsFail()) { if (signals) { signals->Fails->Add(1); signals->FailsDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } - if (!ErrorMessage) { - ErrorMessage = "cannot execute task (not specified error message)"; - } } else { if (signals) { signals->Success->Add(1); signals->SuccessDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } } + return result; } catch (...) 
{ if (signals) { signals->Fails->Add(1); signals->FailsDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } - TStringBuilder sbLocalMessage; - sbLocalMessage << "exception: " << CurrentExceptionMessage(); - if (!ErrorMessage) { - ErrorMessage = sbLocalMessage; - } else { - ErrorMessage += sbLocalMessage; - } + return TConclusionStatus::Fail("exception: " + CurrentExceptionMessage()); } - return result; +} + +void ITask::DoOnCannotExecute(const TString& reason) { + AFL_VERIFY(false)("problem", "cannot execute conveyor task")("reason", reason); } } diff --git a/ydb/core/tx/conveyor/usage/abstract.h b/ydb/core/tx/conveyor/usage/abstract.h index fb80a80b02e6..26f95d16cacf 100644 --- a/ydb/core/tx/conveyor/usage/abstract.h +++ b/ydb/core/tx/conveyor/usage/abstract.h @@ -3,8 +3,9 @@ #include #include - #include +#include + #include namespace NKikimr::NConveyor { @@ -19,8 +20,7 @@ class TTaskSignals: public NColumnShard::TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr SuccessDuration; TTaskSignals(const TString& moduleId, const TString& taskClassIdentifier, TIntrusivePtr<::NMonitoring::TDynamicCounters> baseSignals = nullptr) - : TBase(moduleId, baseSignals) - { + : TBase(moduleId, baseSignals) { DeepSubGroup("task_class", taskClassIdentifier); Fails = TBase::GetDeriviative("Fails"); FailsDuration = TBase::GetDeriviative("FailsDuration"); @@ -37,32 +37,21 @@ class ITask { Low = 0 }; private: - YDB_READONLY_DEF(TString, ErrorMessage); YDB_ACCESSOR(EPriority, Priority, EPriority::Normal); - YDB_READONLY_DEF(std::optional, OwnerId); bool ExecutedFlag = false; protected: - ITask& SetErrorMessage(const TString& message) { - ErrorMessage = message; - return *this; - } - virtual bool DoExecute() = 0; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) = 0; + virtual void DoOnCannotExecute(const TString& reason); public: - ITask(const std::optional& ownerId = {}) - : OwnerId(ownerId) - { - - } using TPtr = std::shared_ptr; virtual 
~ITask() = default; virtual TString GetTaskClassIdentifier() const = 0; - bool HasError() const { - return !!ErrorMessage; + void OnCannotExecute(const TString& reason) { + return DoOnCannotExecute(reason); } - - bool Execute(std::shared_ptr signals); + TConclusionStatus Execute(std::shared_ptr signals, const std::shared_ptr& taskPtr); }; } diff --git a/ydb/core/tx/conveyor/usage/events.cpp b/ydb/core/tx/conveyor/usage/events.cpp index beb3b35860d0..3a3387fcd90f 100644 --- a/ydb/core/tx/conveyor/usage/events.cpp +++ b/ydb/core/tx/conveyor/usage/events.cpp @@ -1,5 +1,12 @@ #include "events.h" +#include namespace NKikimr::NConveyor { +TEvExecution::TEvNewTask::TEvNewTask(ITask::TPtr task) + : Task(task) +{ + AFL_VERIFY(Task); +} + } diff --git a/ydb/core/tx/conveyor/usage/events.h b/ydb/core/tx/conveyor/usage/events.h index 9d4075d35fa9..c392d179b306 100644 --- a/ydb/core/tx/conveyor/usage/events.h +++ b/ydb/core/tx/conveyor/usage/events.h @@ -10,7 +10,6 @@ namespace NKikimr::NConveyor { struct TEvExecution { enum EEv { EvNewTask = EventSpaceBegin(TKikimrEvents::ES_CONVEYOR), - EvTaskProcessedResult, EvEnd }; @@ -22,18 +21,7 @@ struct TEvExecution { public: TEvNewTask() = default; - explicit TEvNewTask(ITask::TPtr task) - : Task(task) { - } - }; - - class TEvTaskProcessedResult: - public NActors::TEventLocal, - public TConclusion { - private: - using TBase = TConclusion; - public: - using TBase::TBase; + explicit TEvNewTask(ITask::TPtr task); }; }; diff --git a/ydb/core/tx/conveyor/usage/service.h b/ydb/core/tx/conveyor/usage/service.h index 097f9944bc7b..6ba3c3320fde 100644 --- a/ydb/core/tx/conveyor/usage/service.h +++ b/ydb/core/tx/conveyor/usage/service.h @@ -19,7 +19,7 @@ class TAsyncTaskExecutor: public TActorBootstrapped { void Bootstrap() { auto gAway = PassAwayGuard(); - Task->Execute(nullptr); + Task->Execute(nullptr, Task); } }; @@ -47,10 +47,7 @@ class TServiceOperatorImpl { context.Send(MakeServiceId(selfId.NodeId()), new 
NConveyor::TEvExecution::TEvNewTask(task)); return true; } else { - task->Execute(nullptr); - if (task->GetOwnerId()) { - context.Send(*task->GetOwnerId(), new NConveyor::TEvExecution::TEvTaskProcessedResult(task)); - } + task->Execute(nullptr, task); return false; } } diff --git a/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp b/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp index d34df2ed3f30..d74c67376705 100644 --- a/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp +++ b/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp @@ -103,7 +103,12 @@ void TTxCoordinator::Handle(TEvTxProxy::TEvAcquireReadStep::TPtr& ev, const TAct return; } - if (ReadOnlyLeaseEnabled()) { + // Note: when volatile state is preserved we don't want to update the last + // acquired step, because the new generation might miss that and invariants + // of read-step not going back would be violated. Run the code below using + // the normal tx, which will almost certainly fail (the storage is supposed + // to be blocked already), or successfully persist the new read step. + if (ReadOnlyLeaseEnabled() && !VolatileState.Preserved) { // We acquire read step using a read-only lease from executor // It is guaranteed that any future generation was not running at // the time ConfirmReadOnlyLease was called.
diff --git a/ydb/core/tx/coordinator/coordinator__plan_step.cpp b/ydb/core/tx/coordinator/coordinator__plan_step.cpp index 2954ff4faafb..c5ce361b4b52 100644 --- a/ydb/core/tx/coordinator/coordinator__plan_step.cpp +++ b/ydb/core/tx/coordinator/coordinator__plan_step.cpp @@ -1,4 +1,5 @@ #include "coordinator_impl.h" +#include "coordinator_hooks.h" #include @@ -42,7 +43,29 @@ struct TTxCoordinator::TTxPlanStep : public TTransactionBase { } void Plan(TTransactionContext &txc, const TActorContext &ctx) { - Y_UNUSED(txc); + if (Self->VolatileState.Preserved) { + // A preserved state indicates a newer generation has been started + // already, and this coordinator will stop eventually. Decline + // all pending transactions. + for (auto& slot : Slots) { + for (auto& proposal : slot) { + Self->MonCounters.StepPlannedDeclinedTx->Inc(); + ProxyPlanConfirmations.Queue.emplace_back( + proposal.TxId, + proposal.Proxy, + TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusRestarting, + 0); + ++DeclinedCounter; + } + } + Self->SendStepConfirmations(ProxyPlanConfirmations, ctx); + return; + } + + if (auto* hooks = ICoordinatorHooks::Get(); Y_UNLIKELY(hooks)) { + hooks->BeginPlanStep(Self->TabletID(), Self->Executor()->Generation(), PlanOnStep); + } + NIceDb::TNiceDb db(txc.DB); ExecStartMoment = ctx.Now(); const bool lowDiskSpace = Self->Executor()->GetStats().IsAnyChannelYellowStop; diff --git a/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp b/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp index af411d349ed5..f3269e362cab 100644 --- a/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp +++ b/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp @@ -125,10 +125,12 @@ struct TTxCoordinator::TTxRestoreTransactions : public TTransactionBaseVolatileTransactions) { auto txId = pr.first; auto &tx = pr.second; + maxStep = Max(maxStep, tx.PlanOnStep); for (auto &prmed : tx.UnconfirmedAffectedSet) { auto medId = prmed.first; auto &medTx = 
GetMediatorTx(medId, tx.PlanOnStep, txId); @@ -137,6 +139,7 @@ struct TTxCoordinator::TTxRestoreTransactions : public TTransactionBaseVolatileTransactions.size(); Self->Transactions.swap(transactions); *Self->MonCounters.TxInFly += txCounter; Self->MonCounters.CurrentTxInFly = txCounter; - if (Self->PrevStateActorId) { - NIceDb::TNiceDb db(txc.DB); + NIceDb::TNiceDb db(txc.DB); + // Previous coordinator might have had transactions that were after + // its persistent blocked range, but before LastPlanned was updated. + // Since we pick them up as planned and send to mediators we also need + // to make sure LastPlanned reflects that. + if (Self->VolatileState.LastPlanned < maxVolatileStep) { + Self->VolatileState.LastPlanned = maxVolatileStep; + Schema::SaveState(db, Schema::State::KeyLastPlanned, maxVolatileStep); + } + + if (Self->PrevStateActorId) { ui64 volatileLeaseMs = Self->VolatilePlanLeaseMs; if (volatileLeaseMs > 0) { // Make sure we start and persist new state actor before allowing clients to acquire new read steps diff --git a/ydb/core/tx/coordinator/coordinator_hooks.cpp b/ydb/core/tx/coordinator/coordinator_hooks.cpp index 742ed2bdc4b6..225ed046ac1e 100644 --- a/ydb/core/tx/coordinator/coordinator_hooks.cpp +++ b/ydb/core/tx/coordinator/coordinator_hooks.cpp @@ -14,6 +14,12 @@ namespace NKikimr::NFlatTxCoordinator { return true; } + void ICoordinatorHooks::BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) { + Y_UNUSED(tabletId); + Y_UNUSED(generation); + Y_UNUSED(planStep); + } + ICoordinatorHooks* ICoordinatorHooks::Get() { return CoordinatorHooks.load(std::memory_order_acquire); } diff --git a/ydb/core/tx/coordinator/coordinator_hooks.h b/ydb/core/tx/coordinator/coordinator_hooks.h index 4f96fd25d146..e38dbc72cc96 100644 --- a/ydb/core/tx/coordinator/coordinator_hooks.h +++ b/ydb/core/tx/coordinator/coordinator_hooks.h @@ -10,6 +10,7 @@ namespace NKikimr::NFlatTxCoordinator { public: virtual bool PersistConfig(ui64 tabletId, const 
NKikimrSubDomains::TProcessingParams& config); + virtual void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep); public: static ICoordinatorHooks* Get(); diff --git a/ydb/core/tx/coordinator/coordinator_impl.cpp b/ydb/core/tx/coordinator/coordinator_impl.cpp index cfdc7bb55fc5..9a130616231b 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.cpp +++ b/ydb/core/tx/coordinator/coordinator_impl.cpp @@ -328,6 +328,11 @@ ui64 TTxCoordinator::AlignPlanStep(ui64 step) { void TTxCoordinator::Handle(TEvPrivate::TEvPlanTick::TPtr &ev, const TActorContext &ctx) { //LOG_DEBUG_S(ctx, NKikimrServices::TX_COORDINATOR, "tablet# " << TabletID() << " HANDLE EvPlanTick LastPlanned " << VolatileState.LastPlanned); + if (VolatileState.Preserved) { + // Avoid planning any new transactions, wait until we are stopped + return; + } + ui64 next = ev->Get()->Step; while (!PendingPlanTicks.empty() && PendingPlanTicks.front() <= next) { PendingPlanTicks.pop_front(); @@ -556,8 +561,14 @@ void TTxCoordinator::TryInitMonCounters(const TActorContext &ctx) { } void TTxCoordinator::SendMediatorStep(TMediator &mediator, const TActorContext &ctx) { + if (VolatileState.Preserved) { + // We don't want to send new steps when state has been preserved and + // potentially sent to newer generations. 
+ return; + } + if (!mediator.Active) { - // We don't want to update LastSentStep when mediators are not empty + // We don't want to update LastSentStep when mediators are not connected return; } diff --git a/ydb/core/tx/coordinator/coordinator_impl.h b/ydb/core/tx/coordinator/coordinator_impl.h index 83272721ee4c..8369e8418f6e 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.h +++ b/ydb/core/tx/coordinator/coordinator_impl.h @@ -433,6 +433,10 @@ class TTxCoordinator : public TActor, public TTabletExecutedFlat TVector AcquireReadStepPending; bool AcquireReadStepFlushing = false; bool AcquireReadStepStarting = false; + + // When true the state has been preserved by the state actor + // Any changes will not be migrated to newer generations + bool Preserved = false; }; public: diff --git a/ydb/core/tx/coordinator/coordinator_state.cpp b/ydb/core/tx/coordinator/coordinator_state.cpp index fa1351c54770..70f91a736244 100644 --- a/ydb/core/tx/coordinator/coordinator_state.cpp +++ b/ydb/core/tx/coordinator/coordinator_state.cpp @@ -80,6 +80,7 @@ void TCoordinatorStateActor::PreserveState() { Y_ABORT_UNLESS(ok); } + Owner->VolatileState.Preserved = true; } STFUNC(TCoordinatorStateActor::StateWork) { diff --git a/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp b/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp index 8d75974813aa..cb57683785b4 100644 --- a/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp +++ b/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -357,6 +358,539 @@ namespace NKikimr::NFlatTxCoordinator::NTest { UNIT_ASSERT_VALUES_EQUAL(observedSteps.size(), 1u); } + /** + * Tests a scenario where coordinator's volatile lease expires, which + * causes coordinator to update the lease during volatile planning. + * That transaction is migrated to a newer instance, but commit updating + * last known step fails. 
A bug caused new instances to reach a confused + * state, which could cause it to attempt planning more transactions in + * the same step, violating invariants. + */ + Y_UNIT_TEST(CoordinatorMigrateUncommittedVolatileTx) { + struct TCoordinatorHooks : public ICoordinatorHooks { + std::vector PlannedSteps; + + void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) override { + Cerr << "... coordinator " << tabletId << " gen " << generation << " is planning step " << planStep << Endl; + PlannedSteps.push_back(planStep); + } + } hooks; + TCoordinatorHooksGuard hooksGuard(hooks); + + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetNodeCount(1) + .SetUseRealThreads(false) + .SetDomainPlanResolution(50); + + Tests::TServer::TPtr server = new TServer(serverSettings); + + auto &runtime = *server->GetRuntime(); + runtime.SetLogPriority(NKikimrServices::TX_COORDINATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_EXEC_QUEUE, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_TABLETQUEUE, NActors::NLog::PRI_DEBUG); + + auto sender = runtime.AllocateEdgeActor(); + ui64 coordinatorId = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 tabletId = ChangeStateStorage(TTestTxConfig::TxTablet0, server->GetSettings().Domain); + + CreateTestBootstrapper(runtime, + CreateTestTabletInfo(tabletId, TTabletTypes::Dummy), + [](const TActorId& tablet, TTabletStorageInfo* info) { + return new TPlanTargetTablet(tablet, info); + }); + + { + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot, 1)); + runtime.DispatchEvents(options); + } + + auto waitFor = [&](const auto& condition, const TString& 
description) { + for (int i = 0; i < 5 && !condition(); ++i) { + Cerr << "... waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + } + UNIT_ASSERT_C(condition(), "... failed to wait for " << description); + }; + + // Wait for the first idle mediator step + waitFor([&]{ return hooks.PlannedSteps.size() >= 2; }, "the first two planned steps"); + + auto oldTimestamp = runtime.GetCurrentTime(); + auto oldCoordinatorSysActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ true); + auto oldCoordinatorUserActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ false); + + // Sleep for 500ms, so the default volatile lease of 250ms will expire + runtime.SimulateSleep(TDuration::MilliSeconds(500)); + + hooks.PlannedSteps.clear(); + + // Start blocking EvPut responses for the new plan (but allowing data to commit) + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver( + [&](TEvBlobStorage::TEvPutResult::TPtr& ev) { + auto* msg = ev->Get(); + if (hooks.PlannedSteps.size() > 0 && msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... blocking put " << msg->Id << " response" << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + // Block target tablet's accept messages to keep transactions in mediator + std::vector> blockedPlanStepAccepted; + auto blockPlanStepAccepted = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStepAccepted::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Record.GetTabletId() == tabletId) { + Cerr << "... 
blocked accept from " << tabletId << Endl; + blockedPlanStepAccepted.emplace_back(ev.Release()); + } + }); + + // Plan a persistent transaction + ui64 persistentTxId = 10000000; + if (auto propose = std::make_unique(coordinatorId, persistentTxId, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until we have some commit responses blocked + // This will ensure planned tx is persisted, but coordinator will not act on it yet + waitFor([&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 persistentPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Stop blocking put responses + blockPutResponses.Remove(); + + // Start blocking EvPut requests (not allowing data to commit) + std::vector> blockedPutRequests; + auto blockPutRequests = runtime.AddObserver( + [&](TEvBlobStorage::TEvPut::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... 
blocking put " << msg->Id << " request" << Endl; + blockedPutRequests.emplace_back(ev.Release()); + } + }); + + // Plan a volatile transaction, expected to be planned for Step+1 + ui64 volatileTxId1 = 10000010; + if (auto propose = std::make_unique(coordinatorId, volatileTxId1, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + tx->SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned volatile tx"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatilePlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + UNIT_ASSERT_C(volatilePlanStep > persistentPlanStep, + "Volatile plan step " << volatilePlanStep << " should be after persistent plan step " << persistentPlanStep); + + // Make sure everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // We expect there to be a commit attempt (extending the lease and updating last planned) + UNIT_ASSERT_C(blockedPutRequests.size() > 0, "expected to have put requests blocked by now"); + blockPutRequests.Remove(); + + // We want to start a new instance in parallel + // Block the old bootstrapper from starting unwanted instances when current tablet dies + bool oldTabletStopped = false; + auto blockOldTabletDead = runtime.AddObserver( + [&](TEvTablet::TEvTabletDead::TPtr& ev) { + if (ev->Sender == oldCoordinatorSysActorId) { + if (ev->GetRecipientRewrite() == oldCoordinatorUserActorId) { + oldTabletStopped = true; + } else { + ev.Reset(); + } + } + }); + + // New instance will migrate the in-memory state, block it from reaching the new instance temporarily + std::vector> blockedStateResponses; + auto blockStateResponses = 
runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStateResponse::TPtr& ev) { + Cerr << "... blocking state response from " << ev->Sender << " to " << ev->GetRecipientRewrite() << Endl; + Cerr << ev->Get()->Record.DebugString(); + blockedStateResponses.emplace_back(ev.Release()); + }); + + // Rewind to some older time + runtime.UpdateCurrentTime(oldTimestamp, /* rewind */ true); + + // Start a new bootstrapper, which will boot a new instance in parallel + Cerr << "... starting a new coordinator instance" << Endl; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(coordinatorId, TTabletTypes::Coordinator), &CreateFlatTxCoordinator); + + // Wait until new coordinator almost receives the in-memory state + waitFor([&]{ return blockedStateResponses.size() >= 1; }, "migrated state"); + + // Unblock previously blocked blobstorage messages + // Since new coordinator has started the storage is already blocked + Cerr << "... unblocking put responses and requests" << Endl; + for (auto& ev : blockedPutResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedPutResponses.clear(); + for (auto& ev : blockedPutRequests) { + runtime.Send(ev.release(), 0, true); + } + blockedPutRequests.clear(); + + // Sleep a little, so everything settles (e.g. 
committed plan is sent to mediator) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + waitFor([&]{ return oldTabletStopped; }, "old tablet stopped"); + hooks.PlannedSteps.clear(); + + // Unblock the in-memory state transfer + blockStateResponses.Remove(); + for (auto& ev : blockedStateResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedStateResponses.clear(); + + // Make sure new requests go to the new instance + InvalidateTabletResolverCache(runtime, coordinatorId); + + // Plan another volatile transaction, with a smaller TxId + ui64 volatileTxId2 = 10000005; + if (auto propose = std::make_unique(coordinatorId, volatileTxId2, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + tx->SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned volatile tx"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatilePlanStep2 = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Wait until everything settles (e.g. mediators receive all pending transactions) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Reboot the target tablet to trigger the original assertion + Cerr << "... 
rebooting target tablet" << Endl; + RebootTablet(runtime, tabletId, sender); + + // Wait until everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Validate the new plan does not go back in time + UNIT_ASSERT_C(volatilePlanStep2 > volatilePlanStep, + "New volatile plan step " << volatilePlanStep2 << " is expected to be after " << volatilePlanStep); + } + + /** + * This scenario tests an empty volatile plan that is scheduled behind + * a persistent plan, which finishes committing after an in-memory + * state has been snapshotted and migrated. There was a bug where this + * empty plan step would not be considered as confirmed, and could be + * erroneously considered as unused by a previous generation. + */ + Y_UNIT_TEST(CoordinatorRestartWithEnqueuedVolatileStep) { + struct TCoordinatorHooks : public ICoordinatorHooks { + std::vector PlannedSteps; + + void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) override { + Cerr << "... coordinator " << tabletId << " gen " << generation << " is planning step " << planStep << Endl; + PlannedSteps.push_back(planStep); + } + } hooks; + TCoordinatorHooksGuard hooksGuard(hooks); + + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetNodeCount(1) + .SetUseRealThreads(false) + .SetDomainPlanResolution(50); + + Tests::TServer::TPtr server = new TServer(serverSettings); + + auto &runtime = *server->GetRuntime(); + runtime.SetLogPriority(NKikimrServices::TX_COORDINATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_EXEC_QUEUE, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_TABLETQUEUE, NActors::NLog::PRI_DEBUG); + + auto sender = runtime.AllocateEdgeActor(); + ui64 coordinatorId = 
ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 mediatorId = ChangeStateStorage(Mediator, server->GetSettings().Domain); + ui64 tabletId = ChangeStateStorage(TTestTxConfig::TxTablet0, server->GetSettings().Domain); + + CreateTestBootstrapper(runtime, + CreateTestTabletInfo(tabletId, TTabletTypes::Dummy), + [](const TActorId& tablet, TTabletStorageInfo* info) { + return new TPlanTargetTablet(tablet, info); + }); + + { + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot, 1)); + runtime.DispatchEvents(options); + } + + auto waitFor = [&](const auto& condition, const TString& description) { + for (int i = 0; i < 5 && !condition(); ++i) { + Cerr << "... waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + } + UNIT_ASSERT_C(condition(), "... failed to wait for " << description); + }; + + // Wait for the first idle mediator step + waitFor([&]{ return hooks.PlannedSteps.size() >= 2; }, "the first two planned steps"); + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + hooks.PlannedSteps.clear(); + + auto oldTimestamp = runtime.GetCurrentTime(); + auto oldCoordinatorSysActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ true); + auto oldCoordinatorUserActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ false); + + // Start blocking EvPut responses for the new plan (but allowing data to commit) + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver( + [&](TEvBlobStorage::TEvPutResult::TPtr& ev) { + auto* msg = ev->Get(); + if (hooks.PlannedSteps.size() > 0 && msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... 
blocking put " << msg->Id << " response" << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + // Block target tablet's accept messages to keep transactions in mediator + std::vector> blockedPlanStepAccepted; + auto blockPlanStepAccepted = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStepAccepted::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Record.GetTabletId() == tabletId) { + Cerr << "... blocked accept from " << tabletId << Endl; + blockedPlanStepAccepted.emplace_back(ev.Release()); + } + }); + + // step -> list of transactions + std::map> observedSteps; + auto observeSteps = runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStep::TPtr& ev) { + auto* msg = ev->Get(); + Cerr << "... observed step:" << Endl; + Cerr << msg->Record.DebugString(); + if (msg->Record.GetCoordinatorID() != coordinatorId) { + return; + } + ui64 step = msg->Record.GetStep(); + std::vector txIds; + for (const auto& tx : msg->Record.GetTransactions()) { + txIds.push_back(tx.GetTxId()); + } + std::sort(txIds.begin(), txIds.end()); + auto it = observedSteps.find(step); + if (it == observedSteps.end()) { + observedSteps[step] = std::move(txIds); + } else { + auto dumpTxIds = [](const std::vector& txIds) -> TString { + TStringBuilder sb; + sb << "{"; + bool first = true; + for (ui64 txId : txIds) { + if (first) { + first = false; + } else { + sb << ", "; + } + sb << txId; + } + sb << "}"; + return std::move(sb); + }; + UNIT_ASSERT_C(it->second == txIds, + "Step " << step << " changed transactions list " + << dumpTxIds(it->second) << " -> " << dumpTxIds(txIds)); + } + }); + + // txId -> step + std::map observedTabletTxs; + auto observeTabletTxs = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStep::TPtr& ev) { + auto* msg = ev->Get(); + Cerr << "... 
observed tablet step:" << Endl; + Cerr << msg->Record.DebugString(); + ui64 step = msg->Record.GetStep(); + for (auto& tx : msg->Record.GetTransactions()) { + observedTabletTxs[tx.GetTxId()] = step; + } + }); + + // Plan a persistent transaction + ui64 persistentTxId = 10000000; + if (auto propose = std::make_unique(coordinatorId, persistentTxId, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until we have some commit responses blocked + waitFor([&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 persistentPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Require an empty step, it should be divisible by plan resolution + runtime.SendToPipe(coordinatorId, sender, new TEvTxProxy::TEvRequirePlanSteps(coordinatorId, persistentPlanStep + 50)); + + // Wait until it is also planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planning for the required step"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatileEmptyPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Stop blocking newer put responses + blockPutResponses.Remove(); + + // Make sure everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // We want to start a new instance in parallel + // Block the old bootstrapper from starting unwanted instances when current tablet dies + bool oldTabletStopped = false; + auto blockOldTabletDead = runtime.AddObserver( + [&](TEvTablet::TEvTabletDead::TPtr& ev) { + if (ev->Sender == oldCoordinatorSysActorId) { + if (ev->GetRecipientRewrite() == oldCoordinatorUserActorId) { + oldTabletStopped = true; + } else { + ev.Reset(); + } + } + }); + 
+ // New instance will migrate the in-memory state, block it from reaching the new instance temporarily + std::vector> blockedStateResponses; + auto blockStateResponses = runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStateResponse::TPtr& ev) { + Cerr << "... blocking state response from " << ev->Sender << " to " << ev->GetRecipientRewrite() << Endl; + Cerr << ev->Get()->Record.DebugString(); + blockedStateResponses.emplace_back(ev.Release()); + }); + + // Rewind to some older time + runtime.UpdateCurrentTime(oldTimestamp, /* rewind */ true); + + // Start a new bootstrapper, which will boot a new instance in parallel + Cerr << "... starting a new coordinator instance" << Endl; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(coordinatorId, TTabletTypes::Coordinator), &CreateFlatTxCoordinator); + + // Wait until new coordinator almost receives the in-memory state + waitFor([&]{ return blockedStateResponses.size() >= 1; }, "migrated state"); + + // Unblock previously blocked blobstorage messages + // Since new coordinator has started the storage is already blocked + Cerr << "... unblocking put responses and requests" << Endl; + for (auto& ev : blockedPutResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedPutResponses.clear(); + + // Sleep a little, so everything settles (e.g. committed plan is sent to mediator) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + waitFor([&]{ return oldTabletStopped; }, "old tablet stopped"); + hooks.PlannedSteps.clear(); + + // Unblock the in-memory state transfer + blockStateResponses.Remove(); + for (auto& ev : blockedStateResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedStateResponses.clear(); + + size_t oldObservedSteps = observedSteps.size(); + + // Make sure new requests go to the new instance + InvalidateTabletResolverCache(runtime, coordinatorId); + + // Plan another persistent transaction + ui64 persistentTxId2 = 10000011; + Cerr << "... 
trying to plan tx " << persistentTxId2 << Endl; + if (auto propose = std::make_unique(coordinatorId, persistentTxId2, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned another persistent tx"); + + // Wait until mediator observes it + waitFor([&]{ return observedSteps.size() > oldObservedSteps; }, "new step reaches mediator"); + + runtime.SimulateSleep(TDuration::MilliSeconds(50)); + UNIT_ASSERT_C(observedTabletTxs.contains(persistentTxId2), + "Tablet did not observe a persistent tx " << persistentTxId2); + + Y_UNUSED(sender); + Y_UNUSED(coordinatorId); + Y_UNUSED(mediatorId); + + Y_UNUSED(oldTimestamp); + Y_UNUSED(oldCoordinatorSysActorId); + Y_UNUSED(oldCoordinatorUserActorId); + + Y_UNUSED(persistentPlanStep); + Y_UNUSED(volatileEmptyPlanStep); + } + } // Y_UNIT_TEST_SUITE(CoordinatorVolatile) } // namespace NKikimr::NFlatTxCoordinator::NTest diff --git a/ydb/core/tx/data_events/columnshard_splitter.cpp b/ydb/core/tx/data_events/columnshard_splitter.cpp index 5b435a0f9198..19a787167270 100644 --- a/ydb/core/tx/data_events/columnshard_splitter.cpp +++ b/ydb/core/tx/data_events/columnshard_splitter.cpp @@ -66,9 +66,10 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit } TFullSplitData result(sharding->GetShardsCount()); + const TString schemaString = NArrow::SerializeSchema(*batch->schema()); for (auto&& [shardId, chunks] : split.GetResult()) { for (auto&& c : chunks) { - result.AddShardInfo(shardId, std::make_shared(c.GetSchemaData(), c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); + result.AddShardInfo(shardId, std::make_shared(schemaString, 
c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); } } diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index d9f17beceec4..bd4f06284e9d 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -103,6 +103,13 @@ struct TDataEvents { return result; } + static std::unique_ptr BuildCompleted(const ui64 origin) { + auto result = std::make_unique(); + result->Record.SetOrigin(origin); + result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + return result; + } + static std::unique_ptr BuildCompleted(const ui64 origin, const ui64 txId) { auto result = std::make_unique(); result->Record.SetOrigin(origin); @@ -116,7 +123,9 @@ struct TDataEvents { result->Record.SetOrigin(origin); result->Record.SetTxId(txId); result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); - *result->Record.AddTxLocks() = lock; + auto& lockResult = *result->Record.AddTxLocks(); + lockResult = lock; + lockResult.SetHasWrites(true); return result; } diff --git a/ydb/core/tx/data_events/shard_writer.cpp b/ydb/core/tx/data_events/shard_writer.cpp index 0158527fee49..7f935f5270cc 100644 --- a/ydb/core/tx/data_events/shard_writer.cpp +++ b/ydb/core/tx/data_events/shard_writer.cpp @@ -18,19 +18,25 @@ namespace NKikimr::NEvWrite { void TWritersController::OnSuccess(const ui64 shardId, const ui64 writeId, const ui32 writePartId) { WriteIds[WritesIndex.Inc() - 1] = TWriteIdForShard(shardId, writeId, writePartId); + Counters->OnCSReply(TMonotonic::Now() - StartInstant); if (!WritesCount.Dec()) { - auto req = MakeHolder(LongTxId); - for (auto&& i : WriteIds) { - req->AddWrite(i.GetShardId(), i.GetWriteId()); - } - LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + SendReply(); } } void TWritersController::OnFail(const Ydb::StatusIds::StatusCode code, const TString& message) { - NYql::TIssues issues; - 
issues.AddIssue(message); - LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(code, issues)); + Counters->OnCSFailed(code); + FailsCount.Inc(); + if (!Code) { + TGuard g(Mutex); + if (!Code) { + Issues.AddIssue(message); + Code = code; + } + } + if (!WritesCount.Dec()) { + SendReply(); + } } TShardWriter::TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, @@ -60,7 +66,7 @@ namespace NKikimr::NEvWrite { const auto ydbStatus = msg->GetYdbStatus(); if (ydbStatus == Ydb::StatusIds::OVERLOADED) { - if (RetryWriteRequest()) { + if (RetryWriteRequest(true)) { return; } } @@ -81,7 +87,7 @@ namespace NKikimr::NEvWrite { const auto* msg = ev->Get(); Y_ABORT_UNLESS(msg->TabletId == ShardId); - if (RetryWriteRequest()) { + if (RetryWriteRequest(true)) { return; } @@ -99,7 +105,7 @@ namespace NKikimr::NEvWrite { RetryWriteRequest(false); } - bool TShardWriter::RetryWriteRequest(bool delayed) { + bool TShardWriter::RetryWriteRequest(const bool delayed) { if (NumRetries >= MaxRetriesPerShard) { return false; } diff --git a/ydb/core/tx/data_events/shard_writer.h b/ydb/core/tx/data_events/shard_writer.h index d7abe2e2a3ed..0a649a4dd3b9 100644 --- a/ydb/core/tx/data_events/shard_writer.h +++ b/ydb/core/tx/data_events/shard_writer.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace NKikimr::NEvWrite { @@ -22,19 +23,88 @@ class TWriteIdForShard { TWriteIdForShard(const ui64 shardId, const ui64 writeId, const ui32 writePartId) : ShardId(shardId) , WriteId(writeId) - , WritePartId(writePartId) - { + , WritePartId(writePartId) { + } +}; + +class TCSUploadCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestsCount; + NMonitoring::THistogramPtr CSReplyDuration; + NMonitoring::THistogramPtr SucceedFullReplyDuration; + NMonitoring::THistogramPtr FailedFullReplyDuration; + 
NMonitoring::THistogramPtr BytesDistribution; + NMonitoring::THistogramPtr RowsDistribution; + NMonitoring::TDynamicCounters::TCounterPtr RowsCount; + NMonitoring::TDynamicCounters::TCounterPtr BytesCount; + NMonitoring::TDynamicCounters::TCounterPtr FailsCount; +public: + TCSUploadCounters() + : TBase("CSUpload") + , RequestsCount(TBase::GetDeriviative("Requests")) + , CSReplyDuration(TBase::GetHistogram("Replies/Shard/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , SucceedFullReplyDuration(TBase::GetHistogram("Replies/Success/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , FailedFullReplyDuration(TBase::GetHistogram("Replies/Failed/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , BytesDistribution(TBase::GetHistogram("Requests/Bytes", NMonitoring::ExponentialHistogram(15, 2, 1024))) + , RowsDistribution(TBase::GetHistogram("Requests/Rows", NMonitoring::ExponentialHistogram(15, 2, 16))) + , RowsCount(TBase::GetDeriviative("Rows")) + , BytesCount(TBase::GetDeriviative("Bytes")) + , FailsCount(TBase::GetDeriviative("Fails")) { + } -}; + void OnRequest(const ui64 rows, const ui64 bytes) const { + BytesDistribution->Collect(bytes); + RowsDistribution->Collect(rows); + BytesCount->Add(bytes); + RowsCount->Add(rows); + } + + void OnCSFailed(const Ydb::StatusIds::StatusCode /*code*/) { + FailsCount->Add(1); + } + + void OnCSReply(const TDuration d) const { + CSReplyDuration->Collect(d.MilliSeconds()); + } + + void OnSucceedFullReply(const TDuration d) const { + SucceedFullReplyDuration->Collect(d.MilliSeconds()); + } + + void OnFailedFullReply(const TDuration d) const { + FailedFullReplyDuration->Collect(d.MilliSeconds()); + } +}; // External transaction controller class class TWritersController { private: TAtomicCounter WritesCount = 0; TAtomicCounter WritesIndex = 0; + TAtomicCounter FailsCount = 0; + TMutex Mutex; + NYql::TIssues Issues; + std::optional Code; NActors::TActorIdentity LongTxActorId; std::vector 
WriteIds; + const TMonotonic StartInstant = TMonotonic::Now(); YDB_READONLY_DEF(NLongTxService::TLongTxId, LongTxId); + YDB_READONLY(std::shared_ptr, Counters, std::make_shared()); + void SendReply() { + if (FailsCount.Val()) { + Counters->OnFailedFullReply(TMonotonic::Now() - StartInstant); + AFL_VERIFY(Code); + LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(*Code, Issues)); + } else { + Counters->OnSucceedFullReply(TMonotonic::Now() - StartInstant); + auto req = MakeHolder(LongTxId); + for (auto&& i : WriteIds) { + req->AddWrite(i.GetShardId(), i.GetWriteId()); + } + LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + } + } public: using TPtr = std::shared_ptr; @@ -94,10 +164,6 @@ class TShardWriter: public NActors::TActorBootstrapped { TBase::PassAway(); } public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ_SHARD_WRITER; - } - TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, const NWilson::TProfileSpan& parentSpan, TWritersController::TPtr externalController, const ui32 writePartIdx, const EModificationType mType); @@ -115,6 +181,6 @@ class TShardWriter: public NActors::TActorBootstrapped { void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev); void HandleTimeout(const TActorContext& ctx); private: - bool RetryWriteRequest(bool delayed = true); + bool RetryWriteRequest(const bool delayed = true); }; } diff --git a/ydb/core/tx/data_events/shards_splitter.h b/ydb/core/tx/data_events/shards_splitter.h index bed95ce29d03..77c04e1160e9 100644 --- a/ydb/core/tx/data_events/shards_splitter.h +++ b/ydb/core/tx/data_events/shards_splitter.h @@ -3,7 +3,6 @@ #include #include - #include #include @@ -21,6 +20,8 @@ class IShardsSplitter { using TYdbConclusionStatus = TConclusionSpecialStatus; class IEvWriteDataAccessor { + private: + YDB_READONLY(ui64, Size, 0); public: 
using TPtr = std::shared_ptr; @@ -29,6 +30,11 @@ class IShardsSplitter { } virtual std::shared_ptr GetDeserializedBatch() const = 0; virtual TString GetSerializedData() const = 0; + IEvWriteDataAccessor(const ui64 size) + : Size(size) + { + + } virtual ~IEvWriteDataAccessor() {} }; diff --git a/ydb/core/tx/data_events/write_data.cpp b/ydb/core/tx/data_events/write_data.cpp index 0457f72d86a7..390667624dda 100644 --- a/ydb/core/tx/data_events/write_data.cpp +++ b/ydb/core/tx/data_events/write_data.cpp @@ -17,9 +17,4 @@ TWriteData::TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, c Y_ABORT_UNLESS(BlobsAction); } -const NKikimr::NEvWrite::IDataContainer& TWriteData::GetDataVerified() const { - AFL_VERIFY(Data); - return *Data; -} - } diff --git a/ydb/core/tx/data_events/write_data.h b/ydb/core/tx/data_events/write_data.h index 8ed419bb0f0b..0acbec1bcf98 100644 --- a/ydb/core/tx/data_events/write_data.h +++ b/ydb/core/tx/data_events/write_data.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -37,7 +38,7 @@ class TWriteMeta { YDB_ACCESSOR_DEF(TString, DedupId); YDB_READONLY(TString, Id, TGUID::CreateTimebased().AsUuidString()); - YDB_ACCESSOR(EModificationType, ModificationType, EModificationType::Upsert); + YDB_ACCESSOR(EModificationType, ModificationType, EModificationType::Replace); YDB_READONLY(TMonotonic, WriteStartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle1StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle2StartInstant, TMonotonic::Now()); @@ -45,7 +46,21 @@ class TWriteMeta { YDB_ACCESSOR(TMonotonic, WriteMiddle4StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle5StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle6StartInstant, TMonotonic::Now()); + std::optional LockId; public: + void SetLockId(const ui64 lockId) { + LockId = lockId; + } + + ui64 GetLockIdVerified() const { + AFL_VERIFY(LockId); + return *LockId; + } + + std::optional 
GetLockIdOptional() const { + return LockId; + } + bool IsGuaranteeWriter() const { switch (ModificationType) { case EModificationType::Delete: @@ -72,10 +87,14 @@ class TWriteData { YDB_READONLY_DEF(IDataContainer::TPtr, Data); YDB_READONLY_DEF(std::shared_ptr, PrimaryKeySchema); YDB_READONLY_DEF(std::shared_ptr, BlobsAction); + YDB_ACCESSOR_DEF(std::optional, SchemaSubset); public: TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, const std::shared_ptr& blobsAction); - const IDataContainer& GetDataVerified() const; + const NArrow::TSchemaSubset& GetSchemaSubsetVerified() const { + AFL_VERIFY(SchemaSubset); + return *SchemaSubset; + } const TWriteMeta& GetWriteMeta() const { return WriteMeta; diff --git a/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp b/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp index 1e661132460d..5aafd5a20ada 100644 --- a/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp +++ b/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp @@ -83,15 +83,9 @@ EExecutionStatus TBuildKqpDataTxOutRSUnit::Execute(TOperation::TPtr op, TTransac auto allocGuard = tasksRunner.BindAllocator(txc.GetMemoryLimit() - dataTx->GetTxSize()); - NKqp::NRm::TKqpResourcesRequest req; - req.MemoryPool = NKqp::NRm::EKqpMemoryPool::DataQuery; - req.ExternalMemory = txc.GetMemoryLimit(); - ui64 taskId = dataTx->GetFirstKqpTaskId(); - - NKqp::GetKqpResourceManager()->NotifyExternalResourcesAllocated(tx->GetTxId(), taskId, req); - + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Add(txc.GetMemoryLimit()); Y_DEFER { - NKqp::GetKqpResourceManager()->FreeResources(tx->GetTxId(), taskId); + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Sub(txc.GetMemoryLimit()); }; LOG_T("Operation " << *op << " (build_kqp_data_tx_out_rs) at " << tabletId diff --git a/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp b/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp index 
6473bc62ba66..0c228730c13a 100644 --- a/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp +++ b/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp @@ -33,6 +33,10 @@ class TDataShard::TTxCdcStreamEmitHeartbeats: public NTabletFlatExecutor::TTrans TTxType GetTxType() const override { return TXTYPE_CDC_STREAM_EMIT_HEARTBEATS; } bool Execute(TTransactionContext& txc, const TActorContext&) override { + if (Self->State != TShardState::Ready) { + return true; + } + LOG_I("Emit change records" << ": edge# " << Edge << ", at tablet# " << Self->TabletID()); @@ -91,27 +95,27 @@ void TDataShard::EmitHeartbeats() { return; } + // We may possibly have more writes at this version + TRowVersion edge = GetMvccTxVersion(EMvccTxMode::ReadWrite); + bool wait = true; + if (const auto& plan = TransQueue.GetPlan()) { - const auto version = Min(plan.begin()->ToRowVersion(), VolatileTxManager.GetMinUncertainVersion()); - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(version)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, version)); - } - return; + edge = Min(edge, plan.begin()->ToRowVersion()); + wait = false; } if (auto version = VolatileTxManager.GetMinUncertainVersion(); !version.IsMax()) { - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(version)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, version)); - } - return; + edge = Min(edge, version); + wait = false; } - const TRowVersion nextWrite = GetMvccTxVersion(EMvccTxMode::ReadWrite); - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(nextWrite)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, nextWrite)); + if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(edge)) { + return Execute(new TTxCdcStreamEmitHeartbeats(this, edge)); } - WaitPlanStep(lowest.Next().Step); + if (wait) { + WaitPlanStep(lowest.Next().Step); + } } void TCdcStreamHeartbeatManager::Reset() { @@ -211,7 +215,7 @@ bool TCdcStreamHeartbeatManager::ShouldEmitHeartbeat(const TRowVersion& edge) co return false; } - if (Schedule.top().Version 
> edge) { + if (Schedule.top().Version >= edge) { return false; } @@ -221,7 +225,7 @@ bool TCdcStreamHeartbeatManager::ShouldEmitHeartbeat(const TRowVersion& edge) co THashMap TCdcStreamHeartbeatManager::EmitHeartbeats( NTable::TDatabase& db, const TRowVersion& edge) { - if (Schedule.empty() || Schedule.top().Version > edge) { + if (!ShouldEmitHeartbeat(edge)) { return {}; } @@ -230,7 +234,7 @@ THashMap TCdcStreamHeartbea while (true) { const auto& top = Schedule.top(); - if (top.Version > edge) { + if (top.Version >= edge) { break; } diff --git a/ydb/core/tx/datashard/cdc_stream_scan.cpp b/ydb/core/tx/datashard/cdc_stream_scan.cpp index 2f67fe5a79ac..321147e0ddb4 100644 --- a/ydb/core/tx/datashard/cdc_stream_scan.cpp +++ b/ydb/core/tx/datashard/cdc_stream_scan.cpp @@ -240,7 +240,8 @@ class TDataShard::TTxCdcStreamScanProgress const auto& valueTags = ev.ValueTags; LOG_D("Progress" - << ": streamPathId# " << streamPathId); + << ": streamPathId# " << streamPathId + << ", rows# " << ev.Rows.size()); if (!Self->GetUserTables().contains(tablePathId.LocalPathId)) { LOG_W("Cannot progress on unknown table" diff --git a/ydb/core/tx/datashard/change_record_cdc_serializer.cpp b/ydb/core/tx/datashard/change_record_cdc_serializer.cpp index 6e6eb098d28a..aa7d0e0a0d9c 100644 --- a/ydb/core/tx/datashard/change_record_cdc_serializer.cpp +++ b/ydb/core/tx/datashard/change_record_cdc_serializer.cpp @@ -91,10 +91,16 @@ class TJsonSerializer: public TBaseSerializer { friend class TChangeRecord; // used in GetPartitionKey() static NJson::TJsonWriterConfig DefaultJsonConfig() { - NJson::TJsonWriterConfig jsonConfig; - jsonConfig.ValidateUtf8 = false; - jsonConfig.WriteNanAsString = true; - return jsonConfig; + constexpr ui32 doubleNDigits = std::numeric_limits::max_digits10; + constexpr ui32 floatNDigits = std::numeric_limits::max_digits10; + constexpr EFloatToStringMode floatMode = EFloatToStringMode::PREC_NDIGITS; + return NJson::TJsonWriterConfig { + .DoubleNDigits = doubleNDigits, + 
.FloatNDigits = floatNDigits, + .FloatToStringMode = floatMode, + .ValidateUtf8 = false, + .WriteNanAsString = true, + }; } protected: diff --git a/ydb/core/tx/datashard/change_sender_async_index.cpp b/ydb/core/tx/datashard/change_sender_async_index.cpp index 38492b20728e..106d03406b58 100644 --- a/ydb/core/tx/datashard/change_sender_async_index.cpp +++ b/ydb/core/tx/datashard/change_sender_async_index.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -435,16 +436,6 @@ class TAsyncIndexChangeSenderMain return Check(&TSchemeCacheHelpers::CheckEntryKind, &TThis::LogWarnAndRetry, entry, expected); } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); // partition = shard - } - - return result; - } - /// ResolveUserTable void ResolveUserTable() { @@ -611,6 +602,11 @@ class TAsyncIndexChangeSenderMain return; } + if (IndexTableVersion && IndexTableVersion == entry.Self->Info.GetVersion().GetGeneralVersion()) { + CreateSenders(); + return Become(&TThis::StateMain); + } + TagMap.clear(); TVector keyColumnTypes; @@ -692,11 +688,9 @@ class TAsyncIndexChangeSenderMain return Retry(); } - const bool versionChanged = !IndexTableVersion || IndexTableVersion != entry.GeneralVersion; IndexTableVersion = entry.GeneralVersion; - KeyDesc = std::move(entry.KeyDescription); - CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged); + CreateSenders(NChangeExchange::MakePartitionIds(KeyDesc->GetPartitions())); Become(&TThis::StateMain); } diff --git a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp index 5300357c24cd..ecf916263b6d 100644 --- a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp +++ b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ 
-300,45 +300,6 @@ class TCdcChangeSenderMain , public NChangeExchange::ISenderFactory , private NSchemeCache::TSchemeCacheHelpers { - struct TPQPartitionInfo { - ui32 PartitionId; - ui64 ShardId; - TPartitionKeyRange KeyRange; - - struct TLess { - TConstArrayRef Schema; - - TLess(const TVector& schema) - : Schema(schema) - { - } - - bool operator()(const TPQPartitionInfo& lhs, const TPQPartitionInfo& rhs) const { - Y_ABORT_UNLESS(lhs.KeyRange.ToBound || rhs.KeyRange.ToBound); - - if (!lhs.KeyRange.ToBound) { - return false; - } - - if (!rhs.KeyRange.ToBound) { - return true; - } - - Y_ABORT_UNLESS(lhs.KeyRange.ToBound && rhs.KeyRange.ToBound); - - const int compares = CompareTypedCellVectors( - lhs.KeyRange.ToBound->GetCells().data(), - rhs.KeyRange.ToBound->GetCells().data(), - Schema.data(), Schema.size() - ); - - return (compares < 0); - } - - }; // TLess - - }; // TPQPartitionInfo - TStringBuf GetLogPrefix() const { if (!LogPrefix) { LogPrefix = TStringBuilder() @@ -430,16 +391,6 @@ class TCdcChangeSenderMain return false; } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); - } - - return result; - } - /// ResolveCdcStream void ResolveCdcStream() { @@ -561,77 +512,27 @@ class TCdcChangeSenderMain return; } - const auto& pqDesc = entry.PQGroupInfo->Description; - const auto& pqConfig = pqDesc.GetPQTabletConfig(); - - TVector schema; - PartitionToShard.clear(); - - schema.reserve(pqConfig.PartitionKeySchemaSize()); - for (const auto& keySchema : pqConfig.GetPartitionKeySchema()) { - // TODO: support pg types - schema.push_back(NScheme::TTypeInfo(keySchema.GetTypeId())); + const auto topicVersion = entry.Self->Info.GetVersion().GetGeneralVersion(); + if (TopicVersion && TopicVersion == topicVersion) { + CreateSenders(); + return Become(&TThis::StateMain); } - TSet partitions(schema); - THashSet shards; - - for (const 
auto& partition : pqDesc.GetPartitions()) { - const auto partitionId = partition.GetPartitionId(); - const auto shardId = partition.GetTabletId(); - - PartitionToShard.emplace(partitionId, shardId); - - auto keyRange = TPartitionKeyRange::Parse(partition.GetKeyRange()); - Y_ABORT_UNLESS(!keyRange.FromBound || keyRange.FromBound->GetCells().size() == schema.size()); - Y_ABORT_UNLESS(!keyRange.ToBound || keyRange.ToBound->GetCells().size() == schema.size()); - - partitions.insert({partitionId, shardId, std::move(keyRange)}); - shards.insert(shardId); - } - - // used to validate - bool isFirst = true; - const TPQPartitionInfo* prev = nullptr; - - TVector partitioning; - partitioning.reserve(partitions.size()); - for (const auto& cur : partitions) { - if (isFirst) { - isFirst = false; - Y_ABORT_UNLESS(!cur.KeyRange.FromBound.Defined()); - } else { - Y_ABORT_UNLESS(cur.KeyRange.FromBound.Defined()); - Y_ABORT_UNLESS(prev); - Y_ABORT_UNLESS(prev->KeyRange.ToBound.Defined()); - // TODO: compare cells - } - - auto& part = partitioning.emplace_back(cur.PartitionId); // TODO: double-check that it is right partitioning - - if (cur.KeyRange.ToBound) { - part.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{ - .EndKeyPrefix = *cur.KeyRange.ToBound, - }; - } else { - part.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{}; - } + TopicVersion = topicVersion; - prev = &cur; - } + const auto& pqDesc = entry.PQGroupInfo->Description; - if (prev) { - Y_ABORT_UNLESS(!prev->KeyRange.ToBound.Defined()); + PartitionToShard.clear(); + for (const auto& partition : pqDesc.GetPartitions()) { + PartitionToShard.emplace(partition.GetPartitionId(), partition.GetTabletId()); } - const auto topicVersion = entry.Self->Info.GetVersion().GetGeneralVersion(); - const bool versionChanged = !TopicVersion || TopicVersion != topicVersion; - TopicVersion = topicVersion; - - KeyDesc = NKikimr::TKeyDesc::CreateMiniKeyDesc(schema); - KeyDesc->Partitioning = std::make_shared>(std::move(partitioning)); + 
Y_ABORT_UNLESS(entry.PQGroupInfo->Schema); + KeyDesc = NKikimr::TKeyDesc::CreateMiniKeyDesc(entry.PQGroupInfo->Schema); + Y_ABORT_UNLESS(entry.PQGroupInfo->Partitioning); + KeyDesc->Partitioning = std::make_shared>(entry.PQGroupInfo->Partitioning); - CreateSenders(MakePartitionIds(*KeyDesc->Partitioning), versionChanged); + CreateSenders(NChangeExchange::MakePartitionIds(*KeyDesc->Partitioning)); Become(&TThis::StateMain); } diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp index ba07749ec285..363e8b1658d1 100644 --- a/ydb/core/tx/datashard/datashard.cpp +++ b/ydb/core/tx/datashard/datashard.cpp @@ -855,6 +855,39 @@ void TDataShard::PersistChangeRecord(NIceDb::TNiceDb& db, const TChangeRecord& r NIceDb::TUpdate(record.GetKind()), NIceDb::TUpdate(record.GetBody()), NIceDb::TUpdate(record.GetSource())); + + auto res = ChangesQueue.emplace(record.GetOrder(), record); + Y_VERIFY_S(res.second, "Duplicate change record: " << record.GetOrder()); + + if (res.first->second.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } + + if (CommittingChangeRecords.empty()) { + db.GetDatabase().OnCommit([this] { + CommittingChangeRecords.clear(); + }); + db.GetDatabase().OnRollback([this] { + for (const auto order : CommittingChangeRecords) { + auto cIt = ChangesQueue.find(order); + Y_VERIFY_S(cIt != ChangesQueue.end(), "Cannot find change record: " << order); + + if (cIt->second.SchemaSnapshotAcquired) { + const auto snapshotKey = TSchemaSnapshotKey(cIt->second.TableId, cIt->second.SchemaVersion); + if (const auto last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); + } + } + + ChangesQueue.erase(cIt); + } + + CommittingChangeRecords.clear(); + }); + } + + CommittingChangeRecords.push_back(record.GetOrder()); } else { auto& state = LockChangeRecords[lockId]; 
Y_ABORT_UNLESS(state.Changes.empty() || state.Changes.back().LockOffset < record.GetLockOffset(), @@ -934,6 +967,14 @@ void TDataShard::CommitLockChangeRecords(NIceDb::TNiceDb& db, ui64 lockId, ui64 committed.Step = rowVersion.Step; committed.TxId = rowVersion.TxId; collected.push_back(committed); + + auto res = ChangesQueue.emplace(committed.Order, committed); + Y_VERIFY_S(res.second, "Duplicate change record: " << committed.Order); + + if (res.first->second.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } } Y_VERIFY_S(!CommittedLockChangeRecords.contains(lockId), "Cannot commit lock " << lockId << " more than once"); @@ -960,7 +1001,26 @@ void TDataShard::CommitLockChangeRecords(NIceDb::TNiceDb& db, ui64 lockId, ui64 LockChangeRecords.erase(it); }); db.GetDatabase().OnRollback([this, lockId]() { - CommittedLockChangeRecords.erase(lockId); + auto it = CommittedLockChangeRecords.find(lockId); + Y_VERIFY_S(it != CommittedLockChangeRecords.end(), "Unexpected failure to find lockId# " << lockId); + + for (size_t i = 0; i < it->second.Count; ++i) { + const ui64 order = it->second.Order + i; + + auto cIt = ChangesQueue.find(order); + Y_VERIFY_S(cIt != ChangesQueue.end(), "Cannot find change record: " << order); + + if (cIt->second.SchemaSnapshotAcquired) { + const auto snapshotKey = TSchemaSnapshotKey(cIt->second.TableId, cIt->second.SchemaVersion); + if (const auto last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); + } + } + + ChangesQueue.erase(cIt); + } + + CommittedLockChangeRecords.erase(it); }); } @@ -994,7 +1054,6 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { auto it = ChangesQueue.find(order); if (it == ChangesQueue.end()) { - Y_VERIFY_DEBUG_S(false, "Trying to remove non-enqueud record: " << order); return; } @@ -1022,23 +1081,9 @@ void 
TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { ChangesQueueBytes -= record.BodySize; if (record.SchemaSnapshotAcquired) { - Y_ABORT_UNLESS(record.TableId); - auto tableIt = TableInfos.find(record.TableId.LocalPathId); - - if (tableIt != TableInfos.end()) { - const auto snapshotKey = TSchemaSnapshotKey(record.TableId, record.SchemaVersion); - const bool last = SchemaSnapshotManager.ReleaseReference(snapshotKey); - - if (last) { - const auto* snapshot = SchemaSnapshotManager.FindSnapshot(snapshotKey); - Y_ABORT_UNLESS(snapshot); - - if (snapshot->Schema->GetTableSchemaVersion() < tableIt->second->GetTableSchemaVersion()) { - SchemaSnapshotManager.RemoveShapshot(db, snapshotKey); - } - } - } else { - Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + const auto snapshotKey = TSchemaSnapshotKey(record.TableId, record.SchemaVersion); + if (const bool last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); } } @@ -1047,6 +1092,8 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { if (!--rIt->second) { ChangeQueueReservations.erase(rIt); } + + SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); } UpdateChangeExchangeLag(AppData()->TimeProvider->Now()); @@ -1054,12 +1101,24 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { IncCounter(COUNTER_CHANGE_RECORDS_REMOVED); SetCounter(COUNTER_CHANGE_QUEUE_SIZE, ChangesQueue.size()); - SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); CheckChangesQueueNoOverflow(); } -void TDataShard::EnqueueChangeRecords(TVector&& records, ui64 cookie) { +void TDataShard::EnqueueChangeRecords(TVector&& records, ui64 cookie, bool afterMove) { + if (auto it = ChangeQueueReservations.find(cookie); it != ChangeQueueReservations.end()) { + Y_ABORT_UNLESS(!afterMove); + + ChangeQueueReservedCapacity -= it->second; + it->second = records.size(); + ChangeQueueReservedCapacity 
+= it->second; + if (!it->second) { + ChangeQueueReservations.erase(it); + } + + SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); + } + if (!records) { return; } @@ -1079,41 +1138,31 @@ void TDataShard::EnqueueChangeRecords(TVectorTimeProvider->Now(); TVector forward(Reserve(records.size())); for (const auto& record : records) { - forward.emplace_back(record.Order, record.PathId, record.BodySize); + auto it = ChangesQueue.find(record.Order); + if (it == ChangesQueue.end()) { + Y_ABORT_UNLESS(afterMove); + continue; + } - auto res = ChangesQueue.emplace( - std::piecewise_construct, - std::forward_as_tuple(record.Order), - std::forward_as_tuple(record, now, cookie) - ); - if (res.second) { - ChangesList.PushBack(&res.first->second); + forward.emplace_back(record.Order, record.PathId, record.BodySize); - Y_ABORT_UNLESS(ChangesQueueBytes <= (Max() - record.BodySize)); - ChangesQueueBytes += record.BodySize; + it->second.EnqueuedAt = now; + it->second.ReservationCookie = cookie; + ChangesList.PushBack(&it->second); - if (record.SchemaVersion) { - res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( - TSchemaSnapshotKey(record.TableId, record.SchemaVersion)); - } - } - } - - if (auto it = ChangeQueueReservations.find(cookie); it != ChangeQueueReservations.end()) { - ChangeQueueReservedCapacity -= it->second; - ChangeQueueReservedCapacity += records.size(); + Y_ABORT_UNLESS(ChangesQueueBytes <= (Max() - record.BodySize)); + ChangesQueueBytes += record.BodySize; } UpdateChangeExchangeLag(now); IncCounter(COUNTER_CHANGE_RECORDS_ENQUEUED, forward.size()); SetCounter(COUNTER_CHANGE_QUEUE_SIZE, ChangesQueue.size()); - SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); Y_ABORT_UNLESS(OutChangeSender); Send(OutChangeSender, new NChangeExchange::TEvChangeExchange::TEvEnqueueRecords(std::move(forward))); } -ui32 TDataShard::GetFreeChangeQueueCapacity(ui64 cookie) { +ui32 
TDataShard::GetFreeChangeQueueCapacity(ui64 cookie) const { const ui64 sizeLimit = AppData()->DataShardConfig.GetChangesQueueItemsLimit(); if (sizeLimit < ChangesQueue.size()) { return 0; @@ -1265,6 +1314,14 @@ bool TDataShard::LoadChangeRecords(NIceDb::TNiceDb& db, TVectorsecond.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } + if (!rowset.Next()) { return false; } @@ -1363,6 +1420,14 @@ bool TDataShard::LoadChangeRecordCommits(NIceDb::TNiceDb& db, TVectorsecond.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } } LockChangeRecords.erase(lockId); @@ -1421,6 +1486,51 @@ void TDataShard::ScheduleRemoveAbandonedLockChanges() { } } +void TDataShard::ScheduleRemoveSchemaSnapshot(const TSchemaSnapshotKey& key) { + Y_ABORT_UNLESS(!SchemaSnapshotManager.HasReference(key)); + + const auto* snapshot = SchemaSnapshotManager.FindSnapshot(key); + Y_ABORT_UNLESS(snapshot); + + auto it = TableInfos.find(key.PathId); + if (it == TableInfos.end()) { + Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + return; + } + + if (snapshot->Schema->GetTableSchemaVersion() < it->second->GetTableSchemaVersion()) { + bool wasEmpty = PendingSchemaSnapshotsToGc.empty(); + PendingSchemaSnapshotsToGc.push_back(key); + if (wasEmpty) { + Send(SelfId(), new TEvPrivate::TEvRemoveSchemaSnapshots); + } + } +} + +void TDataShard::ScheduleRemoveAbandonedSchemaSnapshots() { + bool wasEmpty = PendingSchemaSnapshotsToGc.empty(); + + for (const auto& [key, snapshot] : SchemaSnapshotManager.GetSnapshots()) { + auto it = TableInfos.find(key.PathId); + if (it == TableInfos.end()) { + Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + break; + } + if (SchemaSnapshotManager.HasReference(key)) { + continue; + } + if 
(snapshot.Schema->GetTableSchemaVersion() >= it->second->GetTableSchemaVersion()) { + continue; + } + + PendingSchemaSnapshotsToGc.push_back(key); + } + + if (wasEmpty && !PendingSchemaSnapshotsToGc.empty()) { + Send(SelfId(), new TEvPrivate::TEvRemoveSchemaSnapshots); + } +} + void TDataShard::PersistSchemeTxResult(NIceDb::TNiceDb &db, const TSchemaOperation &op) { db.Table().Key(op.TxId).Update( NIceDb::TUpdate(op.Success), @@ -1649,8 +1759,18 @@ void TDataShard::AddSchemaSnapshot(const TPathId& pathId, ui64 tableSchemaVersio Y_ABORT_UNLESS(TableInfos.contains(pathId.LocalPathId)); auto tableInfo = TableInfos[pathId.LocalPathId]; - const auto key = TSchemaSnapshotKey(pathId.OwnerId, pathId.LocalPathId, tableSchemaVersion); + const auto key = TSchemaSnapshotKey(pathId, tableSchemaVersion); SchemaSnapshotManager.AddSnapshot(txc.DB, key, TSchemaSnapshot(tableInfo, step, txId)); + + const auto& snapshots = SchemaSnapshotManager.GetSnapshots(); + for (auto it = snapshots.lower_bound(TSchemaSnapshotKey(pathId, 1)); it != snapshots.end(); ++it) { + if (it->first == key) { + break; + } + if (!SchemaSnapshotManager.HasReference(it->first)) { + ScheduleRemoveSchemaSnapshot(it->first); + } + } } void TDataShard::PersistLastLoanTableTid(NIceDb::TNiceDb& db, ui32 localTid) { diff --git a/ydb/core/tx/datashard/datashard__column_stats.cpp b/ydb/core/tx/datashard/datashard__column_stats.cpp index 9a926e0c92cb..105c3155d499 100644 --- a/ydb/core/tx/datashard/datashard__column_stats.cpp +++ b/ydb/core/tx/datashard/datashard__column_stats.cpp @@ -63,13 +63,13 @@ class TStatisticsScan: public NTable::IScan { record.SetShardTabletId(ShardTabletId); if (abort != EAbort::None) { - record.SetStatus(NKikimrStat::TEvStatisticsResponse::ABORTED); + record.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_ABORTED); TlsActivationContext->Send(new IEventHandle(ReplyTo, TActorId(), response.release(), 0, Cookie)); delete this; return nullptr; } - 
record.SetStatus(NKikimrStat::TEvStatisticsResponse::SUCCESS); + record.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS); auto tags = Scheme->Tags(); for (size_t t = 0; t < tags.size(); ++t) { auto* column = record.AddColumns(); @@ -132,16 +132,16 @@ void TDataShard::HandleSafe(NStat::TEvStatistics::TEvStatisticsRequest::TPtr& ev auto response = std::make_unique(); response->Record.SetShardTabletId(TabletID()); - const auto& tableId = record.GetTableId(); - if (PathOwnerId != tableId.GetOwnerId()) { - response->Record.SetStatus(NKikimrStat::TEvStatisticsResponse::ERROR); + const auto& pathId = record.GetTable().GetPathId(); + if (PathOwnerId != pathId.GetOwnerId()) { + response->Record.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_ERROR); Send(ev->Sender, response.release(), 0, ev->Cookie); return; } - auto infoIt = TableInfos.find(tableId.GetTableId()); + auto infoIt = TableInfos.find(pathId.GetLocalId()); if (infoIt == TableInfos.end()) { - response->Record.SetStatus(NKikimrStat::TEvStatisticsResponse::ERROR); + response->Record.SetStatus(NKikimrStat::TEvStatisticsResponse::STATUS_ERROR); Send(ev->Sender, response.release(), 0, ev->Cookie); return; } diff --git a/ydb/core/tx/datashard/datashard__engine_host.cpp b/ydb/core/tx/datashard/datashard__engine_host.cpp index 2155bcf91a43..71a46e61854a 100644 --- a/ydb/core/tx/datashard/datashard__engine_host.cpp +++ b/ydb/core/tx/datashard/datashard__engine_host.cpp @@ -370,11 +370,11 @@ class TDataShardEngineHost final TSmallVec ops; ConvertTableValues(Scheme, tableInfo, commands, ops, nullptr); - UserDb.UpdateRow(tableId, key, ops); + UserDb.UpsertRow(tableId, key, ops); } - void UpdateRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { - UserDb.UpdateRow(tableId, key, ops); + void UpsertRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { + UserDb.UpsertRow(tableId, key, ops); } void ReplaceRow(const TTableId& tableId, const TArrayRef key, 
const TArrayRef ops) override { @@ -385,6 +385,10 @@ class TDataShardEngineHost final UserDb.InsertRow(tableId, key, ops); } + void UpdateRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { + UserDb.UpdateRow(tableId, key, ops); + } + void EraseRow(const TTableId& tableId, const TArrayRef& row) override { if (TSysTables::IsSystemTable(tableId)) { DataShardSysTable(tableId).EraseRow(row); diff --git a/ydb/core/tx/datashard/datashard__init.cpp b/ydb/core/tx/datashard/datashard__init.cpp index 89981068248d..a0561a9c998f 100644 --- a/ydb/core/tx/datashard/datashard__init.cpp +++ b/ydb/core/tx/datashard/datashard__init.cpp @@ -425,6 +425,12 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { return false; } + if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::SchemaSnapshots::TableId)) { + if (!Self->SchemaSnapshotManager.Load(db)) { + return false; + } + } + if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::ChangeRecords::TableId)) { if (!Self->LoadChangeRecords(db, ChangeRecords)) { return false; @@ -512,12 +518,6 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { } } - if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::SchemaSnapshots::TableId)) { - if (!Self->SchemaSnapshotManager.Load(db)) { - return false; - } - } - if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::Locks::TableId)) { TDataShardLocksDb locksDb(*Self, txc); if (!Self->SysLocks.Load(locksDb)) { @@ -547,6 +547,7 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { Self->SubscribeNewLocks(); Self->ScheduleRemoveAbandonedLockChanges(); + Self->ScheduleRemoveAbandonedSchemaSnapshots(); return true; } diff --git a/ydb/core/tx/datashard/datashard__read_iterator.cpp b/ydb/core/tx/datashard/datashard__read_iterator.cpp index 6e385ebf13e6..230deb29e0f5 100644 --- 
a/ydb/core/tx/datashard/datashard__read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard__read_iterator.cpp @@ -5,6 +5,7 @@ #include "datashard_locks_db.h" #include "probes.h" +#include #include #include @@ -315,6 +316,8 @@ class TReader { , Self(self) , TableId(state.PathId.OwnerId, state.PathId.LocalPathId, state.SchemaVersion) , FirstUnprocessedQuery(State.FirstUnprocessedQuery) + , LastProcessedKey(State.LastProcessedKey) + , LastProcessedKeyErased(State.LastProcessedKeyErased) { GetTimeFast(&StartTime); EndTime = StartTime; @@ -329,10 +332,10 @@ class TReader { bool toInclusive; TSerializedCellVec keyFromCells; TSerializedCellVec keyToCells; - if (Y_UNLIKELY(FirstUnprocessedQuery == State.FirstUnprocessedQuery && State.LastProcessedKey)) { + if (LastProcessedKey) { if (!State.Reverse) { - keyFromCells = TSerializedCellVec(State.LastProcessedKey); - fromInclusive = State.LastProcessedKeyErased; + keyFromCells = TSerializedCellVec(LastProcessedKey); + fromInclusive = LastProcessedKeyErased; keyToCells = range.To; toInclusive = range.ToInclusive; @@ -341,8 +344,8 @@ class TReader { keyFromCells = range.From; fromInclusive = range.FromInclusive; - keyToCells = TSerializedCellVec(State.LastProcessedKey); - toInclusive = State.LastProcessedKeyErased; + keyToCells = TSerializedCellVec(LastProcessedKey); + toInclusive = LastProcessedKeyErased; } } else { keyFromCells = range.From; @@ -500,6 +503,7 @@ class TReader { while (FirstUnprocessedQuery < State.Request->Ranges.size()) { if (ReachedTotalRowsLimit()) { FirstUnprocessedQuery = -1; + LastProcessedKey.clear(); return true; } @@ -526,6 +530,7 @@ class TReader { FirstUnprocessedQuery++; else FirstUnprocessedQuery--; + LastProcessedKey.clear(); } return true; @@ -537,6 +542,7 @@ class TReader { while (FirstUnprocessedQuery < State.Request->Keys.size()) { if (ReachedTotalRowsLimit()) { FirstUnprocessedQuery = -1; + LastProcessedKey.clear(); return true; } @@ -562,6 +568,7 @@ class TReader { FirstUnprocessedQuery++; 
else FirstUnprocessedQuery--; + LastProcessedKey.clear(); } return true; @@ -727,6 +734,28 @@ class TReader { } void UpdateState(TReadIteratorState& state, bool sentResult) { + if (state.FirstUnprocessedQuery == FirstUnprocessedQuery && + state.LastProcessedKey && !LastProcessedKey) + { + LOG_CRIT_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, + "DataShard " << Self->TabletID() << " detected unexpected reset of LastProcessedKey:" + << " ReadId# " << State.ReadId + << " LastSeqNo# " << State.SeqNo + << " LastQuery# " << State.FirstUnprocessedQuery + << " RowsRead# " << RowsRead + << " RowsProcessed# " << RowsProcessed + << " RowsSinceLastCheck# " << RowsSinceLastCheck + << " BytesInResult# " << BytesInResult + << " DeletedRowSkips# " << DeletedRowSkips + << " InvisibleRowSkips# " << InvisibleRowSkips + << " Quota.Rows# " << State.Quota.Rows + << " Quota.Bytes# " << State.Quota.Bytes + << " State.TotalRows# " << State.TotalRows + << " State.TotalRowsLimit# " << State.TotalRowsLimit + << " State.MaxRowsInResult# " << State.MaxRowsInResult); + Self->IncCounterReadIteratorLastKeyReset(); + } + state.TotalRows += RowsRead; state.FirstUnprocessedQuery = FirstUnprocessedQuery; state.LastProcessedKey = LastProcessedKey; @@ -1632,6 +1661,7 @@ class TDataShard::TReadOperation : public TOperation, public IReadOperation { if (Reader->HasUnreadQueries()) { Reader->UpdateState(state, ResultSent); if (!state.IsExhausted()) { + state.ReadContinuePending = true; ctx.Send( Self->SelfId(), new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId)); @@ -2282,6 +2312,15 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase Y_ASSERT(it->second); auto& state = *it->second; + if (state.IsExhausted()) { + // iterator quota reduced and exhausted while ReadContinue was inflight + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue for iterator# " << ReadId + << ", quota exhausted while rescheduling"); + 
state.ReadContinuePending = false; + Result.reset(); + return true; + } + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue for iterator# " << ReadId << ", firstUnprocessedQuery# " << state.FirstUnprocessedQuery); @@ -2394,6 +2433,7 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase if (Reader->Read(txc, ctx)) { // Retry later when dependencies are resolved if (!Reader->GetVolatileReadDependencies().empty()) { + state.ReadContinuePending = true; Self->WaitVolatileDependenciesThenSend( Reader->GetVolatileReadDependencies(), Self->SelfId(), @@ -2480,6 +2520,8 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase Y_ABORT_UNLESS(it->second); auto& state = *it->second; + state.ReadContinuePending = false; + if (!Result) { LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " read iterator# " << ReadId << " TTxReadContinue::Execute() finished without Result, aborting"); @@ -2527,14 +2569,14 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase } if (Reader->HasUnreadQueries()) { - Y_ASSERT(it->second); - auto& state = *it->second; + bool wasExhausted = state.IsExhausted(); Reader->UpdateState(state, useful); if (!state.IsExhausted()) { + state.ReadContinuePending = true; ctx.Send( Self->SelfId(), new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId)); - } else { + } else if (!wasExhausted) { Self->IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " read iterator# " << ReadId << " exhausted"); @@ -2603,6 +2645,16 @@ void TDataShard::Handle(TEvDataShard::TEvRead::TPtr& ev, const TActorContext& ct return; } + if (State == TShardState::PreOffline || + State == TShardState::Offline) + { + replyWithError( + Ydb::StatusIds::NOT_FOUND, + TStringBuilder() << "Shard " << TabletID() << " finished splitting/merging" + << " (node# " << SelfId().NodeId() << " state# 
" << DatashardStateName(State) << ")"); + return; + } + if (!IsStateNewReadAllowed()) { replyWithError( Ydb::StatusIds::OVERLOADED, @@ -2807,14 +2859,19 @@ void TDataShard::Handle(TEvDataShard::TEvReadAck::TPtr& ev, const TActorContext& bool wasExhausted = state.IsExhausted(); state.UpQuota( record.GetSeqNo(), - record.GetMaxRows(), - record.GetMaxBytes()); + record.HasMaxRows() ? record.GetMaxRows() : Max(), + record.HasMaxBytes() ? record.GetMaxBytes() : Max()); if (wasExhausted && !state.IsExhausted()) { DecCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); - ctx.Send( - SelfId(), - new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId())); + if (!state.ReadContinuePending) { + state.ReadContinuePending = true; + ctx.Send( + SelfId(), + new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId())); + } + } else if (!wasExhausted && state.IsExhausted()) { + IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); } LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " ReadAck for read iterator# " << readId @@ -2943,6 +3000,16 @@ void TDataShard::UnsubscribeReadIteratorSessions(const TActorContext& ctx) { ReadIteratorSessions.clear(); } +void TDataShard::IncCounterReadIteratorLastKeyReset() { + if (!CounterReadIteratorLastKeyReset) { + CounterReadIteratorLastKeyReset = GetServiceCounters(AppData()->Counters, "tablets") + ->GetSubgroup("type", "DataShard") + ->GetSubgroup("category", "app") + ->GetCounter("DataShard/ReadIteratorLastKeyReset", true); + } + ++*CounterReadIteratorLastKeyReset; +} + } // NKikimr::NDataShard template<> diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 9087ecd9bd4b..67d5c62db373 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -167,7 +167,7 @@ class TTableStatsCoroBuilder : public TActorCoroImpl, private IPages { LOG_DEBUG_S(GetActorContext(), NKikimrServices::TX_DATASHARD, "BuildStats result at datashard " 
<< TabletId << ", for tableId " << TableId << ": RowCount " << ev->Stats.RowCount << ", DataSize " << ev->Stats.DataSize.Size << ", IndexSize " << ev->Stats.IndexSize.Size << ", PartCount " << ev->PartCount << (ev->PartOwners.size() > 1 || ev->PartOwners.size() == 1 && *ev->PartOwners.begin() != TabletId ? ", with borrowed parts" : "") - << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent)); + << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent) << ", HistogramKeys " << ev->Stats.DataSizeHistogram.size()); Send(ReplyTo, ev.Release()); diff --git a/ydb/core/tx/datashard/datashard_change_sending.cpp b/ydb/core/tx/datashard/datashard_change_sending.cpp index 181f3fdf8d23..9023b29d7b9f 100644 --- a/ydb/core/tx/datashard/datashard_change_sending.cpp +++ b/ydb/core/tx/datashard/datashard_change_sending.cpp @@ -286,7 +286,7 @@ class TDataShard::TTxRemoveChangeRecords: public TTransactionBase { ChangeExchangeSplit = true; } else { for (const auto dstTabletId : Self->ChangeSenderActivator.GetDstSet()) { - if (Self->SplitSrcSnapshotSender.Acked(dstTabletId)) { + if (Self->SplitSrcSnapshotSender.Acked(dstTabletId) && !Self->ChangeSenderActivator.Acked(dstTabletId)) { ActivationList.insert(dstTabletId); } } @@ -346,9 +346,7 @@ class TDataShard::TTxRemoveChangeRecords: public TTransactionBase { } for (const auto dstTabletId : ActivationList) { - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); } Self->CheckStateChange(ctx); @@ -383,7 +381,7 @@ class TDataShard::TTxChangeExchangeSplitAck: public TTransactionBase Y_ABORT_UNLESS(Self->ChangeExchangeSplitter.Done()); for (const auto dstTabletId : Self->ChangeSenderActivator.GetDstSet()) { - if (Self->SplitSrcSnapshotSender.Acked(dstTabletId)) { + if (Self->SplitSrcSnapshotSender.Acked(dstTabletId) && !Self->ChangeSenderActivator.Acked(dstTabletId)) { ActivationList.insert(dstTabletId); } } @@ 
-396,9 +394,7 @@ class TDataShard::TTxChangeExchangeSplitAck: public TTransactionBase << ", at tablet# " << Self->TabletID()); for (const auto dstTabletId : ActivationList) { - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); } } diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index b770044dedb1..2cb3a91a6861 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -241,6 +241,7 @@ class TDataShard class TTxCdcStreamScanProgress; class TTxCdcStreamEmitHeartbeats; class TTxUpdateFollowerReadEdge; + class TTxRemoveSchemaSnapshots; template friend class TTxDirectBase; class TTxUploadRows; @@ -374,6 +375,7 @@ class TDataShard EvPlanPredictedTxs, EvStatisticsScanFinished, EvTableStatsError, + EvRemoveSchemaSnapshots, EvEnd }; @@ -595,6 +597,8 @@ class TDataShard struct TEvPlanPredictedTxs : public TEventLocal {}; struct TEvStatisticsScanFinished : public TEventLocal {}; + + struct TEvRemoveSchemaSnapshots : public TEventLocal {}; }; struct Schema : NIceDb::Schema { @@ -1383,6 +1387,8 @@ class TDataShard void Handle(TEvPrivate::TEvPlanPredictedTxs::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRemoveSchemaSnapshots::TPtr& ev, const TActorContext& ctx); + void HandleByReplicationSourceOffsetsServer(STATEFN_SIG); void DoPeriodicTasks(const TActorContext &ctx); @@ -1906,8 +1912,9 @@ class TDataShard void MoveChangeRecord(NIceDb::TNiceDb& db, ui64 order, const TPathId& pathId); void MoveChangeRecord(NIceDb::TNiceDb& db, ui64 lockId, ui64 lockOffset, const TPathId& pathId); void RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order); - void EnqueueChangeRecords(TVector&& records, ui64 cookie = 0); - ui32 GetFreeChangeQueueCapacity(ui64 cookie); + // TODO(ilnaz): remove 'afterMove' after #6541 + void EnqueueChangeRecords(TVector&& records, ui64 cookie = 
0, bool afterMove = false); + ui32 GetFreeChangeQueueCapacity(ui64 cookie) const; ui64 ReserveChangeQueueCapacity(ui32 capacity); void UpdateChangeExchangeLag(TInstant now); void CreateChangeSender(const TActorContext& ctx); @@ -1920,6 +1927,8 @@ class TDataShard bool LoadChangeRecordCommits(NIceDb::TNiceDb& db, TVector& records); void ScheduleRemoveLockChanges(ui64 lockId); void ScheduleRemoveAbandonedLockChanges(); + void ScheduleRemoveSchemaSnapshot(const TSchemaSnapshotKey& key); + void ScheduleRemoveAbandonedSchemaSnapshots(); static void PersistCdcStreamScanLastKey(NIceDb::TNiceDb& db, const TSerializedCellVec& value, const TPathId& tablePathId, const TPathId& streamPathId); @@ -2804,24 +2813,29 @@ class TDataShard ui64 LockOffset; ui64 ReservationCookie; - explicit TEnqueuedRecord(ui64 bodySize, const TPathId& tableId, - ui64 schemaVersion, TInstant created, TInstant enqueued, - ui64 lockId = 0, ui64 lockOffset = 0, ui64 cookie = 0) + explicit TEnqueuedRecord(ui64 bodySize, const TPathId& tableId, ui64 schemaVersion, + TInstant created, ui64 lockId = 0, ui64 lockOffset = 0) : BodySize(bodySize) , TableId(tableId) , SchemaVersion(schemaVersion) , SchemaSnapshotAcquired(false) , CreatedAt(created) - , EnqueuedAt(enqueued) + , EnqueuedAt(TInstant::Zero()) , LockId(lockId) , LockOffset(lockOffset) - , ReservationCookie(cookie) + , ReservationCookie(0) + { + } + + explicit TEnqueuedRecord(const IDataShardChangeCollector::TChange& record) + : TEnqueuedRecord(record.BodySize, record.TableId, record.SchemaVersion, + record.CreatedAt(), record.LockId, record.LockOffset) { } - explicit TEnqueuedRecord(const IDataShardChangeCollector::TChange& record, TInstant now, ui64 cookie) - : TEnqueuedRecord(record.BodySize, record.TableId, record.SchemaVersion, record.CreatedAt(), now, - record.LockId, record.LockOffset, cookie) + explicit TEnqueuedRecord(const TChangeRecord& record) + : TEnqueuedRecord(record.GetBody().size(), record.GetTableId(), record.GetSchemaVersion(), + 
record.GetApproximateCreationDateTime(), record.GetLockId(), record.GetLockOffset()) { } }; @@ -2863,9 +2877,11 @@ class TDataShard size_t Count = 0; }; + TVector CommittingChangeRecords; THashMap LockChangeRecords; // ui64 is lock id THashMap CommittedLockChangeRecords; // ui64 is lock id TVector PendingLockChangeRecordsToRemove; + TVector PendingSchemaSnapshotsToGc; // in THashMap InChangeSenders; // ui64 is shard id @@ -2965,6 +2981,16 @@ class TDataShard CommittedLockChangeRecords = std::move(committedLockChangeRecords); } + auto TakeChangesQueue() { + auto result = std::move(ChangesQueue); + ChangesQueue.clear(); + return result; + } + + void SetChangesQueue(THashMap&& changesQueue) { + ChangesQueue = std::move(changesQueue); + } + protected: // Redundant init state required by flat executor implementation void StateInit(TAutoPtr &ev) { @@ -2986,6 +3012,7 @@ class TDataShard HFuncTraced(TEvMediatorTimecast::TEvNotifyPlanStep, Handle); HFuncTraced(TEvPrivate::TEvMediatorRestoreBackup, Handle); HFuncTraced(TEvPrivate::TEvRemoveLockChangeRecords, Handle); + HFuncTraced(TEvPrivate::TEvRemoveSchemaSnapshots, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { ALOG_WARN(NKikimrServices::TX_DATASHARD, "TDataShard::StateInactive unhandled event type: " << ev->GetTypeRewrite() @@ -3114,6 +3141,7 @@ class TDataShard HFunc(TEvPrivate::TEvPlanPredictedTxs, Handle); HFunc(NStat::TEvStatistics::TEvStatisticsRequest, Handle); HFunc(TEvPrivate::TEvStatisticsScanFinished, Handle); + HFuncTraced(TEvPrivate::TEvRemoveSchemaSnapshots, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { ALOG_WARN(NKikimrServices::TX_DATASHARD, "TDataShard::StateWork unhandled event type: " << ev->GetTypeRewrite() << " event: " << ev->ToString()); @@ -3291,6 +3319,10 @@ class TDataShard bool AllowCancelROwithReadsets() const; void ResolveTablePath(const TActorContext &ctx); + +public: + NMonitoring::TDynamicCounters::TCounterPtr CounterReadIteratorLastKeyReset; + void 
IncCounterReadIteratorLastKeyReset(); }; NKikimrTxDataShard::TError::EKind ConvertErrCode(NMiniKQL::IEngineFlat::EResult code); diff --git a/ydb/core/tx/datashard/datashard_kqp.cpp b/ydb/core/tx/datashard/datashard_kqp.cpp index af3eb3c31c15..47928c55e8cc 100644 --- a/ydb/core/tx/datashard/datashard_kqp.cpp +++ b/ydb/core/tx/datashard/datashard_kqp.cpp @@ -1012,7 +1012,15 @@ class TKqpTaskRunnerExecutionContext: public NDq::IDqTaskRunnerExecutionContext return {}; } - std::function GetWakeupCallback() const override { + NDq::TWakeUpCallback GetWakeupCallback() const override { + return {}; + } + + NDq::TErrorCallback GetErrorCallback() const override { + return {}; + } + + TIntrusivePtr GetSpillingTaskCounters() const override { return {}; } diff --git a/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp b/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp index 27fc69b4f132..51752b1d206a 100644 --- a/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp @@ -121,7 +121,7 @@ class TKqpLookupRowsWrapper : public TStatelessFlowComputationNode keyCells(ParseResult.KeyIndices.size()); - FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, keyCells, *ctx.TypeEnv); + FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, keyCells, ctx.TypeEnv); NUdf::TUnboxedValue result; TKqpTableStats stats; @@ -203,10 +203,10 @@ class TKqpLookupTableWrapper : public TStatelessFlowComputationNode fromCells(tableInfo->KeyColumns.size()); - FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, fromCells, *ctx.TypeEnv); + FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, fromCells, ctx.TypeEnv); TVector toCells(ParseResult.KeyIndices.size()); - FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, toCells, *ctx.TypeEnv); + FillKeyTupleValue(key, ParseResult.KeyIndices, ParseResult.KeyTypes, toCells, ctx.TypeEnv); auto range = TTableRange(fromCells, 
true, toCells, true); diff --git a/ydb/core/tx/datashard/datashard_kqp_read_table.cpp b/ydb/core/tx/datashard/datashard_kqp_read_table.cpp index 131165ae2999..6a06af8814c4 100644 --- a/ydb/core/tx/datashard/datashard_kqp_read_table.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_read_table.cpp @@ -269,10 +269,10 @@ class TKqpWideReadTableWrapper : public TKqpWideReadTableWrapperBase EFetchResult ReadValue(TComputationContext& ctx, NUdf::TUnboxedValue* const* output) const final { if (!this->Iterator) { TVector fromCells; - BuildKeyTupleCells(ParseResult.FromTuple->GetType(), FromNode->GetValue(ctx), fromCells, *ctx.TypeEnv); + BuildKeyTupleCells(ParseResult.FromTuple->GetType(), FromNode->GetValue(ctx), fromCells, ctx.TypeEnv); TVector toCells; - BuildKeyTupleCells(ParseResult.ToTuple->GetType(), ToNode->GetValue(ctx), toCells, *ctx.TypeEnv); + BuildKeyTupleCells(ParseResult.ToTuple->GetType(), ToNode->GetValue(ctx), toCells, ctx.TypeEnv); auto range = TTableRange(fromCells, ParseResult.FromInclusive, toCells, ParseResult.ToInclusive); @@ -328,7 +328,7 @@ class TKqpWideReadTableRangesWrapper : public TKqpWideReadTableWrapperBaseComputeCtx.GetLocalTableId(ParseResult.TableId); const auto* tableInfo = this->ComputeCtx.Database->GetScheme().GetTableInfo(localTid); - Ranges = CreateTableRanges(ParseResult, RangesNode, *ctx.TypeEnv, ctx, tableInfo->KeyColumns.size()); + Ranges = CreateTableRanges(ParseResult, RangesNode, ctx.TypeEnv, ctx, tableInfo->KeyColumns.size()); RangeId = 0; if (ItemsLimit) { diff --git a/ydb/core/tx/datashard/datashard_pipeline.cpp b/ydb/core/tx/datashard/datashard_pipeline.cpp index 8744e937d257..d7afb3621bdb 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.cpp +++ b/ydb/core/tx/datashard/datashard_pipeline.cpp @@ -2285,11 +2285,15 @@ void TPipeline::AddCommittingOp(const TOperation::TPtr& op) { if (!Self->IsMvccEnabled() || op->IsReadOnly()) return; + Y_VERIFY_S(!op->GetCommittingOpsVersion(), + "Trying to AddCommittingOp " << *op << " more 
than once"); + TRowVersion version = Self->GetReadWriteVersions(op.Get()).WriteVersion; if (op->IsImmediate()) CommittingOps.Add(op->GetTxId(), version); else CommittingOps.Add(version); + op->SetCommittingOpsVersion(version); } void TPipeline::RemoveCommittingOp(const TRowVersion& version) { @@ -2299,13 +2303,13 @@ void TPipeline::RemoveCommittingOp(const TRowVersion& version) { } void TPipeline::RemoveCommittingOp(const TOperation::TPtr& op) { - if (!Self->IsMvccEnabled() || op->IsReadOnly()) - return; - - if (op->IsImmediate()) - CommittingOps.Remove(op->GetTxId()); - else - CommittingOps.Remove(TRowVersion(op->GetStep(), op->GetTxId())); + if (const auto& version = op->GetCommittingOpsVersion()) { + if (op->IsImmediate()) + CommittingOps.Remove(op->GetTxId(), *version); + else + CommittingOps.Remove(*version); + op->ResetCommittingOpsVersion(); + } } bool TPipeline::WaitCompletion(const TOperation::TPtr& op) const { diff --git a/ydb/core/tx/datashard/datashard_pipeline.h b/ydb/core/tx/datashard/datashard_pipeline.h index 10b523a194ab..705fc765966e 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.h +++ b/ydb/core/tx/datashard/datashard_pipeline.h @@ -424,11 +424,13 @@ class TPipeline : TNonCopyable { ui64 Step; ui64 TxId; mutable ui32 Counter; + mutable ui32 TxCounter; TItem(const TRowVersion& from) : Step(from.Step) , TxId(from.TxId) , Counter(1u) + , TxCounter(0u) {} friend constexpr bool operator<(const TItem& a, const TItem& b) { @@ -442,6 +444,7 @@ class TPipeline : TNonCopyable { using TItemsSet = TSet; using TTxIdMap = THashMap; + public: inline void Add(ui64 txId, TRowVersion version) { auto res = ItemsSet.emplace(version); @@ -450,6 +453,7 @@ class TPipeline : TNonCopyable { auto res2 = TxIdMap.emplace(txId, res.first); Y_VERIFY_S(res2.second, "Unexpected duplicate immediate tx " << txId << " committing at " << version); + res.first->TxCounter += 1; } inline void Add(TRowVersion version) { @@ -458,17 +462,29 @@ class TPipeline : TNonCopyable { 
res.first->Counter += 1; } - inline void Remove(ui64 txId) { - if (auto it = TxIdMap.find(txId); it != TxIdMap.end()) { - if (--it->second->Counter == 0) - ItemsSet.erase(it->second); - TxIdMap.erase(it); - } + inline void Remove(ui64 txId, TRowVersion version) { + auto it = TxIdMap.find(txId); + Y_VERIFY_S(it != TxIdMap.end(), "Removing immediate tx " << txId << " " << version + << " does not match a previous Add"); + Y_VERIFY_S(TRowVersion(it->second->Step, it->second->TxId) == version, "Removing immediate tx " << txId << " " << version + << " does not match a previous Add " << TRowVersion(it->second->Step, it->second->TxId)); + Y_VERIFY_S(it->second->TxCounter > 0, "Removing immediate tx " << txId << " " << version + << " with a mismatching TxCounter"); + --it->second->TxCounter; + if (--it->second->Counter == 0) + ItemsSet.erase(it->second); + TxIdMap.erase(it); } inline void Remove(TRowVersion version) { - if (auto it = ItemsSet.find(version); it != ItemsSet.end() && --it->Counter == 0) + auto it = ItemsSet.find(version); + Y_VERIFY_S(it != ItemsSet.end(), "Removing version " << version + << " does not match a previous Add"); + if (--it->Counter == 0) { + Y_VERIFY_S(it->TxCounter == 0, "Removing version " << version + << " while TxCounter has active references, possible Add/Remove mismatch"); ItemsSet.erase(it); + } } inline bool HasOpsBelow(TRowVersion upperBound) const { diff --git a/ydb/core/tx/datashard/datashard_repl_apply.cpp b/ydb/core/tx/datashard/datashard_repl_apply.cpp index 92533d7befde..718328eb63f2 100644 --- a/ydb/core/tx/datashard/datashard_repl_apply.cpp +++ b/ydb/core/tx/datashard/datashard_repl_apply.cpp @@ -1,4 +1,6 @@ #include "datashard_impl.h" +#include "datashard_locks_db.h" +#include "setup_sys_locks.h" #include @@ -24,6 +26,9 @@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBaseState != TShardState::Ready) { Result = MakeHolder( NKikimrTxDataShard::TEvApplyReplicationChangesResult::STATUS_REJECTED, @@ -80,6 +85,7 
@@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBasePromoteImmediatePostExecuteEdges(*MvccReadWriteVersion, TDataShard::EPromotePostExecuteEdges::ReadWrite, txc); Pipeline.AddCommittingOp(*MvccReadWriteVersion); } @@ -88,6 +94,7 @@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBaseSysLocksTable().ApplyLocks(); return true; } diff --git a/ydb/core/tx/datashard/datashard_schema_snapshots.cpp b/ydb/core/tx/datashard/datashard_schema_snapshots.cpp index 7b603ac4eca2..9c56e4ad7381 100644 --- a/ydb/core/tx/datashard/datashard_schema_snapshots.cpp +++ b/ydb/core/tx/datashard/datashard_schema_snapshots.cpp @@ -20,6 +20,7 @@ TSchemaSnapshotManager::TSchemaSnapshotManager(const TDataShard* self) void TSchemaSnapshotManager::Reset() { Snapshots.clear(); + References.clear(); } bool TSchemaSnapshotManager::Load(NIceDb::TNiceDb& db) { @@ -79,14 +80,16 @@ const TSchemaSnapshot* TSchemaSnapshotManager::FindSnapshot(const TSchemaSnapsho return Snapshots.FindPtr(key); } -void TSchemaSnapshotManager::RemoveShapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key) { +void TSchemaSnapshotManager::RemoveShapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key) { auto it = Snapshots.find(key); if (it == Snapshots.end()) { return; } Snapshots.erase(it); - PersistRemoveSnapshot(db, key); + + NIceDb::TNiceDb nicedb(db); + PersistRemoveSnapshot(nicedb, key); } void TSchemaSnapshotManager::RenameSnapshots(NTable::TDatabase& db, @@ -119,6 +122,10 @@ void TSchemaSnapshotManager::RenameSnapshots(NTable::TDatabase& db, } } +const TSchemaSnapshotManager::TSnapshots& TSchemaSnapshotManager::GetSnapshots() const { + return Snapshots; +} + bool TSchemaSnapshotManager::AcquireReference(const TSchemaSnapshotKey& key) { auto it = Snapshots.find(key); if (it == Snapshots.end()) { @@ -152,6 +159,15 @@ bool TSchemaSnapshotManager::ReleaseReference(const TSchemaSnapshotKey& key) { return true; } +bool TSchemaSnapshotManager::HasReference(const 
TSchemaSnapshotKey& key) const { + auto refIt = References.find(key); + if (refIt != References.end()) { + return refIt->second; + } else { + return false; + } +} + void TSchemaSnapshotManager::PersistAddSnapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& snapshot) { using Schema = TDataShard::Schema; db.Table() diff --git a/ydb/core/tx/datashard/datashard_schema_snapshots.h b/ydb/core/tx/datashard/datashard_schema_snapshots.h index db0d3b655b34..0bc80a628e2e 100644 --- a/ydb/core/tx/datashard/datashard_schema_snapshots.h +++ b/ydb/core/tx/datashard/datashard_schema_snapshots.h @@ -23,6 +23,8 @@ struct TSchemaSnapshot { }; class TSchemaSnapshotManager { + using TSnapshots = TMap>; + public: explicit TSchemaSnapshotManager(const TDataShard* self); @@ -31,11 +33,13 @@ class TSchemaSnapshotManager { bool AddSnapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& snapshot); const TSchemaSnapshot* FindSnapshot(const TSchemaSnapshotKey& key) const; - void RemoveShapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key); + void RemoveShapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key); void RenameSnapshots(NTable::TDatabase& db, const TPathId& prevTableId, const TPathId& newTableId); + const TSnapshots& GetSnapshots() const; bool AcquireReference(const TSchemaSnapshotKey& key); bool ReleaseReference(const TSchemaSnapshotKey& key); + bool HasReference(const TSchemaSnapshotKey& key) const; private: void PersistAddSnapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& snapshot); @@ -43,7 +47,7 @@ class TSchemaSnapshotManager { private: const TDataShard* Self; - TMap> Snapshots; + TSnapshots Snapshots; THashMap References; }; // TSchemaSnapshotManager diff --git a/ydb/core/tx/datashard/datashard_split_src.cpp b/ydb/core/tx/datashard/datashard_split_src.cpp index b5e303fb5ba1..1e88d7495c63 100644 --- a/ydb/core/tx/datashard/datashard_split_src.cpp +++ 
b/ydb/core/tx/datashard/datashard_split_src.cpp @@ -244,6 +244,8 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa private: TIntrusivePtr SnapContext; bool ChangeExchangeSplit; + THashSet ActivationList; + THashSet SplitList; public: TTxSplitSnapshotComplete(TDataShard* ds, TIntrusivePtr snapContext) @@ -378,13 +380,11 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa proto->SetTimeoutMs(kv.second.Timeout.MilliSeconds()); } - if (Self->ChangesQueue || tableInfo.HasCdcStreams()) { + if (tableInfo.HasAsyncIndexes() || tableInfo.HasCdcStreams()) { snapshot->SetWaitForActivation(true); - Self->ChangeSenderActivator.AddDst(dstTablet); - db.Table().Key(dstTablet).Update(); - + ActivationList.insert(dstTablet); if (tableInfo.HasCdcStreams()) { - Self->ChangeExchangeSplitter.AddDst(dstTablet); + SplitList.insert(dstTablet); } } @@ -403,14 +403,23 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa } } - ChangeExchangeSplit = !Self->ChangesQueue && !Self->ChangeExchangeSplitter.Done(); - if (needToReadPages) { LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " BorrowSnapshot is restarting for split OpId " << opId); return false; } else { txc.Env.DropSnapshot(SnapContext); + for (ui64 dstTabletId : ActivationList) { + Self->ChangeSenderActivator.AddDst(dstTabletId); + db.Table().Key(dstTabletId).Update(); + } + + for (ui64 dstTabletId : SplitList) { + Self->ChangeExchangeSplitter.AddDst(dstTabletId); + } + + ChangeExchangeSplit = !Self->ChangesQueue && !Self->ChangeExchangeSplitter.Done(); + Self->State = TShardState::SplitSrcSendingSnapshot; Self->PersistSys(db, Schema::Sys_State, Self->State); @@ -438,14 +447,14 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra private: TEvDataShard::TEvSplitTransferSnapshotAck::TPtr Ev; bool AllDstAcksReceived; - bool Activate; + ui64 ActivateTabletId; public: 
TTxSplitTransferSnapshotAck(TDataShard* ds, TEvDataShard::TEvSplitTransferSnapshotAck::TPtr& ev) : NTabletFlatExecutor::TTransactionBase(ds) , Ev(ev) , AllDstAcksReceived(false) - , Activate(false) + , ActivateTabletId(0) {} TTxType GetTxType() const override { return TXTYPE_SPLIT_TRANSFER_SNAPSHOT_ACK; } @@ -469,8 +478,8 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra // Remove the row for acked snapshot db.Table().Key(dstTabletId).Delete(); - if (!Self->ChangesQueue && Self->ChangeExchangeSplitter.Done()) { - Activate = !Self->ChangeSenderActivator.Acked(dstTabletId); + if (!Self->ChangesQueue && Self->ChangeExchangeSplitter.Done() && !Self->ChangeSenderActivator.Acked(dstTabletId)) { + ActivateTabletId = dstTabletId; } return true; @@ -485,11 +494,8 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra } } - if (Activate) { - const ui64 dstTabletId = Ev->Get()->Record.GetTabletId(); - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + if (ActivateTabletId) { + Self->ChangeSenderActivator.DoSend(ActivateTabletId, ctx); } } }; diff --git a/ydb/core/tx/datashard/datashard_user_db.cpp b/ydb/core/tx/datashard/datashard_user_db.cpp index 4f711124043f..c25492ad2c82 100644 --- a/ydb/core/tx/datashard/datashard_user_db.cpp +++ b/ydb/core/tx/datashard/datashard_user_db.cpp @@ -62,7 +62,7 @@ ui64 CalculateValueBytes(const TArrayRef ops) { return bytes; }; -void TDataShardUserDb::UpdateRow( +void TDataShardUserDb::UpsertRow( const TTableId& tableId, const TArrayRef key, const TArrayRef ops) @@ -108,11 +108,11 @@ void TDataShardUserDb::UpdateRow( if (specUpdates.ColIdUpdateNo != Max()) { addExtendedOp(specUpdates.ColIdUpdateNo, specUpdates.UpdateNo); } - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, extendedOps); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, extendedOps); IncreaseUpdateCounters(key, 
extendedOps); } else { - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -126,7 +126,7 @@ void TDataShardUserDb::ReplaceRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected ReplaceRow for an unknown table"); - UpdateRowInt(NTable::ERowOp::Reset, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Reset, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -139,9 +139,26 @@ void TDataShardUserDb::InsertRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected InsertRow for an unknown table"); - EnsureMissingRow(tableId, key); + if (RowExists(tableId, key)) + throw TUniqueConstrainException(); + + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + + IncreaseUpdateCounters(key, ops); +} + +void TDataShardUserDb::UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) +{ + auto localTableId = Self.GetLocalTableId(tableId); + Y_ABORT_UNLESS(localTableId != 0, "Unexpected UpdateRow for an unknown table"); + + if (!RowExists(tableId, key)) + return; - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -153,7 +170,7 @@ void TDataShardUserDb::EraseRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected UpdateRow for an unknown table"); - UpdateRowInt(NTable::ERowOp::Erase, tableId, localTableId, key, {}); + UpsertRowInt(NTable::ERowOp::Erase, tableId, localTableId, key, {}); ui64 keyBytes = CalculateKeyBytes(key); @@ -172,7 +189,7 @@ void TDataShardUserDb::IncreaseUpdateCounters( Counters.UpdateRowBytes += keyBytes + valueBytes; } -void TDataShardUserDb::UpdateRowInt( +void 
TDataShardUserDb::UpsertRowInt( NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, @@ -216,7 +233,7 @@ void TDataShardUserDb::UpdateRowInt( Self.GetKeyAccessSampler()->AddSample(tableId, keyCells); } -void TDataShardUserDb::EnsureMissingRow ( +bool TDataShardUserDb::RowExists ( const TTableId& tableId, const TArrayRef key) { @@ -227,12 +244,10 @@ void TDataShardUserDb::EnsureMissingRow ( throw TNotReadyTabletException(); } case NTable::EReady::Data: { - if (rowState == NTable::ERowOp::Upsert) - throw TUniqueConstrainException(); - break; + return true; } case NTable::EReady::Gone: { - break; + return false; } } } diff --git a/ydb/core/tx/datashard/datashard_user_db.h b/ydb/core/tx/datashard/datashard_user_db.h index af6fdb235549..64be2116c284 100644 --- a/ydb/core/tx/datashard/datashard_user_db.h +++ b/ydb/core/tx/datashard/datashard_user_db.h @@ -37,7 +37,7 @@ class IDataShardUserDb { NTable::TRowState& row, const TMaybe& readVersion = {}) = 0; - virtual void UpdateRow( + virtual void UpsertRow( const TTableId& tableId, const TArrayRef key, const TArrayRef ops) = 0; @@ -52,6 +52,11 @@ class IDataShardUserDb { const TArrayRef key, const TArrayRef ops) = 0; + virtual void UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) = 0; + virtual void EraseRow( const TTableId& tableId, const TArrayRef key) = 0; @@ -107,7 +112,7 @@ class TDataShardUserDb final NTable::TRowState& row, const TMaybe& readVersion = {}) override; - void UpdateRow( + void UpsertRow( const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override; @@ -122,6 +127,11 @@ class TDataShardUserDb final const TArrayRef key, const TArrayRef ops) override; + void UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) override; + void EraseRow( const TTableId& tableId, const TArrayRef key) override; @@ -169,8 +179,8 @@ class TDataShardUserDb final private: static TSmallVec ConvertTableKeys(const TArrayRef key); - void 
UpdateRowInt(NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, const TArrayRef key, const TArrayRef ops); - void EnsureMissingRow(const TTableId& tableId, const TArrayRef key); + void UpsertRowInt(NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, const TArrayRef key, const TArrayRef ops); + bool RowExists(const TTableId& tableId, const TArrayRef key); void IncreaseUpdateCounters(const TArrayRef key, const TArrayRef ops); private: diff --git a/ydb/core/tx/datashard/datashard_user_table.cpp b/ydb/core/tx/datashard/datashard_user_table.cpp index 4e3f58065826..3e95d901b9b8 100644 --- a/ydb/core/tx/datashard/datashard_user_table.cpp +++ b/ydb/core/tx/datashard/datashard_user_table.cpp @@ -392,6 +392,8 @@ void TUserTable::AlterSchema() { schema.SetPartitionRangeEnd(Range.To.GetBuffer()); schema.SetPartitionRangeEndIsInclusive(Range.ToInclusive); + ReplicationConfig.Serialize(*schema.MutableReplicationConfig()); + schema.SetName(Name); schema.SetPath(Path); diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index 0f60e80569dc..af7c9ab2a6d8 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -339,6 +339,11 @@ struct TUserTable : public TThrRefBase { bool HasStrongConsistency() const { return Consistency == NKikimrSchemeOp::TTableReplicationConfig::CONSISTENCY_STRONG; } + + void Serialize(NKikimrSchemeOp::TTableReplicationConfig& proto) const { + proto.SetMode(Mode); + proto.SetConsistency(Consistency); + } }; struct TStats { diff --git a/ydb/core/tx/datashard/datashard_ut_build_index.cpp b/ydb/core/tx/datashard/datashard_ut_build_index.cpp index 35cb7139e6d6..a15064bb875d 100644 --- a/ydb/core/tx/datashard/datashard_ut_build_index.cpp +++ b/ydb/core/tx/datashard/datashard_ut_build_index.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -167,11 +168,8 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildIndexScan) { 
CreateShardedTableForIndex(server, sender, "/Root", "table-2", 1, false); - auto observer = runtime.AddObserver([&](TEvDataShard::TEvCompactBorrowed::TPtr& event) { - Cerr << "Captured TEvDataShard::TEvCompactBorrowed from " << runtime.FindActorName(event->Sender) << " to " << runtime.FindActorName(event->GetRecipientRewrite()) << Endl; - if (runtime.FindActorName(event->Sender) == "FLAT_SCHEMESHARD_ACTOR") { - event.Reset(); - } + TBlockEvents block(runtime, [&](const TEvDataShard::TEvCompactBorrowed::TPtr& event) { + return runtime.FindActorName(event->Sender) == "FLAT_SCHEMESHARD_ACTOR"; }); auto snapshot = CreateVolatileSnapshot(server, { "/Root/table-1" }); diff --git a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp index de473a467412..89096ae32717 100644 --- a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp +++ b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1825,18 +1826,13 @@ Y_UNIT_TEST_SUITE(Cdc) { UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); } - // try to update retention period - { - auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() - .SetRetentionPeriod(TDuration::Hours(48))).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); - } // try to update supported codecs { auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() .AppendSetSupportedCodecs(NYdb::NTopic::ECodec(5))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); } + // try to update retention storage { auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() @@ -1990,7 +1986,7 @@ Y_UNIT_TEST_SUITE(Cdc) { return result; } - void WaitForContent(TServer::TPtr server, const TActorId& sender, const TString& path, const TVector& 
expected) { + TVector WaitForContent(TServer::TPtr server, const TActorId& sender, const TString& path, const TVector& expected) { while (true) { const auto records = GetRecords(*server->GetRuntime(), sender, path, 0); for (ui32 i = 0; i < std::min(records.size(), expected.size()); ++i) { @@ -2000,7 +1996,12 @@ Y_UNIT_TEST_SUITE(Cdc) { if (records.size() >= expected.size()) { UNIT_ASSERT_VALUES_EQUAL_C(records.size(), expected.size(), "Unexpected record: " << records.at(expected.size()).second); - break; + TVector values; + for (const auto& pr : records) { + bool ok = NJson::ReadJsonTree(pr.second, &values.emplace_back()); + Y_ABORT_UNLESS(ok); + } + return values; } SimulateSleep(server, TDuration::Seconds(1)); @@ -2659,12 +2660,13 @@ Y_UNIT_TEST_SUITE(Cdc) { } } - Y_UNIT_TEST(InitialScan) { + void InitialScanTest(bool withTopicSchemeTx) { TPortManager portManager; TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) .SetUseRealThreads(false) .SetDomainName("Root") .SetEnableChangefeedInitialScan(true) + .SetEnablePQConfigTransactionsAtSchemeShard(withTopicSchemeTx) ); auto& runtime = *server->GetRuntime(); @@ -2707,6 +2709,14 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST(InitialScan) { + InitialScanTest(false); + } + + Y_UNIT_TEST(InitialScan_WithTopicSchemeTx) { + InitialScanTest(true); + } + Y_UNIT_TEST(InitialScanDebezium) { TTestTopicEnv env(SimpleTable(), KeysOnly(NKikimrSchemeOp::ECdcStreamFormatDebeziumJson, "UnusedStream")); auto& client = env.GetClient(); @@ -3016,6 +3026,63 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST(InitialScanEnqueuesZeroRecords) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableChangefeedInitialScan(true) + .SetChangesQueueItemsLimit(2) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = 
runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20), + (3, 30), + (4, 40); + )"); + + TBlockEvents blockScanRequest(runtime, [&](auto& ev) { + ev->Get()->Record.MutableLimits()->SetBatchMaxRows(1); + return true; + }); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + runtime.WaitFor("Scan request", [&]{ return blockScanRequest.size(); }); + runtime.AddObserver([&](auto& ev) { + ev->Get()->Record.MutableLimits()->SetBatchMaxRows(1); + }); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 100), + (2, 200), + (3, 300); + )"); + + blockScanRequest.Unblock().Stop(); + + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":100},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + R"({"update":{"value":200},"key":[2]})", + R"({"update":{"value":30},"key":[3]})", + R"({"update":{"value":300},"key":[3]})", + R"({"update":{"value":40},"key":[4]})", + }); + } + Y_UNIT_TEST(InitialScanRacyProgressAndDrop) { TPortManager portManager; TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) @@ -3500,6 +3567,324 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + void MustNotLoseSchemaSnapshot(bool enableVolatileTx) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableDataShardVolatileTransactions(enableVolatileTx) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + 
CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + Updates(NKikimrSchemeOp::ECdcStreamFormatJson))); + + auto tabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(tabletIds.size(), 1); + + std::vector> blockedRemoveRecords; + auto blockRemoveRecords = runtime.AddObserver([&](auto& ev) { + Cerr << "... blocked remove record" << Endl; + blockedRemoveRecords.emplace_back(ev.Release()); + }); + + Cerr << "... execute first query" << Endl; + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (1, 10); + )"); + + WaitFor(runtime, [&]{ return blockedRemoveRecords.size() == 1; }, "blocked remove records"); + blockRemoveRecords.Remove(); + + std::vector> blockedPlans; + auto blockPlans = runtime.AddObserver([&](auto& ev) { + blockedPlans.emplace_back(ev.Release()); + }); + + Cerr << "... execute scheme query" << Endl; + const auto alterTxId = AsyncAlterAddExtraColumn(server, "/Root", "Table"); + + WaitFor(runtime, [&]{ return blockedPlans.size() > 0; }, "blocked plans"); + blockPlans.Remove(); + + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver([&](auto& ev) { + auto* msg = ev->Get(); + if (msg->Id.TabletID() == tabletIds[0]) { + Cerr << "... blocked put response:" << msg->Id << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + Cerr << "... execute second query" << Endl; + SendSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (2, 20); + )"); + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + auto wasBlockedPutResponses = blockedPutResponses.size(); + + Cerr << "... 
release blocked plans" << Endl; + for (auto& ev : std::exchange(blockedPlans, {})) { + runtime.Send(ev.release(), 0, true); + } + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > wasBlockedPutResponses; }, "blocked put responses"); + wasBlockedPutResponses = blockedPutResponses.size(); + + Cerr << "... release blocked remove records" << Endl; + for (auto& ev : std::exchange(blockedRemoveRecords, {})) { + runtime.Send(ev.release(), 0, true); + } + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > wasBlockedPutResponses; }, "blocked put responses"); + blockPutResponses.Remove(); + + Cerr << "... release blocked put responses" << Endl; + for (auto& ev : std::exchange(blockedPutResponses, {})) { + runtime.Send(ev.release(), 0, true); + } + + Cerr << "... finalize" << Endl; + WaitTxNotification(server, edgeActor, alterTxId); + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + }); + } + + Y_UNIT_TEST(MustNotLoseSchemaSnapshot) { + MustNotLoseSchemaSnapshot(false); + } + + Y_UNIT_TEST(MustNotLoseSchemaSnapshotWithVolatileTx) { + MustNotLoseSchemaSnapshot(true); + } + + Y_UNIT_TEST(ResolvedTimestampsContinueAfterMerge) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + SetSplitMergePartCountLimit(&runtime, -1); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithResolvedTimestamps(TDuration::Seconds(3), Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + Cerr << "... 
prepare" << Endl; + { + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + }); + + auto tabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(tabletIds.size(), 1); + + WaitTxNotification(server, edgeActor, AsyncSplitTable(server, edgeActor, "/Root/Table", tabletIds.at(0), 2)); + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"resolved":"***"})", + }); + } + + auto initialTabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(initialTabletIds.size(), 2); + + std::vector> blockedSplitRequests; + auto blockSplitRequests = runtime.AddObserver([&](auto& ev) { + if (ev->Get()->Record.GetPartitionRequest().HasCmdSplitMessageGroup()) { + blockedSplitRequests.emplace_back(ev.Release()); + } + }); + + Cerr << "... merge table" << Endl; + const auto mergeTxId = AsyncMergeTable(server, edgeActor, "/Root/Table", initialTabletIds); + WaitFor(runtime, [&]{ return blockedSplitRequests.size() == initialTabletIds.size(); }, "blocked split requests"); + blockSplitRequests.Remove(); + + std::vector> blockedRegisterRequests; + auto blockRegisterRequests = runtime.AddObserver([&](auto& ev) { + if (ev->Get()->Record.GetPartitionRequest().HasCmdRegisterMessageGroup()) { + blockedRegisterRequests.emplace_back(ev.Release()); + } + }); + + ui32 splitResponses = 0; + auto countSplitResponses = runtime.AddObserver([&](auto&) { + ++splitResponses; + }); + + Cerr << "... release split requests" << Endl; + for (auto& ev : std::exchange(blockedSplitRequests, {})) { + runtime.Send(ev.release(), 0, true); + WaitFor(runtime, [prev = splitResponses, &splitResponses]{ return splitResponses > prev; }, "split response"); + } + + Cerr << "... reboot pq tablet" << Endl; + RebootTablet(runtime, ResolvePqTablet(runtime, edgeActor, "/Root/Table/Stream", 0), edgeActor); + countSplitResponses.Remove(); + + Cerr << "... 
release register requests" << Endl; + blockRegisterRequests.Remove(); + for (auto& ev : std::exchange(blockedRegisterRequests, {})) { + runtime.Send(ev.release(), 0, true); + } + + Cerr << "... wait for merge tx notification" << Endl; + WaitTxNotification(server, edgeActor, mergeTxId); + + Cerr << "... wait for final heartbeat" << Endl; + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"resolved":"***"})", + R"({"resolved":"***"})", + }); + } + + Y_UNIT_TEST(ResolvedTimestampForDisplacedUpsert) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + ); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + SetSplitMergePartCountLimit(&runtime, -1); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithVirtualTimestamps(WithResolvedTimestamps( + TDuration::Seconds(3), Updates(NKikimrSchemeOp::ECdcStreamFormatJson))))); + + Cerr << "... prepare" << Endl; + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + }); + + KqpSimpleExec(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (1, 10); + )"); + + auto records = WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"update":{"value":10},"key":[1],"ts":"***"})", + R"({"resolved":"***"})", + }); + + // Take the final step + ui64 lastStep = records.back()["resolved"][0].GetUInteger(); + Cerr << "... 
last heartbeat at " << lastStep << Endl; + + const auto tableId = ResolveTableId(server, edgeActor, "/Root/Table"); + const auto shards = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 1u); + + ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 snapshotStep = lastStep + 3000 - 1; + ForwardToTablet(runtime, coordinator, edgeActor, new TEvTxProxy::TEvRequirePlanSteps(coordinator, snapshotStep)); + + TBlockEvents blockedUpdates(runtime, + [&](auto& ev) { + return ev->Get()->Record.GetTimeBarrier() > snapshotStep; + }); + + Cerr << "... performing a read from snapshot just before the next heartbeat" << Endl; + { + auto req = std::make_unique(); + { + auto& record = req->Record; + record.SetReadId(1); + record.MutableTableId()->SetOwnerId(tableId.PathId.OwnerId); + record.MutableTableId()->SetTableId(tableId.PathId.LocalPathId); + record.AddColumns(1); + record.AddColumns(2); + record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + ui32 key = 1; + TVector keys; + keys.push_back(TCell::Make(key)); + req->Keys.push_back(TSerializedCellVec(TSerializedCellVec::Serialize(keys))); + record.MutableSnapshot()->SetStep(snapshotStep); + record.MutableSnapshot()->SetTxId(Max()); + } + ForwardToTablet(runtime, shards.at(0), edgeActor, req.release()); + auto ev = runtime.GrabEdgeEventRethrow(edgeActor); + auto* res = ev->Get(); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetStatus().GetCode(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetFinished(), true); + Cerr << "... read finished" << Endl; + } + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... starting upsert 1 (expected to displace)" << Endl; + auto upsert1 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (2, 20); + )"); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... 
starting upsert 2 (expected to displace)" << Endl; + auto upsert2 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (3, 30); + )"); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... unblocking updates" << Endl; + blockedUpdates.Unblock().Stop(); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... checking the update is logged before the new resolved timestamp" << Endl; + records = WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"update":{"value":10},"key":[1],"ts":"***"})", + R"({"resolved":"***"})", + R"({"update":{"value":20},"key":[2],"ts":"***"})", + R"({"update":{"value":30},"key":[3],"ts":"***"})", + R"({"resolved":"***"})", + }); + + TRowVersion resolved(0, 0); + for (auto& record : records) { + if (record.Has("resolved")) { + resolved.Step = record["resolved"][0].GetUInteger(); + resolved.TxId = record["resolved"][1].GetUInteger(); + } + if (record.Has("ts")) { + TRowVersion ts( + record["ts"][0].GetUInteger(), + record["ts"][1].GetUInteger()); + UNIT_ASSERT_C(resolved < ts, + "Record with ts " << ts << " after resolved " << resolved); + } + } + } + } // Cdc } // NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_column_stats.cpp b/ydb/core/tx/datashard/datashard_ut_column_stats.cpp index 459887375da5..2f7434f1972d 100644 --- a/ydb/core/tx/datashard/datashard_ut_column_stats.cpp +++ b/ydb/core/tx/datashard/datashard_ut_column_stats.cpp @@ -39,14 +39,14 @@ Y_UNIT_TEST_SUITE(StatisticsScan) { ui64 shardId = shards.at(0); auto request = std::make_unique(); - auto* reqTableId = request->Record.MutableTableId(); + auto* reqTableId = request->Record.MutableTable()->MutablePathId(); reqTableId->SetOwnerId(tableId.PathId.OwnerId); - reqTableId->SetTableId(tableId.PathId.LocalPathId); + reqTableId->SetLocalId(tableId.PathId.LocalPathId); runtime.SendToPipe(shardId, sender, 
request.release()); auto response = runtime.GrabEdgeEventRethrow(sender); auto& record = response->Get()->Record; - UNIT_ASSERT(record.GetStatus() == NKikimrStat::TEvStatisticsResponse::SUCCESS); + UNIT_ASSERT(record.GetStatus() == NKikimrStat::TEvStatisticsResponse::STATUS_SUCCESS); UNIT_ASSERT(record.ColumnsSize() == 2); for (ui32 i = 0; i < 2; ++i) { diff --git a/ydb/core/tx/datashard/datashard_ut_common_kqp.h b/ydb/core/tx/datashard/datashard_ut_common_kqp.h index 1e4950575184..eaec5b5215a2 100644 --- a/ydb/core/tx/datashard/datashard_ut_common_kqp.h +++ b/ydb/core/tx/datashard/datashard_ut_common_kqp.h @@ -186,10 +186,14 @@ namespace NKqpHelpers { return KqpSimpleExec(runtime, query, true, database); } - inline TString KqpSimpleBegin(TTestActorRuntime& runtime, TString& sessionId, TString& txId, const TString& query) { + inline auto KqpSimpleBeginSend(TTestActorRuntime& runtime, TString& sessionId, const TString& query) { sessionId = CreateSessionRPC(runtime); + return SendRequest(runtime, MakeSimpleRequestRPC(query, sessionId, /* txId */ {}, false /* commitTx */)); + } + + inline TString KqpSimpleBegin(TTestActorRuntime& runtime, TString& sessionId, TString& txId, const TString& query) { txId.clear(); - auto response = AwaitResponse(runtime, SendRequest(runtime, MakeSimpleRequestRPC(query, sessionId, txId, false /* commitTx */))); + auto response = AwaitResponse(runtime, KqpSimpleBeginSend(runtime, sessionId, query)); if (response.operation().status() != Ydb::StatusIds::SUCCESS) { return TStringBuilder() << "ERROR: " << response.operation().status(); } diff --git a/ydb/core/tx/datashard/datashard_ut_followers.cpp b/ydb/core/tx/datashard/datashard_ut_followers.cpp index f32604f21b8b..49b738f97dc4 100644 --- a/ydb/core/tx/datashard/datashard_ut_followers.cpp +++ b/ydb/core/tx/datashard/datashard_ut_followers.cpp @@ -426,7 +426,8 @@ Y_UNIT_TEST_SUITE(DataShardFollowers) { Cerr << "Captured pages request" << Endl; for (auto pageId : msg->Fetch->Pages) { auto type 
= NTable::NPage::EPage(msg->Fetch->PageCollection->Page(pageId).Type); - UNIT_ASSERT_C(type != NTable::EPage::BTreeIndex && type != NTable::EPage::FlatIndex, "Index pages should be preload during a part switch"); + // Note: FlatIndex pages also have been preloaded, but don't stick in private cache (see TLoaderEnv) + UNIT_ASSERT_C(type != NTable::EPage::BTreeIndex, "Index pages should be preload during a part switch"); } }); diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp index f0768d37cc6e..e65b10d9b119 100644 --- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp @@ -4,6 +4,7 @@ #include "read_iterator.h" #include +#include #include #include #include @@ -3969,7 +3970,7 @@ Y_UNIT_TEST_SUITE(DataShardReadIterator) { Y_UNIT_TEST(HandleMvccGoneInContinue) { // TODO } -}; +} Y_UNIT_TEST_SUITE(DataShardReadIteratorSysTables) { Y_UNIT_TEST(ShouldRead) { @@ -4053,7 +4054,7 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorSysTables) { UNIT_ASSERT_VALUES_EQUAL(record.GetStatus().GetCode(), Ydb::StatusIds::UNSUPPORTED); } -}; +} Y_UNIT_TEST_SUITE(DataShardReadIteratorState) { Y_UNIT_TEST(ShouldCalculateQuota) { @@ -4104,7 +4105,7 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorState) { UNIT_ASSERT_VALUES_EQUAL(state.Quota.Bytes, 131729); UNIT_ASSERT(state.State == NDataShard::TReadIteratorState::EState::Executing); } -}; +} Y_UNIT_TEST_SUITE(DataShardReadIteratorPageFaults) { Y_UNIT_TEST(CancelPageFaultedReadThenDropTable) { @@ -4670,6 +4671,154 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorConsistency) { "result2: " << result2); } + Y_UNIT_TEST(Bug_7674_IteratorDuplicateRows) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false); + TServer::TPtr server = new TServer(serverSettings); + + auto& runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + 
runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + + InitRoot(server, sender); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + CreateShardedTable(server, sender, "/Root", "table-1", 1); + + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50);"); + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (6, 60), (7, 70), (8, 80), (9, 90), (10, 100);"); + runtime.SimulateSleep(TDuration::Seconds(1)); + + auto forceSmallChunks = runtime.AddObserver( + [&](TEvDataShard::TEvRead::TPtr& ev) { + auto* msg = ev->Get(); + // Force chunks of at most 3 rows + msg->Record.SetMaxRowsInResult(3); + }); + + TBlockEvents blockedAcks(runtime); + TBlockEvents blockedResults(runtime); + TBlockEvents blockedContinue(runtime); + + auto waitFor = [&](const TString& description, const auto& condition, size_t count = 1) { + while (!condition()) { + UNIT_ASSERT_C(count > 0, "... failed to wait for " << description); + Cerr << "... 
waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + --count; + } + }; + + auto readFuture = KqpSimpleSend(runtime, "SELECT key, value FROM `/Root/table-1` ORDER BY key LIMIT 7"); + waitFor("first TEvReadContinue", [&]{ return blockedContinue.size() >= 1; }); + waitFor("first TEvReadResult", [&]{ return blockedResults.size() >= 1; }); + + blockedContinue.Unblock(1); + waitFor("second TEvReadContinue", [&]{ return blockedContinue.size() >= 1; }); + waitFor("second TEvReadResult", [&]{ return blockedResults.size() >= 2; }); + + // We need both results to arrive without pauses + blockedResults.Unblock(); + + waitFor("both TEvReadAcks", [&]{ return blockedAcks.size() >= 2; }); + + // Unblock the first TEvReadAck and then pending TEvReadContinue + blockedAcks.Unblock(1); + blockedContinue.Unblock(1); + + // Give it some time to trigger the bug + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Stop blocking everything + blockedAcks.Unblock().Stop(); + blockedResults.Unblock().Stop(); + blockedContinue.Unblock().Stop(); + + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(AwaitResponse(runtime, std::move(readFuture))), + "{ items { uint32_value: 1 } items { uint32_value: 10 } }, " + "{ items { uint32_value: 2 } items { uint32_value: 20 } }, " + "{ items { uint32_value: 3 } items { uint32_value: 30 } }, " + "{ items { uint32_value: 4 } items { uint32_value: 40 } }, " + "{ items { uint32_value: 5 } items { uint32_value: 50 } }, " + "{ items { uint32_value: 6 } items { uint32_value: 60 } }, " + "{ items { uint32_value: 7 } items { uint32_value: 70 } }"); + } + +} + +Y_UNIT_TEST_SUITE(DataShardReadIteratorLatency) { + + Y_UNIT_TEST(ReadSplitLatency) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false); + TServer::TPtr server = new TServer(serverSettings); + + auto& runtime 
= *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + + InitRoot(server, sender); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + CreateShardedTable(server, sender, "/Root", "table-1", 1); + + // Insert initial data + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50);"); + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (6, 60), (7, 70), (8, 80), (9, 90), (10, 100);"); + + // Copy table (this will ensure original shards stay alive after split) + { + auto senderCopy = runtime.AllocateEdgeActor(); + ui64 txId = AsyncCreateCopyTable(server, senderCopy, "/Root", "table-2", "/Root/table-1"); + WaitTxNotification(server, senderCopy, txId); + } + + TBlockEvents blockedReads(runtime); + + Cerr << "... starting read from table-1" << Endl; + TString readSessionId; + auto readFuture = KqpSimpleBeginSend(runtime, readSessionId, R"( + SELECT * FROM `/Root/table-1` ORDER BY key; + )"); + + runtime.WaitFor("blocked TEvRead", [&]{ return blockedReads.size() >= 1; }); + + { + Cerr << "... splitting table-1" << Endl; + SetSplitMergePartCountLimit(server->GetRuntime(), -1); + auto shards1before = GetTableShards(server, sender, "/Root/table-1"); + ui64 txId = AsyncSplitTable(server, sender, "/Root/table-1", shards1before.at(0), 5); + Cerr << "... split txId# " << txId << " started" << Endl; + WaitTxNotification(server, sender, txId); + Cerr << "... split txId# " << txId << " finished" << Endl; + } + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + auto readStartTs = runtime.GetCurrentTime(); + blockedReads.Unblock(); + blockedReads.Stop(); + auto readResponse = runtime.WaitFuture(std::move(readFuture)); + UNIT_ASSERT_VALUES_EQUAL(readResponse.operation().status(), Ydb::StatusIds::SUCCESS); + auto readLatency = runtime.GetCurrentTime() - readStartTs; + Cerr << "... 
read latency was " << readLatency << Endl; + UNIT_ASSERT_C(readLatency < TDuration::MilliSeconds(100), + "unexpected read latency " << readLatency); + } + } } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_replication.cpp b/ydb/core/tx/datashard/datashard_ut_replication.cpp index b0395077e915..8d5df4025d1c 100644 --- a/ydb/core/tx/datashard/datashard_ut_replication.cpp +++ b/ydb/core/tx/datashard/datashard_ut_replication.cpp @@ -1,11 +1,13 @@ #include #include "datashard_active_transaction.h" +#include "datashard_ut_common_kqp.h" #include namespace NKikimr { using namespace NKikimr::NDataShard; +using namespace NKikimr::NDataShard::NKqpHelpers; using namespace NSchemeShard; using namespace Tests; @@ -244,6 +246,9 @@ Y_UNIT_TEST_SUITE(DataShardReplication) { ExecSQL(server, sender, "SELECT * FROM `/Root/table-1`"); ExecSQL(server, sender, "INSERT INTO `/Root/table-1` (key, value) VALUES (1, 10);", true, Ydb::StatusIds::GENERIC_ERROR); + + WaitTxNotification(server, sender, AsyncAlterDropReplicationConfig(server, "/Root", "table-1")); + ExecSQL(server, sender, "INSERT INTO `/Root/table-1` (key, value) VALUES (1, 10);"); } Y_UNIT_TEST(ApplyChangesToReplicatedTable) { @@ -304,6 +309,46 @@ Y_UNIT_TEST_SUITE(DataShardReplication) { }, NKikimrTxDataShard::TEvApplyReplicationChangesResult::STATUS_REJECTED); } + Y_UNIT_TEST(ApplyChangesWithConcurrentTx) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + + InitRoot(server, sender); + CreateShardedTable(server, sender, "/Root", "table-1", TShardedTableOptions() + .Replicated(true) + .ReplicationConsistency(EReplicationConsistency::Weak) + ); + + auto shards = GetTableShards(server, sender, 
"/Root/table-1"); + auto tableId = ResolveTableId(server, sender, "/Root/table-1"); + + ApplyChanges(server, shards.at(0), tableId, "my-source", { + TChange{ .Offset = 0, .WriteTxId = 0, .Key = 1, .Value = 11 }, + }); + + TString sessionId; + TString txId; + UNIT_ASSERT_VALUES_EQUAL( + KqpSimpleBegin(runtime, sessionId, txId, "SELECT key, value FROM `/Root/table-1`;"), + "{ items { uint32_value: 1 } items { uint32_value: 11 } }"); + + ApplyChanges(server, shards.at(0), tableId, "my-source", { + TChange{ .Offset = 1, .WriteTxId = 0, .Key = 1, .Value = 21 }, + }); + + UNIT_ASSERT_VALUES_EQUAL( + KqpSimpleCommit(runtime, sessionId, txId, "SELECT key, value FROM `/Root/table-1`;"), + "{ items { uint32_value: 1 } items { uint32_value: 11 } }"); + } + } } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_stats.cpp b/ydb/core/tx/datashard/datashard_ut_stats.cpp index 18f19e6b6bcc..5b4c8311cba9 100644 --- a/ydb/core/tx/datashard/datashard_ut_stats.cpp +++ b/ydb/core/tx/datashard/datashard_ut_stats.cpp @@ -1,6 +1,7 @@ #include #include "ydb/core/tablet_flat/shared_sausagecache.h" #include +#include namespace NKikimr { @@ -441,28 +442,16 @@ Y_UNIT_TEST_SUITE(DataShardStats) { const auto shard1 = GetTableShards(server, sender, "/Root/table-1").at(0); UpsertRows(server, sender); - - bool captured = false; - auto observer = runtime.AddObserver([&](NSharedCache::TEvResult::TPtr& event) { - Cerr << "Captured NSharedCache::TEvResult from " << runtime.FindActorName(event->Sender) << " to " << runtime.FindActorName(event->GetRecipientRewrite()) << Endl; - if (runtime.FindActorName(event->GetRecipientRewrite()) == "DATASHARD_STATS_BUILDER") { - auto& message = *event->Get(); - event.Reset(static_cast *>( - new IEventHandle(event->Recipient, event->Sender, - new NSharedCache::TEvResult(message.Origin, message.Cookie, NKikimrProto::NODATA)))); - captured = true; - } + + TBlockEvents block(runtime, [&](const NSharedCache::TEvResult::TPtr& event) { + return 
runtime.FindActorName(event->GetRecipientRewrite()) == "DATASHARD_STATS_BUILDER"; }); CompactTable(runtime, shard1, tableId1, false); - for (int i = 0; i < 5 && !captured; ++i) { - TDispatchOptions options; - options.CustomFinalCondition = [&]() { return captured; }; - runtime.DispatchEvents(options, TDuration::Seconds(5)); - } - UNIT_ASSERT(captured); - observer.Remove(); + runtime.WaitFor("blocked read", [&]{ return block.size(); }); + + block.Stop().Unblock(); { Cerr << "Waiting stats.." << Endl; diff --git a/ydb/core/tx/datashard/datashard_ut_volatile.cpp b/ydb/core/tx/datashard/datashard_ut_volatile.cpp index 1ca101657cb6..f7bbe62eed62 100644 --- a/ydb/core/tx/datashard/datashard_ut_volatile.cpp +++ b/ydb/core/tx/datashard/datashard_ut_volatile.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKikimr { @@ -1226,8 +1227,12 @@ Y_UNIT_TEST_SUITE(DataShardVolatile) { "value: [\n" " 2\n" " ]\n"); + } + + SimulateSleep(runtime, TDuration::MilliSeconds(0)); - msg = readResults.back()->Get(); + { + auto* msg = readResults.back()->Get(); UNIT_ASSERT_VALUES_EQUAL(msg->Record.GetStatus().GetCode(), Ydb::StatusIds::SUCCESS); UNIT_ASSERT_VALUES_EQUAL(msg->Record.GetFinished(), true); } @@ -2988,6 +2993,94 @@ Y_UNIT_TEST_SUITE(DataShardVolatile) { "ERROR: ABORTED"); } + Y_UNIT_TEST(UpsertDependenciesShardsRestart) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false) + .SetEnableDataShardVolatileTransactions(true); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, NLog::PRI_TRACE); + + InitRoot(server, sender); + + UNIT_ASSERT_VALUES_EQUAL( + KqpSchemeExec(runtime, R"( + CREATE TABLE `/Root/table` (key uint32, value uint32, PRIMARY KEY (key)) + WITH 
(PARTITION_AT_KEYS = (10)); + )"), + "SUCCESS"); + + const auto shards = GetTableShards(server, sender, "/Root/table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 2u); + + // We need to fill table with some data + Cerr << "========= Upserting initial values =========" << Endl; + KqpSimpleExec(runtime, R"( + UPSERT INTO `/Root/table` (key, value) + VALUES (1, 1), (11, 11) + )"); + + TForceVolatileProposeArbiter forceArbiter(runtime, shards.at(0)); + TBlockEvents blockedPlan(runtime, + [actor = ResolveTablet(runtime, shards.at(0))](const auto& ev) { + return ev->GetRecipientRewrite() == actor; + }); + + Cerr << "========= Starting upsert 1 =========" << Endl; + auto upsertFuture1 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/table` (key, value) + VALUES (2, 2), (12, 12); + )"); + runtime.SimulateSleep(TDuration::Seconds(1)); + + Cerr << "========= Starting upsert 2 =========" << Endl; + auto upsertFuture2 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/table` (key, value) + VALUES (2, 1002), (12, 1012); + )"); + runtime.SimulateSleep(TDuration::Seconds(1)); + + UNIT_ASSERT_VALUES_EQUAL(blockedPlan.size(), 2u); + + // We expect transaction to execute at shards[1] + // However at shards[0] it didn't even start due to blocked plans + // Now we need to restart both shards, without giving them a chance to communicate + std::vector shardActors{ + ResolveTablet(runtime, shards.at(0)), + ResolveTablet(runtime, shards.at(1)), + }; + for (auto& shardActor : shardActors) { + Cerr << "... killing actor " << shardActor << Endl; + // Perform a synchronous send, this makes sure both shards handle TEvPoison before anything else + runtime.Send(new IEventHandle(shardActor, TActorId(), new TEvents::TEvPoison), 0, /* viaActorSystem */ false); + } + + blockedPlan.Stop().clear(); + + // Both queries should abort with UNDETERMINED + Cerr << "... 
waiting for query results" << Endl; + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(runtime.WaitFuture(std::move(upsertFuture1))), + "ERROR: UNDETERMINED"); + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(runtime.WaitFuture(std::move(upsertFuture2))), + "ERROR: UNDETERMINED"); + + // Split the second shard, which makes sure it's not stuck + Cerr << "========= Splitting shard 2 =========" << Endl; + SetSplitMergePartCountLimit(server->GetRuntime(), -1); + ui64 txId = AsyncSplitTable(server, sender, "/Root/table", shards.at(1), 15); + Cerr << "... split txId# " << txId << " started" << Endl; + WaitTxNotification(server, sender, txId); + Cerr << "... split finished" << Endl; + } + } // Y_UNIT_TEST_SUITE(DataShardVolatile) } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_write.cpp b/ydb/core/tx/datashard/datashard_ut_write.cpp index 50bfc995f6fb..03c6b3d79132 100644 --- a/ydb/core/tx/datashard/datashard_ut_write.cpp +++ b/ydb/core/tx/datashard/datashard_ut_write.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include "datashard_ut_common_kqp.h" namespace NKikimr { @@ -402,6 +404,69 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { } } + Y_UNIT_TEST(UpdateImmediate) { + auto [runtime, server, sender] = TestCreateServer(); + + TShardedTableOptions opts; + auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); + const ui64 shard = shards[0]; + const ui32 rowCount = 3; + + ui64 txId = 100; + + Cout << "========= Send immediate update to empty table, it should be no op =========\n"; + { + Update(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, ""); + } + + Cout << "========= Send immediate insert =========\n"; + { + Insert(runtime, sender, shard, tableId, opts.Columns_, rowCount, 
txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + + Cout << "========= Send immediate upsert, change one row =========\n"; + { + UpsertOneKeyValue(runtime, sender, shard, tableId, opts.Columns_, 0, 555, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, "key = 0, value = 555\nkey = 2, value = 3\nkey = 4, value = 5\n"); + } + + Cout << "========= Send immediate update, it should override all the rows =========\n"; + { + const auto writeResult = Update(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrigin(), shard); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetStep(), 0); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrderId(), txId); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTxId(), txId); + + const auto& tableAccessStats = writeResult.GetTxStats().GetTableAccessStats(0); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetTableInfo().GetName(), "/Root/table-1"); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetUpdateRow().GetCount(), rowCount); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + } Y_UNIT_TEST_TWIN(UpsertPrepared, Volatile) { auto [runtime, server, sender] = TestCreateServer(); @@ -1190,5 +1255,447 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { ""); } + Y_UNIT_TEST(ImmediateAndPlannedCommittedOpsRace) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + 
.SetUseRealThreads(false) + // It's easier to reproduce without volatile transactions, since + // then we can block pipeline by blocking readsets + .SetEnableDataShardVolatileTransactions(false); + + auto [runtime, server, sender] = TestCreateServer(serverSettings); + + TDisableDataShardLogBatching disableDataShardLogBatching; + UNIT_ASSERT_VALUES_EQUAL( + KqpSchemeExec(runtime, R"( + CREATE TABLE `/Root/table` (key int, value int, PRIMARY KEY (key)) + WITH (PARTITION_AT_KEYS = (10)); + )"), + "SUCCESS"); + + const auto tableId = ResolveTableId(server, sender, "/Root/table"); + const auto shards = GetTableShards(server, sender, "/Root/table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 2u); + + TVector columns{ + {"key", "Int32", true, false}, + {"value", "Int32", false, false}, + }; + + const ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + + const ui64 lockTxId1 = 1234567890001; + const ui64 lockTxId2 = 1234567890002; + const ui64 lockTxId3 = 1234567890003; + const ui64 lockNodeId = runtime.GetNodeId(0); + NLongTxService::TLockHandle lockHandle1(lockTxId1, runtime.GetActorSystem(0)); + NLongTxService::TLockHandle lockHandle2(lockTxId2, runtime.GetActorSystem(0)); + NLongTxService::TLockHandle lockHandle3(lockTxId3, runtime.GetActorSystem(0)); + + auto shard1 = shards.at(0); + auto shard1actor = ResolveTablet(runtime, shard1); + auto shard2 = shards.at(1); + + NKikimrDataEvents::TLock lock1shard1; + NKikimrDataEvents::TLock lock1shard2; + NKikimrDataEvents::TLock lock2; + + // 1. Make a read (lock1 shard1) + auto read1sender = runtime.AllocateEdgeActor(); + { + Cerr << "... 
making a read from " << shard1 << Endl; + auto req = std::make_unique(); + { + auto& record = req->Record; + record.SetReadId(1); + record.MutableTableId()->SetOwnerId(tableId.PathId.OwnerId); + record.MutableTableId()->SetTableId(tableId.PathId.LocalPathId); + record.AddColumns(1); + record.AddColumns(2); + record.SetLockTxId(lockTxId1); + record.SetLockNodeId(lockNodeId); + record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + i32 key = 1; + TVector keys; + keys.push_back(TCell::Make(key)); + req->Keys.push_back(TSerializedCellVec(TSerializedCellVec::Serialize(keys))); + } + ForwardToTablet(runtime, shard1, read1sender, req.release()); + auto ev = runtime.GrabEdgeEventRethrow(read1sender); + auto* res = ev->Get(); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetStatus().GetCode(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetFinished(), true); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetTxLocks().size(), 1u); + lock1shard1 = res->Record.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard1.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard1.ShortDebugString()); + } + + // 2. Make an uncommitted write (lock1 shard2) + { + Cerr << "... making an uncommmited write to " << shard2 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 11, 1101); + req->SetLockId(lockTxId1, lockNodeId); + auto result = Write(runtime, sender, shard2, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock1shard2 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard2.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard2.ShortDebugString()); + } + + // 3. Make an uncommitted write (lock2 shard1) + { + Cerr << "... 
making an uncommmited write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 2, 202); + req->SetLockId(lockTxId2, lockNodeId); + auto result = Write(runtime, sender, shard1, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock2 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock2.GetCounter() < 1000, "Unexpected lock in the result: " << lock2.ShortDebugString()); + } + + // 4. Break lock2 so later we could make an aborted distributed commit + { + Cerr << "... making an immediate write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 2, 203); + Write(runtime, sender, shard1, std::move(req)); + } + + // Start blocking readsets + TBlockEvents blockedReadSets(runtime); + + // Prepare an upsert (readsets flow between shards) + ui64 txId1 = 1234567890011; + auto tx1sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 3, 304); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req1->Record.MutableLocks()->AddSendingShards(shard1); + req1->Record.MutableLocks()->AddSendingShards(shard2); + req1->Record.MutableLocks()->AddReceivingShards(shard1); + req1->Record.MutableLocks()->AddReceivingShards(shard2); + *req1->Record.MutableLocks()->AddLocks() = lock1shard1; + + auto req2 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 13, 1304); + req2->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); 
+ req2->Record.MutableLocks()->AddSendingShards(shard1); + req2->Record.MutableLocks()->AddSendingShards(shard2); + req2->Record.MutableLocks()->AddReceivingShards(shard1); + req2->Record.MutableLocks()->AddReceivingShards(shard2); + *req2->Record.MutableLocks()->AddLocks() = lock1shard2; + + Cerr << "... preparing tx1 at " << shard1 << Endl; + auto res1 = Write(runtime, tx1sender, shard1, std::move(req1)); + Cerr << "... preparing tx1 at " << shard2 << Endl; + auto res2 = Write(runtime, tx1sender, shard2, std::move(req2)); + + ui64 minStep = Max(res1.GetMinStep(), res2.GetMinStep()); + ui64 maxStep = Min(res1.GetMaxStep(), res2.GetMaxStep()); + + Cerr << "... planning tx1 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx1sender, shards, { + .TxId = txId1, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + runtime.WaitFor("blocked readsets", [&]{ return blockedReadSets.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(blockedReadSets.size(), 2u); + + // Start blocking new plan steps + TBlockEvents blockedPlanSteps(runtime); + + // Prepare an upsert (readset flows from shard 1 to shard 2, already broken) + // Must not conflict with other transactions + ui64 txId2 = 1234567890012; + auto tx2sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId2, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 5, 505); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req1->Record.MutableLocks()->AddSendingShards(shard1); + req1->Record.MutableLocks()->AddReceivingShards(shard2); + *req1->Record.MutableLocks()->AddLocks() = lock2; + + auto req2 = MakeWriteRequestOneKeyValue( + txId2, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 15, 1505); + 
req2->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req2->Record.MutableLocks()->AddSendingShards(shard1); + req2->Record.MutableLocks()->AddReceivingShards(shard2); + + Cerr << "... preparing tx2 at " << shard1 << Endl; + auto res1 = Write(runtime, tx2sender, shard1, std::move(req1)); + Cerr << "... preparing tx2 at " << shard2 << Endl; + auto res2 = Write(runtime, tx2sender, shard2, std::move(req2)); + + ui64 minStep = Max(res1.GetMinStep(), res2.GetMinStep()); + ui64 maxStep = Min(res1.GetMaxStep(), res2.GetMaxStep()); + + Cerr << "... planning tx2 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx2sender, shards, { + .TxId = txId2, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + runtime.WaitFor("blocked plan steps", [&]{ return blockedPlanSteps.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(blockedPlanSteps.size(), 2u); + + // Block TEvPrivate::TEvProgressTransaction for shard1 + TBlockEvents blockedProgress(runtime, + [&](const TAutoPtr& ev) { + return ev->GetRecipientRewrite() == shard1actor && + ev->GetTypeRewrite() == EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 0; + }); + + blockedPlanSteps.Unblock(); + runtime.WaitFor("blocked progress", [&]{ return blockedProgress.size() >= 1; }); + runtime.SimulateSleep(TDuration::MilliSeconds(1)); // let it commit + UNIT_ASSERT_VALUES_EQUAL(blockedProgress.size(), 1u); + + // Make an unrelated immediate write, this will pin write (and future snapshot) version to tx2 + { + Cerr << "... 
making an immediate write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 4, 406); + Write(runtime, sender, shard1, std::move(req)); + } + + // Block commit attempts at shard1 + TBlockEvents blockedCommits(runtime, + [&](const TEvBlobStorage::TEvPut::TPtr& ev) { + auto* msg = ev->Get(); + return msg->Id.TabletID() == shard1 && msg->Id.Channel() == 0; + }); + + // Make an uncommitted write to a key overlapping with tx1 + // Since tx1 has been validated, and reads are pinned at tx2, tx3 will + // be after tx1 and blocked by a read dependency. Since tx2 has not + // entered the pipeline yet, version will not be above tx2. + auto tx3sender = runtime.AllocateEdgeActor(); + { + Cerr << "... starting uncommitted upsert at " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 3, 307); + req->SetLockId(lockTxId3, lockNodeId); + runtime.SendToPipe(shard1, tx3sender, req.release()); + } + + // Wait for some time and make sure there have been no unexpected + // commits, which would indicate the upsert is blocked by tx1. + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + UNIT_ASSERT_VALUES_EQUAL_C(blockedCommits.size(), 0u, + "The uncommitted upsert didn't block. Something may have changed and the test needs to be revised."); + + // Now, while blocking commits, unblock progress and let tx2 to execute, + // which will abort due to broken locks. + blockedProgress.Unblock(); + blockedProgress.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + size_t commitsAfterTx2 = blockedCommits.size(); + Cerr << "... 
observed " << commitsAfterTx2 << " commits after tx2 unblock" << Endl; + UNIT_ASSERT_C(commitsAfterTx2 >= 2, + "Expected tx2 to produce at least 2 commits (store out rs + abort tx)" + << ", observed " << commitsAfterTx2 << ". Something may have changed."); + + // Now, while still blocking commits, unblock readsets + // Everything will unblock and execute tx1 then tx3 + blockedReadSets.Unblock(); + blockedReadSets.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + size_t commitsAfterTx3 = blockedCommits.size() - commitsAfterTx2; + Cerr << "... observed " << commitsAfterTx3 << " more commits after readset unblock" << Endl; + UNIT_ASSERT_C(commitsAfterTx3 >= 2, + "Expected at least 2 commits after readset unblock (tx1, tx3), but only " + << commitsAfterTx3 << " have been observed."); + + // Finally, stop blocking commits + // We expect completion handlers to run in tx3, tx1, tx2 order, triggering the bug + blockedCommits.Unblock(); + blockedCommits.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Check tx3 reply + { + auto ev = runtime.GrabEdgeEventRethrow(tx3sender); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + + // Check tx1 reply + { + auto ev1 = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev1->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + auto ev2 = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev2->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + + // Check tx2 reply + { + auto ev1 = runtime.GrabEdgeEventRethrow(tx2sender); + UNIT_ASSERT_VALUES_EQUAL(ev1->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + auto ev2 = runtime.GrabEdgeEventRethrow(tx2sender); + UNIT_ASSERT_VALUES_EQUAL(ev2->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } + } + + 
Y_UNIT_TEST(PreparedDistributedWritePageFault) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false) + .SetEnableDataShardVolatileTransactions(false); + + auto [runtime, server, sender] = TestCreateServer(serverSettings); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + // Use a policy without levels and very small page sizes, effectively making each row on its own page + NLocalDb::TCompactionPolicyPtr policy = NLocalDb::CreateDefaultTablePolicy(); + policy->MinDataPageSize = 1; + + auto opts = TShardedTableOptions() + .Columns({{"key", "Int32", true, false}, + {"value", "Int32", false, false}}) + .Policy(policy.Get()); + const auto& columns = opts.Columns_; + auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table", opts); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 1u); + + const ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + + const ui64 lockTxId1 = 1234567890001; + const ui64 lockNodeId = runtime.GetNodeId(0); + NLongTxService::TLockHandle lockHandle1(lockTxId1, runtime.GetActorSystem(0)); + + auto shard1 = shards.at(0); + NKikimrDataEvents::TLock lock1shard1; + + // 1. Make an uncommitted write (lock1 shard1) + { + Cerr << "... making an uncommmited write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 1, 11); + req->SetLockId(lockTxId1, lockNodeId); + auto result = Write(runtime, sender, shard1, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock1shard1 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard1.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard1.ShortDebugString()); + } + + // 2. Compact and reboot the tablet + Cerr << "... 
compacting shard " << shard1 << Endl; + CompactTable(runtime, shard1, tableId, false); + Cerr << "... rebooting shard " << shard1 << Endl; + RebootTablet(runtime, shard1, sender); + runtime.SimulateSleep(TDuration::Seconds(1)); + + // 3. Prepare a distributed write (single shard for simplicity) + ui64 txId1 = 1234567890011; + auto tx1sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 1, 22); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + + Cerr << "... preparing tx1 at " << shard1 << Endl; + auto res1 = Write(runtime, tx1sender, shard1, std::move(req1)); + + // Reboot, making sure tx is only loaded after it's planned + // This causes tx to skip conflicts cache and go to execution + // The first attempt to execute will page fault looking for conflicts + // Tx will be released, and will trigger the bug on restore + Cerr << "... rebooting shard " << shard1 << Endl; + RebootTablet(runtime, shard1, sender); + runtime.SimulateSleep(TDuration::Seconds(1)); + + ui64 minStep = res1.GetMinStep(); + ui64 maxStep = res1.GetMaxStep(); + + Cerr << "... planning tx1 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx1sender, { shard1 }, { + .TxId = txId1, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + // 4. Check tx1 reply (it must succeed) + { + Cerr << "... 
waiting for tx1 result" << Endl; + auto ev = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + } + } // Y_UNIT_TEST_SUITE } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index 5670a3abca88..0d67c7f02350 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -95,6 +95,7 @@ std::tuple TValidatedWriteTxOperatio case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_DELETE: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: break; default: return {NKikimrTxDataShard::TError::BAD_ARGUMENT, TStringBuilder() << OperationType << " operation is not supported now"}; @@ -415,8 +416,9 @@ TValidatedWriteTx::TPtr TWriteOperation::BuildWriteTx(TDataShard* self) void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider) { ReleasedTxDataSize = provider.GetMemoryLimit() + provider.GetRequestedMemory(); - if (!WriteTx || IsTxDataReleased()) + if (!WriteTx || WriteTx->GetIsReleased()) { return; + } WriteTx->ReleaseTxData(); // Immediate transactions have no body stored. 
diff --git a/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp b/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp index 74c0d4e740a5..b61b78d7f180 100644 --- a/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp +++ b/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp @@ -217,14 +217,9 @@ EExecutionStatus TExecuteKqpDataTxUnit::Execute(TOperation::TPtr op, TTransactio auto allocGuard = tasksRunner.BindAllocator(txc.GetMemoryLimit() - dataTx->GetTxSize()); - NKqp::NRm::TKqpResourcesRequest req; - req.MemoryPool = NKqp::NRm::EKqpMemoryPool::DataQuery; - req.ExternalMemory = txc.GetMemoryLimit(); - ui64 taskId = dataTx->GetFirstKqpTaskId(); - NKqp::GetKqpResourceManager()->NotifyExternalResourcesAllocated(txId, taskId, req); - + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Add(txc.GetMemoryLimit()); Y_DEFER { - NKqp::GetKqpResourceManager()->FreeResources(txId, taskId); + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Sub(txc.GetMemoryLimit()); }; LOG_T("Operation " << *op << " (execute_kqp_data_tx) at " << tabletId diff --git a/ydb/core/tx/datashard/execute_write_unit.cpp b/ydb/core/tx/datashard/execute_write_unit.cpp index 57111e235be5..b5ba1432cecb 100644 --- a/ydb/core/tx/datashard/execute_write_unit.cpp +++ b/ydb/core/tx/datashard/execute_write_unit.cpp @@ -147,7 +147,7 @@ class TExecuteWriteUnit : public TExecutionUnit { switch (operationType) { case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT: { fillOps(rowIdx); - userDb.UpdateRow(fullTableId, key, ops); + userDb.UpsertRow(fullTableId, key, ops); break; } case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: { @@ -164,6 +164,11 @@ class TExecuteWriteUnit : public TExecutionUnit { userDb.InsertRow(fullTableId, key, ops); break; } + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: { + fillOps(rowIdx); + userDb.UpdateRow(fullTableId, key, ops); + break; + } default: // Checked before in TWriteOperation Y_FAIL_S(operationType << " 
operation is not supported now"); @@ -173,7 +178,8 @@ class TExecuteWriteUnit : public TExecutionUnit { switch (operationType) { case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: - case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: { + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: { DataShard.IncCounter(COUNTER_WRITE_ROWS, matrix.GetRowCount()); DataShard.IncCounter(COUNTER_WRITE_BYTES, matrix.GetBuffer().size()); break; diff --git a/ydb/core/tx/datashard/export_common.cpp b/ydb/core/tx/datashard/export_common.cpp index ee801ebe4c52..cf999f1f76a7 100644 --- a/ydb/core/tx/datashard/export_common.cpp +++ b/ydb/core/tx/datashard/export_common.cpp @@ -55,18 +55,23 @@ TMaybe GenYdbScheme( try { FillTableBoundary(scheme, tableDesc, mkqlKeyType); + FillIndexDescription(scheme, tableDesc); } catch (const yexception&) { return Nothing(); } - FillIndexDescription(scheme, tableDesc, mkqlKeyType); FillStorageSettings(scheme, tableDesc); FillColumnFamilies(scheme, tableDesc); FillAttributes(scheme, pathDesc); FillPartitioningSettings(scheme, tableDesc); FillKeyBloomFilter(scheme, tableDesc); FillReadReplicasSettings(scheme, tableDesc); - FillSequenceDescription(scheme, tableDesc); + + TString error; + Ydb::StatusIds::StatusCode status; + if (!FillSequenceDescription(scheme, tableDesc, status, error)) { + return Nothing(); + } return scheme; } diff --git a/ydb/core/tx/datashard/import_s3.cpp b/ydb/core/tx/datashard/import_s3.cpp index ba7227a7a74c..655ee80172ab 100644 --- a/ydb/core/tx/datashard/import_s3.cpp +++ b/ydb/core/tx/datashard/import_s3.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/ydb/core/tx/datashard/move_index_unit.cpp b/ydb/core/tx/datashard/move_index_unit.cpp index 6b3a30be457a..73fa338d35e7 100644 --- a/ydb/core/tx/datashard/move_index_unit.cpp 
+++ b/ydb/core/tx/datashard/move_index_unit.cpp @@ -60,20 +60,27 @@ class TMoveIndexUnit : public TExecutionUnit { NIceDb::TNiceDb db(txc.DB); ChangeRecords.clear(); - if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { - return EExecutionStatus::Restart; - } + auto changesQueue = DataShard.TakeChangesQueue(); auto lockChangeRecords = DataShard.TakeLockChangeRecords(); auto committedLockChangeRecords = DataShard.TakeCommittedLockChangeRecords(); + if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); + DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); + DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); + return EExecutionStatus::Restart; + } + if (!DataShard.LoadLockChangeRecords(db)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; } if (!DataShard.LoadChangeRecordCommits(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; @@ -99,7 +106,7 @@ class TMoveIndexUnit : public TExecutionUnit { void Complete(TOperation::TPtr, const TActorContext& ctx) override { DataShard.CreateChangeSender(ctx); DataShard.MaybeActivateChangeSender(ctx); - DataShard.EnqueueChangeRecords(std::move(ChangeRecords)); + DataShard.EnqueueChangeRecords(std::move(ChangeRecords), 0, true); } }; diff --git a/ydb/core/tx/datashard/move_table_unit.cpp b/ydb/core/tx/datashard/move_table_unit.cpp index 846f517ee10a..3e34394e15d6 100644 --- a/ydb/core/tx/datashard/move_table_unit.cpp +++ b/ydb/core/tx/datashard/move_table_unit.cpp @@ -60,20 +60,27 @@ class TMoveTableUnit : public TExecutionUnit { NIceDb::TNiceDb db(txc.DB); 
ChangeRecords.clear(); - if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { - return EExecutionStatus::Restart; - } + auto changesQueue = DataShard.TakeChangesQueue(); auto lockChangeRecords = DataShard.TakeLockChangeRecords(); auto committedLockChangeRecords = DataShard.TakeCommittedLockChangeRecords(); + if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); + DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); + DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); + return EExecutionStatus::Restart; + } + if (!DataShard.LoadLockChangeRecords(db)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; } if (!DataShard.LoadChangeRecordCommits(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; @@ -99,7 +106,7 @@ class TMoveTableUnit : public TExecutionUnit { void Complete(TOperation::TPtr, const TActorContext& ctx) override { DataShard.CreateChangeSender(ctx); DataShard.MaybeActivateChangeSender(ctx); - DataShard.EnqueueChangeRecords(std::move(ChangeRecords)); + DataShard.EnqueueChangeRecords(std::move(ChangeRecords), 0, true); } }; diff --git a/ydb/core/tx/datashard/operation.h b/ydb/core/tx/datashard/operation.h index e530fe6fc254..df0687d57488 100644 --- a/ydb/core/tx/datashard/operation.h +++ b/ydb/core/tx/datashard/operation.h @@ -883,6 +883,12 @@ class TOperation */ virtual void OnCleanup(TDataShard& self, std::vector>& replies); + + // CommittingOps book keeping + const std::optional& GetCommittingOpsVersion() const { return CommittingOpsVersion; } + void SetCommittingOpsVersion(const 
TRowVersion& version) { CommittingOpsVersion = version; } + void ResetCommittingOpsVersion() { CommittingOpsVersion.reset(); } + protected: TOperation() : TOperation(TBasicOpInfo()) @@ -956,6 +962,8 @@ class TOperation static NMiniKQL::IEngineFlat::TValidationInfo EmptyKeysInfo; + std::optional CommittingOpsVersion; + public: std::optional MvccReadWriteVersion; diff --git a/ydb/core/tx/datashard/read_iterator.h b/ydb/core/tx/datashard/read_iterator.h index 8b2a4a2b7e5e..6648388a6138 100644 --- a/ydb/core/tx/datashard/read_iterator.h +++ b/ydb/core/tx/datashard/read_iterator.h @@ -205,6 +205,7 @@ struct TReadIteratorState { TActorId SessionId; TMonotonic StartTs; bool IsFinished = false; + bool ReadContinuePending = false; // note that we send SeqNo's starting from 1 ui64 SeqNo = 0; diff --git a/ydb/core/tx/datashard/remove_schema_snapshots.cpp b/ydb/core/tx/datashard/remove_schema_snapshots.cpp new file mode 100644 index 000000000000..fe63f30be61d --- /dev/null +++ b/ydb/core/tx/datashard/remove_schema_snapshots.cpp @@ -0,0 +1,54 @@ +#include "datashard_impl.h" + +namespace NKikimr::NDataShard { + +class TDataShard::TTxRemoveSchemaSnapshots: public NTabletFlatExecutor::TTransactionBase { +public: + TTxRemoveSchemaSnapshots(TDataShard* self) + : TBase(self) + { } + + TTxType GetTxType() const override { return TXTYPE_REMOVE_SCHEMA_SNAPSHOTS; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + while (!Self->PendingSchemaSnapshotsToGc.empty()) { + const auto key = Self->PendingSchemaSnapshotsToGc.back(); + const auto* snapshot = Self->GetSchemaSnapshotManager().FindSnapshot(key); + + if (!snapshot) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + if (Self->GetSchemaSnapshotManager().HasReference(key)) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + auto table = Self->FindUserTable(TPathId(key.OwnerId, key.PathId)); + if (!table) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + if 
(snapshot->Schema->GetTableSchemaVersion() >= table->GetTableSchemaVersion()) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + Self->GetSchemaSnapshotManager().RemoveShapshot(txc.DB, key); + Self->PendingSchemaSnapshotsToGc.pop_back(); + } + + return true; + } + + void Complete(const TActorContext&) override { + } +}; + +void TDataShard::Handle(TEvPrivate::TEvRemoveSchemaSnapshots::TPtr&, const TActorContext& ctx) { + Execute(new TTxRemoveSchemaSnapshots(this), ctx); +} + +} // namespace NKikimr::NDataShard diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp index 1421e3afa670..0a4d50965dd8 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp @@ -1730,6 +1730,22 @@ ui64 AsyncAlterDropStream( return RunSchemeTx(*server->GetRuntime(), std::move(request)); } +ui64 AsyncAlterDropReplicationConfig( + Tests::TServer::TPtr server, + const TString& workingDir, + const TString& tableName) +{ + auto request = SchemeTxTemplate(NKikimrSchemeOp::ESchemeOpAlterTable, workingDir); + auto& tx = *request->Record.MutableTransaction()->MutableModifyScheme(); + tx.SetInternal(true); + + auto& desc = *tx.MutableAlterTable(); + desc.SetName(tableName); + desc.MutableReplicationConfig()->SetMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE); + + return RunSchemeTx(*server->GetRuntime(), std::move(request)); +} + ui64 AsyncCreateContinuousBackup( Tests::TServer::TPtr server, const TString& workingDir, @@ -1890,6 +1906,24 @@ TRowVersion AcquireReadSnapshot(TTestActorRuntime& runtime, const TString& datab return TRowVersion(record.GetSnapshotStep(), record.GetSnapshotTxId()); } +void AddValueToCells(ui64 value, const TString& columnType, TVector& cells, TVector& stringValues) { + if (columnType == "Uint64") { + cells.emplace_back(TCell((const char*)&value, sizeof(ui64))); + } else if (columnType == 
"Uint32") { + ui32 value32 = (ui32)value; + cells.emplace_back(TCell((const char*)&value32, sizeof(ui32))); + } else if (columnType == "Int32") { + i32 value32 = (i32)value; + cells.push_back(TCell::Make(value32)); + } else if (columnType == "Utf8") { + stringValues.emplace_back(Sprintf("String_%" PRIu64, value)); + cells.emplace_back(TCell(stringValues.back().c_str(), stringValues.back().size())); + } else { + Y_ABORT("Unsupported column type"); + } +} + + std::unique_ptr MakeWriteRequest(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui32 rowCount, ui64 seed) { std::vector columnIds; for (ui32 col = 0; col < columns.size(); ++col) { @@ -1906,19 +1940,8 @@ std::unique_ptr MakeWriteRequest(std::optional MakeWriteRequest(std::optional MakeWriteRequestOneKeyValue(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value) { + UNIT_ASSERT_VALUES_EQUAL(columns.size(), 2); + + std::vector columnIds = {1, 2}; + + TVector stringValues; + TVector cells; + + AddValueToCells(key, columns[0].Type, cells, stringValues); + AddValueToCells(value, columns[1].Type, cells, stringValues); + + TSerializedCellMatrix matrix(cells, 1, 2); + TString blobData = matrix.ReleaseBuffer(); + + std::unique_ptr evWrite = txId ? 
std::make_unique(*txId, txMode) : std::make_unique(txMode); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); + evWrite->AddOperation(operationType, tableId, columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); + + return evWrite; +} + NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, std::unique_ptr&& request, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) { auto txMode = request->Record.GetTxMode(); @@ -1968,6 +2012,13 @@ NKikimrDataEvents::TEvWriteResult Upsert(TTestActorRuntime& runtime, TActorId se return Write(runtime, sender, shardId, std::move(request), expectedStatus); } +NKikimrDataEvents::TEvWriteResult UpsertOneKeyValue(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) +{ + auto request = MakeWriteRequestOneKeyValue(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, columns, key, value); + return Write(runtime, sender, shardId, std::move(request), expectedStatus); +} + + NKikimrDataEvents::TEvWriteResult Replace(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) { auto request = MakeWriteRequest(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, tableId, columns, rowCount); @@ -1986,6 +2037,12 @@ NKikimrDataEvents::TEvWriteResult Insert(TTestActorRuntime& runtime, TActorId se return Write(runtime, sender, shardId, std::move(request), expectedStatus); } +NKikimrDataEvents::TEvWriteResult Update(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const 
TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) +{ + auto request = MakeWriteRequest(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE, tableId, columns, rowCount); + return Write(runtime, sender, shardId, std::move(request), expectedStatus); +} + TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithEvWrite(TTestActorRuntime& runtime, TEvWriteRows& rows) { if (rows.empty()) return {}; diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h index 9257505c43a9..cc348b3c0f24 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h @@ -670,6 +670,11 @@ ui64 AsyncAlterDropStream( const TString& tableName, const TString& streamName); +ui64 AsyncAlterDropReplicationConfig( + Tests::TServer::TPtr server, + const TString& workingDir, + const TString& tableName); + ui64 AsyncCreateContinuousBackup( Tests::TServer::TPtr server, const TString& workingDir, @@ -734,11 +739,15 @@ void ExecSQL(Tests::TServer::TPtr server, TRowVersion AcquireReadSnapshot(TTestActorRuntime& runtime, const TString& databaseName, ui32 nodeIndex = 0); std::unique_ptr MakeWriteRequest(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui32 rowCount, ui64 seed = 0); +std::unique_ptr MakeWriteRequestOneKeyValue(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value); + NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, std::unique_ptr&& request, NKikimrDataEvents::TEvWriteResult::EStatus 
expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Upsert(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); +NKikimrDataEvents::TEvWriteResult UpsertOneKeyValue(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Replace(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Delete(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Insert(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); +NKikimrDataEvents::TEvWriteResult Update(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional 
txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult WaitForWriteCompleted(TTestActorRuntime& runtime, TActorId sender, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); struct TEvWriteRow { diff --git a/ydb/core/tx/datashard/volatile_tx.cpp b/ydb/core/tx/datashard/volatile_tx.cpp index 1a72b7c10783..93a28c836295 100644 --- a/ydb/core/tx/datashard/volatile_tx.cpp +++ b/ydb/core/tx/datashard/volatile_tx.cpp @@ -271,9 +271,6 @@ namespace NKikimr::NDataShard { void TVolatileTxManager::Start(const TActorContext& ctx) { for (auto& pr : VolatileTxs) { - if (!pr.second->Dependencies.empty()) { - continue; - } switch (pr.second->State) { case EVolatileTxState::Waiting: for (ui64 target : pr.second->Participants) { @@ -347,7 +344,7 @@ namespace NKikimr::NDataShard { std::vector byCommitOrder; byCommitOrder.reserve(VolatileTxs.size()); - auto postProcessTxInfo = [this, &byCommitOrder](TVolatileTxInfo* info) { + auto postProcessTxInfo = [&](TVolatileTxInfo* info) { switch (info->State) { case EVolatileTxState::Waiting: case EVolatileTxState::Committed: { @@ -402,6 +399,28 @@ namespace NKikimr::NDataShard { VolatileTxByCommitOrder.PushBack(info); } + ui64 numWaiting = 0; + ui64 numCommitted = 0; + ui64 numAborting = 0; + for (auto& pr : VolatileTxs) { + switch (pr.second->State) { + case EVolatileTxState::Waiting: + ++numWaiting; + break; + case EVolatileTxState::Committed: + ++numCommitted; + break; + case EVolatileTxState::Aborting: + ++numAborting; + break; + } + } + + Self->SetCounter(COUNTER_VOLATILE_TX_INFLIGHT, VolatileTxs.size()); + Self->SetCounter(COUNTER_VOLATILE_TX_WAITING_COUNT, numWaiting); + Self->SetCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT, numCommitted); + Self->SetCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT, numAborting); + return true; } @@ -557,6 
+576,8 @@ namespace NKikimr::NDataShard { db.Table().Key(info->TxId, shardId).Update(); } + UpdateCountersAdd(info); + txc.DB.OnRollback([this, txId]() { RollbackAddVolatileTx(txId); }); @@ -596,7 +617,10 @@ namespace NKikimr::NDataShard { // FIXME: do we need to handle WaitingSnapshotEvents somehow? + // Note: not counting latency (this is a rollback) + // This will also unlink from linked lists + UpdateCountersRemove(info); VolatileTxs.erase(txId); } @@ -635,6 +659,10 @@ namespace NKikimr::NDataShard { VolatileTxByCommitTxId.erase(commitTxId); } VolatileTxByVersion.erase(info); + + Self->IncCounter(COUNTER_VOLATILE_TX_TOTAL_LATENCY_MS, info->LatencyTimer.Passed() * 1000); + + UpdateCountersRemove(info); VolatileTxs.erase(txId); if (prevUncertain < GetMinUncertainVersion()) { @@ -731,7 +759,7 @@ namespace NKikimr::NDataShard { ui64 txId = info->TxId; // Move tx to aborting, but don't persist yet, we need a separate transaction for that - info->State = EVolatileTxState::Aborting; + ChangeState(info, EVolatileTxState::Aborting); // Aborted transactions don't have dependencies for (ui64 dependencyTxId : info->Dependencies) { @@ -845,7 +873,7 @@ namespace NKikimr::NDataShard { // Move tx to committed. // Note that we don't need to wait until the new state is committed (it's repeatable), // but we need to wait until the initial effects are committed and persisted. 
- info->State = EVolatileTxState::Committed; + ChangeState(info, EVolatileTxState::Committed); db.Table().Key(txId).Update( NIceDb::TUpdate(info->State)); @@ -875,7 +903,7 @@ namespace NKikimr::NDataShard { if (info->AddCommitted) { RunCommitCallbacks(info); } - if (info->Dependencies.empty() && ReadyToDbCommit(info)) { + if (ReadyToDbCommit(info)) { AddPendingCommit(txId); } } @@ -926,7 +954,9 @@ namespace NKikimr::NDataShard { case EVolatileTxState::Waiting: break; case EVolatileTxState::Committed: - AddPendingCommit(dependentTxId); + if (ReadyToDbCommit(dependent)) { + AddPendingCommit(dependentTxId); + } break; case EVolatileTxState::Aborting: Y_ABORT("FIXME: unexpected dependency removed from aborting tx"); @@ -1031,4 +1061,43 @@ namespace NKikimr::NDataShard { return false; } + void TVolatileTxManager::UpdateCountersAdd(TVolatileTxInfo* info) { + Self->IncCounter(COUNTER_VOLATILE_TX_INFLIGHT); + switch (info->State) { + case EVolatileTxState::Waiting: + Self->IncCounter(COUNTER_VOLATILE_TX_WAITING_COUNT); + break; + case EVolatileTxState::Committed: + Self->IncCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT); + break; + case EVolatileTxState::Aborting: + Self->IncCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT); + break; + } + } + + void TVolatileTxManager::UpdateCountersRemove(TVolatileTxInfo* info) { + Self->DecCounter(COUNTER_VOLATILE_TX_INFLIGHT); + switch (info->State) { + case EVolatileTxState::Waiting: + Self->DecCounter(COUNTER_VOLATILE_TX_WAITING_COUNT); + break; + case EVolatileTxState::Committed: + Self->DecCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT); + break; + case EVolatileTxState::Aborting: + Self->DecCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT); + break; + } + } + + void TVolatileTxManager::ChangeState(TVolatileTxInfo* info, EVolatileTxState state) { + if (info->State == EVolatileTxState::Waiting) { + Self->IncCounter(COUNTER_VOLATILE_TX_WAIT_LATENCY_MS, info->LatencyTimer.Passed() * 1000); + } + UpdateCountersRemove(info); + info->State = state; + 
UpdateCountersAdd(info); + } + } // namespace NKikimr::NDataShard diff --git a/ydb/core/tx/datashard/volatile_tx.h b/ydb/core/tx/datashard/volatile_tx.h index a7f40b3178bf..010a21ab3988 100644 --- a/ydb/core/tx/datashard/volatile_tx.h +++ b/ydb/core/tx/datashard/volatile_tx.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace NKikimr::NTabletFlatExecutor { @@ -75,6 +76,9 @@ namespace NKikimr::NDataShard { // DECISION_ABORT on abort. std::vector ArbiterReadSets; + // Calculates Waiting and Total latency + THPTimer LatencyTimer; + template bool IsInList() const { using TItem = TIntrusiveListItem; @@ -276,6 +280,10 @@ namespace NKikimr::NDataShard { void RemoveFromCommitOrder(TVolatileTxInfo* info); bool ReadyToDbCommit(TVolatileTxInfo* info) const; + void UpdateCountersAdd(TVolatileTxInfo* info); + void UpdateCountersRemove(TVolatileTxInfo* info); + void ChangeState(TVolatileTxInfo* info, EVolatileTxState state); + private: TDataShard* const Self; absl::flat_hash_map> VolatileTxs; // TxId -> Info diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index 7bcfa4c2af15..6cfc69901adb 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -189,6 +189,7 @@ SRCS( receive_snapshot_unit.cpp remove_lock_change_records.cpp remove_locks.cpp + remove_schema_snapshots.cpp range_ops.cpp read_iterator.h restore_unit.cpp @@ -243,7 +244,6 @@ PEERDIR( ydb/core/formats ydb/core/io_formats/ydb_dump ydb/core/kqp/runtime - ydb/core/persqueue/partition_key_range ydb/core/persqueue/writer ydb/core/protos ydb/core/tablet diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.cpp b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp new file mode 100644 index 000000000000..e7573c23612e --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp @@ -0,0 +1,50 @@ +#include "actor.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +void TMemoryLimiterActor::Bootstrap() { + Manager = 
std::make_shared(SelfId(), Config, Name, Signals, DefaultStage); + Become(&TThis::StateWait); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev) { + for (auto&& i : ev->Get()->GetAllocations()) { + Manager->RegisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId(), i, + ev->Get()->GetStageFeaturesIdx()); + } +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev) { + Manager->UnregisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev) { + Manager->UpdateAllocation( + ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId(), ev->Get()->GetVolume()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev) { + Manager->UnregisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev) { + Manager->RegisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev) { + Manager->UnregisterProcess(ev->Get()->GetExternalProcessId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev) { + Manager->RegisterProcess(ev->Get()->GetExternalProcessId(), ev->Get()->GetStages()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev) { + Manager->UnregisterProcessScope(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev) { + Manager->RegisterProcessScope(ev->Get()->GetExternalProcessId(), 
ev->Get()->GetExternalScopeId()); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.h b/ydb/core/tx/limiter/grouped_memory/service/actor.h new file mode 100644 index 000000000000..4b4506ba5b99 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.h @@ -0,0 +1,59 @@ +#pragma once +#include "counters.h" +#include "manager.h" + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { +class TManager; +class TMemoryLimiterActor: public NActors::TActorBootstrapped { +private: + std::shared_ptr Manager; + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const std::shared_ptr DefaultStage; + +public: + TMemoryLimiterActor(const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , DefaultStage(defaultStage) { + } + + void Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev); + + void Bootstrap(); + + STFUNC(StateWait) { + switch (ev->GetTypeRewrite()) { + hFunc(NEvents::TEvExternal::TEvStartTask, Handle); + hFunc(NEvents::TEvExternal::TEvFinishTask, Handle); + hFunc(NEvents::TEvExternal::TEvUpdateTask, Handle); + hFunc(NEvents::TEvExternal::TEvStartGroup, Handle); + hFunc(NEvents::TEvExternal::TEvFinishGroup, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcess, Handle); + 
hFunc(NEvents::TEvExternal::TEvFinishProcess, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcessScope, Handle); + hFunc(NEvents::TEvExternal::TEvFinishProcessScope, Handle); + default: + AFL_VERIFY(false)("ev_type", ev->GetTypeName()); + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp new file mode 100644 index 000000000000..2d04be2c9cef --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp @@ -0,0 +1,26 @@ +#include "allocation.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationInfo::TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, + const std::shared_ptr& allocation, + const std::shared_ptr& stage) + : Allocation(allocation) + , AllocationInternalGroupId(allocationInternalGroupId) + , Identifier(TValidator::CheckNotNull(Allocation)->GetIdentifier()) + , ProcessId(processId) + , ScopeId(scopeId) + , Stage(stage) { + AFL_VERIFY(Stage); + AFL_VERIFY(Allocation); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "add")("id", Allocation->GetIdentifier())("stage", Stage->GetName()); + AllocatedVolume = Allocation->GetMemory(); + Stage->Add(AllocatedVolume, Allocation->IsAllocated()); + if (allocation->IsAllocated()) { + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated_on_add")("allocation_id", Identifier)("stage", Stage->GetName()); + Allocation = nullptr; + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.h b/ydb/core/tx/limiter/grouped_memory/service/allocation.h new file mode 100644 index 000000000000..47d5043188d0 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.h @@ -0,0 +1,76 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +enum class 
EAllocationStatus { + Allocated, + Waiting, + Failed +}; + +class TAllocationInfo { +private: + std::shared_ptr Allocation; + YDB_READONLY(ui64, AllocationInternalGroupId, 0); + ui64 AllocatedVolume = 0; + YDB_READONLY(ui64, Identifier, 0); + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + const std::shared_ptr Stage; + bool AllocationFailed = false; + +public: + ~TAllocationInfo() { + if (GetAllocationStatus() != EAllocationStatus::Failed) { + Stage->Free(AllocatedVolume, GetAllocationStatus() == EAllocationStatus::Allocated); + } + + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "destroy")("allocation_id", Identifier)("stage", Stage->GetName()); + } + + bool IsAllocatable(const ui64 additional) const { + return Stage->IsAllocatable(AllocatedVolume, additional); + } + + void SetAllocatedVolume(const ui64 value) { + AFL_VERIFY(GetAllocationStatus() != EAllocationStatus::Failed); + Stage->UpdateVolume(AllocatedVolume, value, GetAllocationStatus() == EAllocationStatus::Allocated); + AllocatedVolume = value; + } + + ui64 GetAllocatedVolume() const { + return AllocatedVolume; + } + + [[nodiscard]] bool Allocate(const NActors::TActorId& ownerId) { + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated")("allocation_id", Identifier)("stage", Stage->GetName()); + AFL_VERIFY(Allocation)("status", GetAllocationStatus())("volume", AllocatedVolume)("id", Identifier)("stage", Stage->GetName())( + "allocation_internal_group_id", AllocationInternalGroupId); + const bool result = Allocation->OnAllocated( + std::make_shared(ProcessId, ScopeId, Allocation->GetIdentifier(), ownerId, Allocation->GetMemory()), Allocation); + if (result) { + Stage->Allocate(AllocatedVolume); + } else { + Stage->Free(AllocatedVolume, false); + AllocationFailed = true; + } + Allocation = nullptr; + return result; + } + + EAllocationStatus GetAllocationStatus() const { + if (AllocationFailed) { + return EAllocationStatus::Failed; + } else if 
(Allocation) { + return EAllocationStatus::Waiting; + } else { + return EAllocationStatus::Allocated; + } + } + + TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, const std::shared_ptr& allocation, + const std::shared_ptr& stage); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.cpp b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp new file mode 100644 index 000000000000..d780bfd499d2 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp @@ -0,0 +1,5 @@ +#include "counters.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.h b/ydb/core/tx/limiter/grouped_memory/service/counters.h new file mode 100644 index 000000000000..3c96b3b8b9a4 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.h @@ -0,0 +1,62 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedBytes; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedChunks; + NMonitoring::TDynamicCounters::TCounterPtr WaitingBytes; + NMonitoring::TDynamicCounters::TCounterPtr WaitingChunks; + +public: + TStageCounters(const TCommonCountersOwner& owner, const TString& name) + : TBase(owner, "stage", name) + , AllocatedBytes(TBase::GetValue("Allocated/Bytes")) + , AllocatedChunks(TBase::GetValue("Allocated/Count")) + , WaitingBytes(TBase::GetValue("Waiting/Bytes")) + , WaitingChunks(TBase::GetValue("Waiting/Count")) { + } + + void Add(const ui64 volume, const bool allocated) { + if (allocated) { + AllocatedBytes->Add(volume); + AllocatedChunks->Add(1); + } else { + WaitingBytes->Add(volume); + WaitingChunks->Add(1); + } + } + + void Sub(const ui64 volume, const bool 
allocated) { + if (allocated) { + AllocatedBytes->Sub(volume); + AllocatedChunks->Sub(1); + } else { + WaitingBytes->Sub(volume); + WaitingChunks->Sub(1); + } + } +}; + +class TCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + +public: + NMonitoring::TDynamicCounters::TCounterPtr GroupsCount; + NMonitoring::TDynamicCounters::TCounterPtr ProcessesCount; + TCounters(const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, const TString& name) + : TBase(NColumnShard::TCommonCountersOwner("grouped_memory_limiter", counters), "limiter_name", name) + , GroupsCount(TBase::GetValue("Groups/Count")) + , ProcessesCount(TBase::GetValue("Processes/Count")) { + } + + std::shared_ptr BuildStageCounters(const TString& stageName) const { + return std::make_shared(*this, stageName); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.cpp b/ydb/core/tx/limiter/grouped_memory/service/group.cpp new file mode 100644 index 000000000000..3bf671ff76b9 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.cpp @@ -0,0 +1,68 @@ +#include "group.h" +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +std::vector> TGrouppedAllocations::AllocatePossible(const ui32 allocationsLimit) { + std::vector> result; + ui64 allocationMemory = 0; + ui32 allocationsCount = 0; + for (auto&& [_, allocation] : Allocations) { + if (allocation->IsAllocatable(allocationMemory)) { + allocationMemory += allocation->GetAllocatedVolume(); + result.emplace_back(allocation); + if (++allocationsCount == allocationsLimit) { + return result; + } + } + } + return result; +} + +bool TAllocationGroups::Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "try_allocation")("limit", allocationsLimit)( + "external_process_id", 
process.ExternalProcessId)("forced_internal_group_id", process.GroupIds.GetMinInternalIdOptional())( + "external_scope_id", process.ExternalScopeId)("forced_external_group_id", process.GroupIds.GetMinExternalIdOptional()); + ui32 allocationsCount = 0; + while (true) { + std::vector toRemove; + for (auto it = Groups.begin(); it != Groups.end();) { + const ui64 internalGroupId = it->first; + const bool forced = isPriorityProcess && internalGroupId == process.GroupIds.GetMinInternalIdVerified(); + std::vector> allocated; + if (forced) { + allocated = it->second.ExtractAllocationsToVector(); + } else if (allocationsLimit) { + allocated = it->second.AllocatePossible(allocationsLimit - allocationsCount); + } else { + break; + } + for (auto&& i : allocated) { + if (!i->Allocate(process.OwnerActorId)) { + toRemove.emplace_back(i->GetIdentifier()); + } else if (!forced) { + AFL_VERIFY(++allocationsCount <= allocationsLimit)("count", allocationsCount)("limit", allocationsLimit); + } + if (!forced) { + AFL_VERIFY(it->second.Remove(i)); + } + } + if (!it->second.IsEmpty()) { + break; + } + it = Groups.erase(it); + if (!forced && allocationsCount == allocationsLimit) { + break; + } + } + for (auto&& i : toRemove) { + process.UnregisterAllocation(i); + } + if (toRemove.empty() || allocationsCount == allocationsLimit) { + break; + } + } + return allocationsCount; +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.h b/ydb/core/tx/limiter/grouped_memory/service/group.h new file mode 100644 index 000000000000..8f4434c886b8 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.h @@ -0,0 +1,90 @@ +#pragma once +#include "allocation.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope; + +class TGrouppedAllocations { +private: + THashMap> Allocations; + +public: + std::vector> ExtractAllocationsToVector() { + std::vector> result; + result.reserve(Allocations.size()); + 
for (auto&& i : Allocations) { + result.emplace_back(std::move(i.second)); + } + Allocations.clear(); + return result; + } + + const THashMap>& GetAllocations() const { + return Allocations; + } + + bool IsEmpty() const { + return Allocations.empty(); + } + + void AddAllocation(const std::shared_ptr& allocation) { + AFL_VERIFY(Allocations.emplace(allocation->GetIdentifier(), allocation).second); + } + + [[nodiscard]] bool Remove(const std::shared_ptr& allocation) { + return Allocations.erase(allocation->GetIdentifier()); + } + + std::vector> AllocatePossible(const ui32 allocationsLimit); +}; + +class TAllocationGroups { +private: + std::map Groups; + +public: + bool IsEmpty() const { + return Groups.empty(); + } + + [[nodiscard]] bool Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit); + + [[nodiscard]] std::vector> ExtractGroup(const ui64 id) { + auto it = Groups.find(id); + if (it == Groups.end()) { + return {}; + } + auto result = it->second.ExtractAllocationsToVector(); + Groups.erase(it); + return result; + } + + std::optional GetMinGroupId() const { + if (Groups.size()) { + return Groups.begin()->first; + } else { + return std::nullopt; + } + } + + [[nodiscard]] bool RemoveAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + auto groupIt = Groups.find(internalGroupId); + if (groupIt == Groups.end()) { + return false; + } + if (!groupIt->second.Remove(allocation)) { + return false; + } + if (groupIt->second.IsEmpty()) { + Groups.erase(groupIt); + } + return true; + } + + void AddAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + Groups[internalGroupId].AddAllocation(allocation); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.cpp b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp new file mode 100644 index 000000000000..163c90efcf12 --- /dev/null +++ 
b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp @@ -0,0 +1,66 @@ +#include "ids.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +ui64 TIdsControl::ExtractInternalIdVerified(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + const ui64 result = it->second; + InternalIdIntoExternalId.erase(result); + ExternalIdIntoInternalId.erase(it); + return result; +} + +std::optional TIdsControl::GetInternalIdOptional(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + return std::nullopt; +} + +ui64 TIdsControl::GetMinInternalIdVerified() const { + AFL_VERIFY(InternalIdIntoExternalId.size()); + return InternalIdIntoExternalId.begin()->first; +} + +ui64 TIdsControl::GetInternalIdVerified(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + return it->second; +} + +ui64 TIdsControl::RegisterExternalId(const ui64 externalId) { + AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +ui64 TIdsControl::RegisterExternalIdOrGet(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +bool TIdsControl::UnregisterExternalId(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it == ExternalIdIntoInternalId.end()) { + return false; + } + AFL_VERIFY(InternalIdIntoExternalId.erase(it->second)); + 
ExternalIdIntoInternalId.erase(it); + return true; +} + +ui64 TIdsControl::GetExternalIdVerified(const ui64 internalId) const { + auto it = InternalIdIntoExternalId.find(internalId); + AFL_VERIFY(it != InternalIdIntoExternalId.end()); + return it->second; +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.h b/ydb/core/tx/limiter/grouped_memory/service/ids.h new file mode 100644 index 000000000000..acaa700411ab --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ids.h @@ -0,0 +1,70 @@ +#pragma once +#include + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TIdsControl { +private: + THashMap ExternalIdIntoInternalId; + std::map InternalIdIntoExternalId; + ui64 CurrentInternalId = 0; + +public: + void Clear() { + ExternalIdIntoInternalId.clear(); + InternalIdIntoExternalId.clear(); + } + + const std::map& GetInternalIdToExternalIds() const { + return InternalIdIntoExternalId; + } + + ui64 GetSize() const { + return InternalIdIntoExternalId.size(); + } + + [[nodiscard]] ui64 ExtractInternalIdVerified(const ui64 externalId); + + ui64 GetMinInternalIdVerified() const; + ui64 GetExternalIdVerified(const ui64 internalId) const; + + std::optional GetInternalIdOptional(const ui64 externalId) const; + + ui64 GetInternalIdVerified(const ui64 externalId) const; + + [[nodiscard]] ui64 RegisterExternalId(const ui64 externalId); + [[nodiscard]] ui64 RegisterExternalIdOrGet(const ui64 externalId); + + [[nodiscard]] bool UnregisterExternalId(const ui64 externalId); + + std::optional GetMinInternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return std::nullopt; + } + } + + std::optional GetMinExternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->second; + } else { + return std::nullopt; + } + } + + ui64 
GetMinInternalIdDef(const ui64 def) const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return def; + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/manager.cpp b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp new file mode 100644 index 000000000000..96fe8bcefc17 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp @@ -0,0 +1,122 @@ +#include "manager.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TProcessMemory* TManager::GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + return nullptr; + } + return GetProcessMemoryOptional(*internalId); +} + +void TManager::RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "register_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize())("external_scope_id", externalScopeId); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UnregisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "unregister_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize()); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->UnregisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { 
+ TProcessMemory& process = GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)); + if (process.UpdateAllocation(externalScopeId, allocationId, volume)) { + TryAllocateWaiting(); + } + + RefreshSignals(); +} + +void TManager::TryAllocateWaiting() { + if (Processes.size()) { + auto it = Processes.find(ProcessIds.GetMinInternalIdVerified()); + AFL_VERIFY(it != Processes.end()); + AFL_VERIFY(it->second.IsPriorityProcess()); + it->second.TryAllocateWaiting(0); + } + while (true) { + bool found = false; + for (auto&& i : Processes) { + if (i.second.TryAllocateWaiting(1)) { + found = true; + } + } + if (!found) { + break; + } + } + RefreshSignals(); +} + +void TManager::UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + if (process->UnregisterAllocation(externalScopeId, allocationId)) { + TryAllocateWaiting(); + } + } + RefreshSignals(); +} + +void TManager::RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, const std::optional& stageIdx) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterAllocation(externalScopeId, externalGroupId, task, stageIdx); + } else { + AFL_VERIFY(!task->OnAllocated(std::make_shared(externalProcessId, externalScopeId, task->GetIdentifier(), OwnerActorId, task->GetMemory()), task))( + "ext_group", externalGroupId)("stage_idx", stageIdx); + } + RefreshSignals(); +} + +void TManager::RegisterProcess(const ui64 externalProcessId, const std::vector>& stages) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + const ui64 internalProcessId = ProcessIds.RegisterExternalIdOrGet(externalProcessId); + AFL_VERIFY(Processes.emplace(internalProcessId, TProcessMemory(externalProcessId, OwnerActorId, Processes.empty(), 
stages, DefaultStage)).second); + } else { + ++Processes.find(*internalId)->second.MutableLinksCount(); + } + RefreshSignals(); +} + +void TManager::UnregisterProcess(const ui64 externalProcessId) { + const ui64 internalProcessId = ProcessIds.GetInternalIdVerified(externalProcessId); + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + if (--it->second.MutableLinksCount()) { + return; + } + Y_UNUSED(ProcessIds.ExtractInternalIdVerified(externalProcessId)); + it->second.Unregister(); + Processes.erase(it); + const ui64 nextInternalProcessId = ProcessIds.GetMinInternalIdDef(internalProcessId); + if (internalProcessId < nextInternalProcessId) { + GetProcessMemoryVerified(nextInternalProcessId).SetPriorityProcess(); + TryAllocateWaiting(); + } + RefreshSignals(); +} + +void TManager::RegisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).RegisterScope(externalProcessScopeId); + RefreshSignals(); +} + +void TManager::UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).UnregisterScope(externalProcessScopeId); + RefreshSignals(); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/manager.h b/ydb/core/tx/limiter/grouped_memory/service/manager.h new file mode 100644 index 000000000000..fd641a3f69b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.h @@ -0,0 +1,79 @@ +#pragma once +#include "counters.h" +#include "process.h" + +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TManager { +private: + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const NActors::TActorId OwnerActorId; + THashMap Processes; + std::shared_ptr 
DefaultStage; + TIdsControl ProcessIds; + + void TryAllocateWaiting(); + void RefreshSignals() const { + Signals->ProcessesCount->Set(Processes.size()); + } + + TProcessMemory& GetProcessMemoryVerified(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + return it->second; + } + + TProcessMemory* GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId); + + TProcessMemory* GetProcessMemoryOptional(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + if (it != Processes.end()) { + return &it->second; + } else { + return nullptr; + } + } + +public: + TManager(const NActors::TActorId& ownerActorId, const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , OwnerActorId(ownerActorId) + , DefaultStage(defaultStage) + { + } + + void RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + void UnregisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + + void RegisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + void UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + + void RegisterProcess(const ui64 externalProcessId, const std::vector>& stages); + void UnregisterProcess(const ui64 externalProcessId); + + void RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, + const std::optional& stageIdx); + void UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId); + void UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume); + + bool IsEmpty() const { + return Processes.empty(); + } +}; + +} // namespace 
NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.cpp b/ydb/core/tx/limiter/grouped_memory/service/process.cpp new file mode 100644 index 000000000000..bcde6532e797 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.cpp @@ -0,0 +1,5 @@ +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.h b/ydb/core/tx/limiter/grouped_memory/service/process.h new file mode 100644 index 000000000000..a1c13e091d59 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.h @@ -0,0 +1,262 @@ +#pragma once +#include "group.h" +#include "ids.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope { +private: + const ui64 ExternalProcessId; + const ui64 ExternalScopeId; + TAllocationGroups WaitAllocations; + THashMap> AllocationInfo; + TIdsControl GroupIds; + ui32 Links = 1; + const NActors::TActorId OwnerActorId; + + TAllocationInfo& GetAllocationInfoVerified(const ui64 allocationId) const { + auto it = AllocationInfo.find(allocationId); + AFL_VERIFY(it != AllocationInfo.end()); + return *it->second; + } + + void UnregisterGroupImpl(const ui64 internalGroupId) { + auto data = WaitAllocations.ExtractGroup(internalGroupId); + for (auto&& allocation : data) { + AFL_VERIFY(!allocation->Allocate(OwnerActorId)); + } + } + + const std::shared_ptr& RegisterAllocationImpl( + const ui64 internalGroupId, const std::shared_ptr& task, const std::shared_ptr& stage) { + auto it = AllocationInfo.find(task->GetIdentifier()); + if (it == AllocationInfo.end()) { + it = AllocationInfo + .emplace(task->GetIdentifier(), + std::make_shared(ExternalProcessId, ExternalScopeId, internalGroupId, task, stage)) + .first; + } + return it->second; + } + + friend class TAllocationGroups; + +public: + TProcessMemoryScope(const ui64 externalProcessId, 
const ui64 externalScopeId, const NActors::TActorId& ownerActorId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , OwnerActorId(ownerActorId) { + } + + void Register() { + ++Links; + } + + [[nodiscard]] bool Unregister() { + if (--Links) { + return false; + } + for (auto&& [i, _] : GroupIds.GetInternalIdToExternalIds()) { + UnregisterGroupImpl(i); + } + GroupIds.Clear(); + AllocationInfo.clear(); + return true; + } + + void RegisterAllocation(const bool isPriorityProcess, const ui64 externalGroupId, const std::shared_ptr& task, + const std::shared_ptr& stage) { + AFL_VERIFY(task); + AFL_VERIFY(stage); + const std::optional internalGroupIdOptional = GroupIds.GetInternalIdOptional(externalGroupId); + if (!internalGroupIdOptional) { + AFL_VERIFY(!task->OnAllocated(std::make_shared(ExternalProcessId, ExternalScopeId, task->GetIdentifier(), OwnerActorId, task->GetMemory()), task))( + "ext_group", externalGroupId)( + "min_group", GroupIds.GetMinInternalIdOptional())("stage", stage->GetName()); + AFL_VERIFY(!AllocationInfo.contains(task->GetIdentifier())); + } else { + const ui64 internalGroupId = *internalGroupIdOptional; + auto allocationInfo = RegisterAllocationImpl(internalGroupId, task, stage); + + if (allocationInfo->GetAllocationStatus() != EAllocationStatus::Waiting) { + } else if (WaitAllocations.GetMinGroupId().value_or(internalGroupId) < internalGroupId) { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } else if (allocationInfo->IsAllocatable(0) || (isPriorityProcess && internalGroupId == GroupIds.GetMinInternalIdVerified())) { + Y_UNUSED(WaitAllocations.RemoveAllocation(internalGroupId, allocationInfo)); + if (!allocationInfo->Allocate(OwnerActorId)) { + UnregisterAllocation(allocationInfo->GetIdentifier()); + } + } else { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } + } + } + + bool UpdateAllocation(const ui64 allocationId, const ui64 volume) { + 
GetAllocationInfoVerified(allocationId).SetAllocatedVolume(volume); + return true; + } + + bool TryAllocateWaiting(const bool isPriorityProcess, const ui32 allocationsCountLimit) { + return WaitAllocations.Allocate(isPriorityProcess, *this, allocationsCountLimit); + } + + bool UnregisterAllocation(const ui64 allocationId) { + ui64 memoryAllocated = 0; + auto it = AllocationInfo.find(allocationId); + AFL_VERIFY(it != AllocationInfo.end()); + bool waitFlag = false; + const ui64 internalGroupId = it->second->GetAllocationInternalGroupId(); + switch (it->second->GetAllocationStatus()) { + case EAllocationStatus::Allocated: + case EAllocationStatus::Failed: + AFL_VERIFY(!WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + break; + case EAllocationStatus::Waiting: + AFL_VERIFY(WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + waitFlag = true; + break; + } + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocation_unregister")("allocation_id", allocationId)("wait", waitFlag)( + "internal_group_id", internalGroupId)("allocation_status", it->second->GetAllocationStatus()); + memoryAllocated = it->second->GetAllocatedVolume(); + AllocationInfo.erase(it); + return !!memoryAllocated; + } + + void UnregisterGroup(const bool isPriorityProcess, const ui64 externalGroupId) { + const ui64 internalGroupId = GroupIds.ExtractInternalIdVerified(externalGroupId); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "remove_group")("external_group_id", externalGroupId)( + "internal_group_id", internalGroupId); + UnregisterGroupImpl(internalGroupId); + if (isPriorityProcess && (internalGroupId < GroupIds.GetMinInternalIdDef(internalGroupId))) { + Y_UNUSED(TryAllocateWaiting(isPriorityProcess, 0)); + } + } + + void RegisterGroup(const ui64 externalGroupId) { + Y_UNUSED(GroupIds.RegisterExternalId(externalGroupId)); + } +}; + +class TProcessMemory { +private: + const ui64 ExternalProcessId; + + const NActors::TActorId OwnerActorId; + bool 
PriorityProcessFlag = false; + + YDB_ACCESSOR(ui32, LinksCount, 1); + YDB_READONLY_DEF(std::vector>, Stages); + const std::shared_ptr DefaultStage; + THashMap> AllocationScopes; + + TProcessMemoryScope* GetAllocationScopeOptional(const ui64 externalScopeId) const { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + return nullptr; + } + return it->second.get(); + } + + TProcessMemoryScope& GetAllocationScopeVerified(const ui64 externalScopeId) const { + return *TValidator::CheckNotNull(GetAllocationScopeOptional(externalScopeId)); + } + +public: + bool IsPriorityProcess() const { + return PriorityProcessFlag; + } + + bool UpdateAllocation(const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { + return GetAllocationScopeVerified(externalScopeId).UpdateAllocation(allocationId, volume); + } + + void RegisterAllocation( + const ui64 externalScopeId, const ui64 externalGroupId, const std::shared_ptr& task, const std::optional& stageIdx) { + AFL_VERIFY(task); + std::shared_ptr stage; + if (Stages.empty()) { + AFL_VERIFY(!stageIdx); + stage = DefaultStage; + } else { + AFL_VERIFY(stageIdx); + AFL_VERIFY(*stageIdx < Stages.size()); + stage = Stages[*stageIdx]; + } + AFL_VERIFY(stage); + auto& scope = GetAllocationScopeVerified(externalScopeId); + scope.RegisterAllocation(IsPriorityProcess(), externalGroupId, task, stage); + } + + bool UnregisterAllocation(const ui64 externalScopeId, const ui64 allocationId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + return scope->UnregisterAllocation(allocationId); + } + return false; + } + + void UnregisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + scope->UnregisterGroup(IsPriorityProcess(), externalGroupId); + } + } + + void RegisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + 
GetAllocationScopeVerified(externalScopeId).RegisterGroup(externalGroupId); + } + + void UnregisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + AFL_VERIFY(it != AllocationScopes.end()); + if (it->second->Unregister()) { + AllocationScopes.erase(it); + } + + } + + void RegisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + AFL_VERIFY(AllocationScopes.emplace(externalScopeId, std::make_shared(ExternalProcessId, externalScopeId, OwnerActorId)).second); + } else { + it->second->Register(); + } + + } + + void SetPriorityProcess() { + AFL_VERIFY(!PriorityProcessFlag); + PriorityProcessFlag = true; + } + + TProcessMemory(const ui64 externalProcessId, const NActors::TActorId& ownerActorId, const bool isPriority, + const std::vector>& stages, const std::shared_ptr& defaultStage) + : ExternalProcessId(externalProcessId) + , OwnerActorId(ownerActorId) + , PriorityProcessFlag(isPriority) + , Stages(stages) + , DefaultStage(defaultStage) { + } + + bool TryAllocateWaiting(const ui32 allocationsCountLimit) { + bool allocated = false; + for (auto&& i : AllocationScopes) { + if (i.second->TryAllocateWaiting(IsPriorityProcess(), allocationsCountLimit)) { + allocated = true; + } + } + return allocated; + } + + void Unregister() { + for (auto&& i : AllocationScopes) { + Y_UNUSED(i.second->Unregister()); + } + AllocationScopes.clear(); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ya.make b/ydb/core/tx/limiter/grouped_memory/service/ya.make new file mode 100644 index 000000000000..d67332688426 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + actor.cpp + manager.cpp + counters.cpp + group.cpp + process.cpp + allocation.cpp + ids.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/tx/columnshard/counters/common +) + 
+GENERATE_ENUM_SERIALIZATION(allocation.h) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp new file mode 100644 index 000000000000..2d72f0039846 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp @@ -0,0 +1,79 @@ +#include "abstract.h" +#include "events.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationGuard::~TAllocationGuard() { + if (TlsActivationContext && !Released) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId)); + } +} + +void TAllocationGuard::Update(const ui64 newVolume) { + AFL_VERIFY(!Released); + Memory = newVolume; + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId, newVolume)); + } +} + +bool IAllocation::OnAllocated(std::shared_ptr&& guard, const std::shared_ptr& allocation) { + AFL_VERIFY(!Allocated); + Allocated = true; + AFL_VERIFY(allocation); + AFL_VERIFY(guard); + return DoOnAllocated(std::move(guard), allocation); +} + +TGroupGuard::~TGroupGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TGroupGuard::TGroupGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId) + : ActorId(actorId) + , ProcessId(processId) + , ExternalScopeId(externalScopeId) + , GroupId(groupId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TProcessGuard::~TProcessGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId)); + } +} + +TProcessGuard::TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, 
const std::vector>& stages) + : ActorId(actorId) + , ProcessId(processId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, stages)); + } +} + +TScopeGuard::~TScopeGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +TScopeGuard::TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.h b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h new file mode 100644 index 000000000000..d92120f46fb6 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h @@ -0,0 +1,223 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageFeatures; + +class TGroupGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, GroupId, 0); + +public: + TGroupGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId); + + ~TGroupGuard(); +}; + +class TProcessGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + +public: + TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, const std::vector>& stages); + + ~TProcessGuard(); +}; + +class TScopeGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + +public: + TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId); + + ~TScopeGuard(); +}; 
+ +class TAllocationGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0) + YDB_READONLY(ui64, ScopeId, 0) + YDB_READONLY(ui64, AllocationId, 0) + YDB_READONLY(ui64, Memory, 0) + bool Released = false; + +public: + TAllocationGuard(const ui64 processId, const ui64 scopeId, const ui64 allocationId, const NActors::TActorId actorId, const ui64 memory) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) + , AllocationId(allocationId) + , Memory(memory) { + } + + void Release() { + AFL_VERIFY(!Released); + Released = true; + } + + void Update(const ui64 newVolume); + + ~TAllocationGuard(); +}; + +class TPositiveControlInteger { +private: + ui64 Value = 0; + +public: + void Add(const ui64 value) { + Value += value; + } + void Sub(const ui64 value) { + AFL_VERIFY(value <= Value); + Value -= value; + } + ui64 Val() const { + return Value; + } +}; + +class TStageFeatures { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui64, Limit, 0); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Usage); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Waiting); + std::shared_ptr Owner; + std::shared_ptr Counters; + +public: + TString DebugString() const { + TStringBuilder result; + result << "name=" << Name << ";limit=" << Limit << ";"; + if (Owner) { + result << "owner=" << Owner->DebugString() << ";"; + } + return result; + } + + ui64 GetFullMemory() const { + return Usage.Val() + Waiting.Val(); + } + + TStageFeatures( + const TString& name, const ui64 limit, const std::shared_ptr& owner, const std::shared_ptr& counters) + : Name(name) + , Limit(limit) + , Owner(owner) + , Counters(counters) { + } + + void Allocate(const ui64 volume) { + Waiting.Sub(volume); + Usage.Add(volume); + if (Counters) { + Counters->Add(volume, true); + Counters->Sub(volume, false); + } + if (Owner) { + Owner->Allocate(volume); + } + } + + void Free(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Sub(volume, allocated); + } + if 
(allocated) { + Usage.Sub(volume); + } else { + Waiting.Sub(volume); + } + + if (Owner) { + Owner->Free(volume, allocated); + } + } + + void UpdateVolume(const ui64 from, const ui64 to, const bool allocated) { + if (Counters) { + Counters->Sub(from, allocated); + Counters->Add(to, allocated); + } + if (allocated) { + Usage.Sub(from); + Usage.Add(to); + } else { + Waiting.Sub(from); + Waiting.Add(to); + } + + if (Owner) { + Owner->UpdateVolume(from, to, allocated); + } + } + + bool IsAllocatable(const ui64 volume, const ui64 additional) const { + if (Limit < additional + Usage.Val() + volume) { + return false; + } + if (Owner) { + return Owner->IsAllocatable(volume, additional); + } + return true; + } + + void Add(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Add(volume, allocated); + } + if (allocated) { + Usage.Add(volume); + } else { + Waiting.Add(volume); + } + + if (Owner) { + Owner->Add(volume, allocated); + } + } +}; + +class IAllocation { +private: + static inline TAtomicCounter Counter = 0; + YDB_READONLY(ui64, Identifier, Counter.Inc()); + YDB_READONLY(ui64, Memory, 0); + bool Allocated = false; + virtual bool DoOnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation) = 0; + +public: + virtual ~IAllocation() = default; + IAllocation(const ui64 mem) + : Memory(mem) { + } + + void ResetAllocation() { + Allocated = false; + } + + bool IsAllocated() const { + return Allocated; + } + + [[nodiscard]] bool OnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.cpp b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp new file mode 100644 index 000000000000..17fe55975744 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp @@ -0,0 +1,20 @@ +#include "config.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +bool 
TConfig::DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config) { + if (config.HasMemoryLimit()) { + MemoryLimit = config.GetMemoryLimit(); + } + Enabled = config.GetEnabled(); + return true; +} + +TString TConfig::DebugString() const { + TStringBuilder sb; + sb << "MemoryLimit=" << MemoryLimit << ";Enabled=" << Enabled << ";"; + return sb; +} + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.h b/ydb/core/tx/limiter/grouped_memory/usage/config.h new file mode 100644 index 000000000000..91a9b5bc7afe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TConfig { +private: + YDB_READONLY(bool, Enabled, true); + YDB_READONLY(ui64, MemoryLimit, ui64(3) << 30); + +public: + + static TConfig BuildDisabledConfig() { + TConfig result; + result.Enabled = false; + return result; + } + + bool IsEnabled() const { + return Enabled; + } + bool DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config); + TString DebugString() const; +}; + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.cpp b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp new file mode 100644 index 000000000000..cafd00f2bd82 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp @@ -0,0 +1,5 @@ +#include "events.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.h b/ydb/core/tx/limiter/grouped_memory/usage/events.h new file mode 100644 index 000000000000..d3a8200c584c --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.h @@ -0,0 +1,150 @@ +#pragma once +#include "abstract.h" + +#include + +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { +struct TEvExternal { + enum EEv { + EvStartAllocationTask = 
EventSpaceBegin(TKikimrEvents::ES_GROUPED_ALLOCATIONS_MANAGER), + EvFinishAllocationTask, + EvStartAllocationGroup, + EvFinishAllocationGroup, + EvUpdateAllocationTask, + EvStartAllocationProcess, + EvFinishAllocationProcess, + EvStartAllocationProcessScope, + EvFinishAllocationProcessScope, + EvEnd + }; + + class TEvStartTask: public NActors::TEventLocal { + private: + YDB_READONLY_DEF(std::vector>, Allocations); + YDB_READONLY_DEF(std::optional, StageFeaturesIdx); + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartTask(const ui64 externalProcessId, const ui64 externalScopeId, + const ui64 externalGroupId, const std::vector>& allocations, + const std::optional& stageFeaturesIdx) + : Allocations(allocations) + , StageFeaturesIdx(stageFeaturesIdx) + , ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + AFL_VERIFY(Allocations.size()); + } + }; + + class TEvFinishTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + + public: + explicit TEvFinishTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) { + } + }; + + class TEvUpdateTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + YDB_READONLY(ui64, Volume, 0); + + public: + explicit TEvUpdateTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) + , Volume(volume) { + } + }; + + class TEvFinishGroup: public 
NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvFinishGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvStartGroup: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvFinishProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + + public: + explicit TEvFinishProcess(const ui64 externalProcessId) + : ExternalProcessId(externalProcessId) { + } + }; + + class TEvStartProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY_DEF(std::vector>, Stages); + public: + explicit TEvStartProcess(const ui64 externalProcessId, const std::vector>& stages) + : ExternalProcessId(externalProcessId) + , Stages(stages) { + } + }; + + class TEvFinishProcessScope: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvFinishProcessScope(const ui64 externalProcessId, const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + { + } + }; + + class TEvStartProcessScope: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvStartProcessScope(const ui64 externalProcessId, 
const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) { + } + }; +}; +} // namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.cpp b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp new file mode 100644 index 000000000000..affe243f11bb --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp @@ -0,0 +1,5 @@ +#include "service.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.h b/ydb/core/tx/limiter/grouped_memory/usage/service.h new file mode 100644 index 000000000000..8192743218b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.h @@ -0,0 +1,102 @@ +#pragma once +#include "abstract.h" +#include "config.h" +#include "events.h" + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +template +class TServiceOperatorImpl { +private: + TConfig ServiceConfig = TConfig::BuildDisabledConfig(); + std::shared_ptr Counters; + std::shared_ptr DefaultStageFeatures = std::make_shared("DEFAULT", ((ui64)3) << 30, nullptr, nullptr); + using TSelf = TServiceOperatorImpl; + static void Register(const TConfig& serviceConfig, TIntrusivePtr<::NMonitoring::TDynamicCounters> counters) { + Singleton()->Counters = std::make_shared(counters, TMemoryLimiterPolicy::Name); + Singleton()->ServiceConfig = serviceConfig; + Singleton()->DefaultStageFeatures = std::make_shared( + "GLOBAL", serviceConfig.GetMemoryLimit(), nullptr, Singleton()->Counters->BuildStageCounters("general")); + } + static const TString& GetMemoryLimiterName() { + Y_ABORT_UNLESS(TMemoryLimiterPolicy::Name.size() == 4); + return TMemoryLimiterPolicy::Name; + } + +public: + static std::shared_ptr BuildStageFeatures(const TString& name, const ui64 limit) { + if (!IsEnabled()) { + return Singleton()->DefaultStageFeatures; + } else { + 
AFL_VERIFY(Singleton()->DefaultStageFeatures); + return std::make_shared( + name, limit, Singleton()->DefaultStageFeatures, Singleton()->Counters->BuildStageCounters(name)); + } + } + + static std::shared_ptr GetDefaultStageFeatures() { + AFL_VERIFY(Singleton()->DefaultStageFeatures); + return Singleton()->DefaultStageFeatures; + } + + static std::shared_ptr BuildGroupGuard(const ui64 processId, const ui32 scopeId) { + static TAtomicCounter counter = 0; + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId, counter.Inc()); + } + + static std::shared_ptr BuildScopeGuard(const ui64 processId, const ui32 scopeId) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId); + } + + static std::shared_ptr BuildProcessGuard(const ui64 processId, const std::vector>& stages) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, stages); + } + + static bool SendToAllocation(const ui64 processId, const ui64 scopeId, const ui64 groupId, + const std::vector>& tasks, + const std::optional& stageIdx) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + if (TSelf::IsEnabled()) { + context.Send(MakeServiceId(selfId.NodeId()), new NEvents::TEvExternal::TEvStartTask(processId, scopeId, groupId, tasks, stageIdx)); + return true; + } else { + for (auto&& i : tasks) { + if (!i->IsAllocated()) { + AFL_VERIFY(i->OnAllocated(std::make_shared(0, 0, 0, NActors::TActorId(), i->GetMemory()), i)); + } + } + return false; + } + } + static bool IsEnabled() { + return Singleton()->ServiceConfig.IsEnabled(); + } + static NActors::TActorId 
MakeServiceId(const ui32 nodeId) { + return NActors::TActorId(nodeId, "SrvcMlmt" + GetMemoryLimiterName()); + } + static NActors::IActor* CreateService(const TConfig& config, TIntrusivePtr<::NMonitoring::TDynamicCounters> signals) { + Register(config, signals); + return new TMemoryLimiterActor(config, GetMemoryLimiterName(), Singleton()->Counters, Singleton()->DefaultStageFeatures); + } +}; + +class TScanMemoryLimiterPolicy { +public: + static const inline TString Name = "Scan"; +}; + +using TScanMemoryLimiterOperator = TServiceOperatorImpl; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/ya.make b/ydb/core/tx/limiter/grouped_memory/usage/ya.make new file mode 100644 index 000000000000..4295b9f8cafe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + events.cpp + config.cpp + abstract.cpp + service.cpp +) + +PEERDIR( + ydb/library/actors/core + ydb/services/metadata/request + ydb/core/tx/limiter/grouped_memory/service +) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp new file mode 100644 index 000000000000..277d62903205 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +Y_UNIT_TEST_SUITE(GroupedMemoryLimiter) { + using namespace NKikimr; + + class TAllocation: public NOlap::NGroupedMemoryManager::IAllocation, public TObjectCounter { + private: + using TBase = NOlap::NGroupedMemoryManager::IAllocation; + virtual bool DoOnAllocated(std::shared_ptr&& /*guard*/, + const std::shared_ptr& /*allocation*/) override { + return true; + } + + public: + TAllocation(const ui64 mem) + : TBase(mem) { + } + }; + + Y_UNIT_TEST(Simplest) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { 
+ NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + auto alloc1 = std::make_shared(50); + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc1_1 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Simple) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = 
NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(10); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 2); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + auto alloc1_1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterGroup(0, 0, 1); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(!alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(CommonUsage) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + 
AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + auto alloc0 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc0, {}); + auto alloc1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 2, alloc0, {}); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc0, {}); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc0->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + 
AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Update) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(10); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + + manager->UpdateAllocation(0, 0, alloc1->GetIdentifier(), 10); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } +}; diff --git a/ydb/core/tx/limiter/grouped_memory/ut/ya.make b/ydb/core/tx/limiter/grouped_memory/ut/ya.make new file mode 100644 index 000000000000..eeae3952e358 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ya.make @@ -0,0 +1,17 @@ +UNITTEST_FOR(ydb/core/formats/arrow) + +SIZE(SMALL) + +PEERDIR( + ydb/core/tx/limiter/grouped_memory/usage + ydb/library/yql/public/udf/service/stub + ydb/library/yql/parser/pg_wrapper 
+) + +SRCS( + ut_manager.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ya.make b/ydb/core/tx/limiter/grouped_memory/ya.make new file mode 100644 index 000000000000..2e071158a966 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ya.make @@ -0,0 +1,5 @@ +RECURSE( + ut + service + usage +) diff --git a/ydb/core/tx/limiter/service/service.cpp b/ydb/core/tx/limiter/service/service.cpp index edbce04b7256..e31ea0f253c7 100644 --- a/ydb/core/tx/limiter/service/service.cpp +++ b/ydb/core/tx/limiter/service/service.cpp @@ -12,7 +12,7 @@ TLimiterActor::TLimiterActor(const TConfig& config, const TString& limiterName, void TLimiterActor::HandleMain(TEvExternal::TEvAskResource::TPtr& ev) { const auto now = TMonotonic::Now(); - if (RequestsInFlight.empty() || VolumeInFlight + ev->Get()->GetRequest()->GetVolume() <= Config.GetLimit()) { + if (RequestsInFlight.empty() || (RequestsQueue.empty() && VolumeInFlight + ev->Get()->GetRequest()->GetVolume() <= Config.GetLimit())) { VolumeInFlight += ev->Get()->GetRequest()->GetVolume(); RequestsInFlight.emplace_back(now, ev->Get()->GetRequest()->GetVolume()); if (RequestsInFlight.size() == 1) { diff --git a/ydb/core/tx/limiter/ya.make b/ydb/core/tx/limiter/ya.make new file mode 100644 index 000000000000..c27ad41aa02b --- /dev/null +++ b/ydb/core/tx/limiter/ya.make @@ -0,0 +1,5 @@ +RECURSE( + grouped_memory + service + usage +) diff --git a/ydb/core/tx/program/program.cpp b/ydb/core/tx/program/program.cpp index a6dbce7fed75..a4ada441cd74 100644 --- a/ydb/core/tx/program/program.cpp +++ b/ydb/core/tx/program/program.cpp @@ -330,7 +330,7 @@ NSsa::TAggregateAssign TProgramBuilder::MakeAggregate(const NSsa::TColumnInfo& n } } else if (func.ArgumentsSize() == 0 && func.GetId() == TId::AGG_COUNT) { // COUNT(*) case - return TAggregateAssign(name, EAggregate::Count); + return TAggregateAssign(name, EAggregate::NumRows); } return TAggregateAssign(name); // !ok() } @@ -483,7 +483,7 @@ bool 
TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { TString out; ::google::protobuf::TextFormat::PrintToString(programProto, &out); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("program", out); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "parse_program")("program", out); } if (programProto.HasKernels()) { @@ -496,6 +496,7 @@ bool TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim } return false; } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "program_parsed")("result", DebugString()); return true; } diff --git a/ydb/core/tx/program/program.h b/ydb/core/tx/program/program.h index 76fbe8702488..3ab18eccc9d1 100644 --- a/ydb/core/tx/program/program.h +++ b/ydb/core/tx/program/program.h @@ -2,7 +2,7 @@ #include "registry.h" #include -#include +#include #include #include #include @@ -20,7 +20,6 @@ class IColumnResolver { virtual ~IColumnResolver() = default; virtual TString GetColumnName(ui32 id, bool required = true) const = 0; virtual std::optional GetColumnIdOptional(const TString& name) const = 0; - virtual const NTable::TScheme::TTableSchema& GetSchema() const = 0; virtual NSsa::TColumnInfo GetDefaultColumn() const = 0; }; @@ -35,10 +34,6 @@ class TSchemaResolverColumnsOnly: public IColumnResolver { virtual TString GetColumnName(ui32 id, bool required = true) const override; virtual std::optional GetColumnIdOptional(const TString& name) const override; - virtual const NTable::TScheme::TTableSchema& GetSchema() const override { - AFL_VERIFY(false); - return Default(); - } virtual NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/program/registry.cpp b/ydb/core/tx/program/registry.cpp index f8ba71e37d78..5b68a06787e4 100644 --- 
a/ydb/core/tx/program/registry.cpp +++ b/ydb/core/tx/program/registry.cpp @@ -7,18 +7,18 @@ namespace NKikimr::NOlap { -::NTls::TValue Registry; +::NTls::TValue> Registry; bool TKernelsRegistry::Parse(const TString& serialized) { Y_ABORT_UNLESS(!!serialized); if (!Registry.Get()) { - Registry = NMiniKQL::CreateBuiltinRegistry(); + auto registry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone(); + NMiniKQL::FillStaticModules(*registry.Get()); + Registry = std::move(registry); } - auto copy = Registry.Get(); - auto functionRegistry = NMiniKQL::CreateFunctionRegistry(std::move(copy))->Clone(); - NMiniKQL::FillStaticModules(*functionRegistry); + auto nodeFactory = NMiniKQL::GetBuiltinFactory(); - auto kernels = NYql::LoadKernels(serialized, *functionRegistry, nodeFactory); + auto kernels = NYql::LoadKernels(serialized, *Registry.Get(), nodeFactory); Kernels.swap(kernels); for (const auto& kernel : Kernels) { arrow::compute::Arity arity(kernel->signature->in_types().size(), kernel->signature->is_varargs()); diff --git a/ydb/core/tx/program/ya.make b/ydb/core/tx/program/ya.make index 4e69528543a3..d8ef7ed60696 100644 --- a/ydb/core/tx/program/ya.make +++ b/ydb/core/tx/program/ya.make @@ -8,7 +8,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow ydb/core/protos - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ydb/core/tablet_flat ydb/library/yql/minikql/comp_nodes ydb/library/yql/core/arrow_kernels/registry diff --git a/ydb/core/tx/replication/controller/controller.cpp b/ydb/core/tx/replication/controller/controller.cpp index 4f73d84d68b0..ffd461ca6613 100644 --- a/ydb/core/tx/replication/controller/controller.cpp +++ b/ydb/core/tx/replication/controller/controller.cpp @@ -1,8 +1,10 @@ #include "controller.h" #include "controller_impl.h" +#include #include #include +#include namespace NKikimr::NReplication { @@ -12,6 +14,13 @@ TController::TController(const TActorId& tablet, TTabletStorageInfo* info) : TActor(&TThis::StateInit) , 
TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) , LogPrefix(this) + , TabletCountersPtr(new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor + >()) + , TabletCounters(TabletCountersPtr.Get()) { } @@ -29,6 +38,7 @@ void TController::OnTabletDead(TEvTablet::TEvTabletDead::TPtr&, const TActorCont void TController::OnActivateExecutor(const TActorContext& ctx) { CLOG_T(ctx, "OnActivateExecutor"); + Executor()->RegisterExternalTabletCounters(TabletCountersPtr.Release()); RunTxInitSchema(ctx); } @@ -60,6 +70,8 @@ STFUNC(TController::StateWork) { HFunc(TEvPrivate::TEvProcessQueues, Handle); HFunc(TEvPrivate::TEvRemoveWorker, Handle); HFunc(TEvPrivate::TEvDescribeTargetsResult, Handle); + HFunc(TEvPrivate::TEvRequestCreateStream, Handle); + HFunc(TEvPrivate::TEvRequestDropStream, Handle); HFunc(TEvDiscovery::TEvDiscoveryData, Handle); HFunc(TEvDiscovery::TEvError, Handle); HFunc(TEvService::TEvStatus, Handle); @@ -148,13 +160,53 @@ void TController::Handle(TEvPrivate::TEvAssignStreamName::TPtr& ev, const TActor RunTxAssignStreamName(ev, ctx); } +template +void ProcessLimiterQueue(TDeque& requested, THashSet& inflight, ui32 limit, const TActorContext& ctx) { + while (!requested.empty() && inflight.size() < limit) { + const auto& actorId = requested.front(); + ctx.Send(actorId, new TEvent()); + inflight.insert(actorId); + requested.pop_front(); + } +} + +void TController::ProcessCreateStreamQueue(const TActorContext& ctx) { + const auto& limits = AppData()->ReplicationConfig.GetSchemeOperationLimits(); + ProcessLimiterQueue(RequestedCreateStream, InflightCreateStream, limits.GetInflightCreateStreamLimit(), ctx); +} + +void TController::ProcessDropStreamQueue(const TActorContext& ctx) { + const auto& limits = AppData()->ReplicationConfig.GetSchemeOperationLimits(); + ProcessLimiterQueue(RequestedDropStream, InflightDropStream, limits.GetInflightDropStreamLimit(), 
ctx); +} + +void TController::Handle(TEvPrivate::TEvRequestCreateStream::TPtr& ev, const TActorContext& ctx) { + CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + RequestedCreateStream.push_back(ev->Sender); + ProcessCreateStreamQueue(ctx); +} + void TController::Handle(TEvPrivate::TEvCreateStreamResult::TPtr& ev, const TActorContext& ctx) { CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + InflightCreateStream.erase(ev->Sender); + ProcessCreateStreamQueue(ctx); RunTxCreateStreamResult(ev, ctx); } +void TController::Handle(TEvPrivate::TEvRequestDropStream::TPtr& ev, const TActorContext& ctx) { + CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + RequestedDropStream.push_back(ev->Sender); + ProcessDropStreamQueue(ctx); +} + void TController::Handle(TEvPrivate::TEvDropStreamResult::TPtr& ev, const TActorContext& ctx) { CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + InflightDropStream.erase(ev->Sender); + ProcessDropStreamQueue(ctx); RunTxDropStreamResult(ev, ctx); } @@ -249,9 +301,11 @@ void TController::Handle(TEvDiscovery::TEvError::TPtr& ev, const TActorContext& void TController::CreateSession(ui32 nodeId, const TActorContext& ctx) { CLOG_D(ctx, "Create session" << ": nodeId# " << nodeId); + TabletCounters->Cumulative()[COUNTER_CREATE_SESSION] += 1; Y_ABORT_UNLESS(!Sessions.contains(nodeId)); Sessions.emplace(nodeId, TSessionInfo()); + TabletCounters->Simple()[COUNTER_SESSIONS] = Sessions.size(); auto ev = MakeHolder(TabletID(), Executor()->Generation()); ui32 flags = 0; @@ -265,6 +319,7 @@ void TController::CreateSession(ui32 nodeId, const TActorContext& ctx) { void TController::DeleteSession(ui32 nodeId, const TActorContext& ctx) { CLOG_D(ctx, "Delete session" << ": nodeId# " << nodeId); + TabletCounters->Cumulative()[COUNTER_DELETE_SESSION] += 1; Y_ABORT_UNLESS(Sessions.contains(nodeId)); auto& session = Sessions[nodeId]; @@ -284,6 +339,8 @@ void TController::DeleteSession(ui32 nodeId, const TActorContext& ctx) { } Sessions.erase(nodeId); + 
TabletCounters->Simple()[COUNTER_SESSIONS] = Sessions.size(); + CloseSession(nodeId, ctx); ScheduleProcessQueues(); } @@ -388,6 +445,9 @@ void TController::UpdateLag(const TWorkerId& id, TDuration lag) { } target->UpdateLag(id.WorkerId(), lag); + if (const auto lag = replication->GetLag()) { + TabletCounters->Simple()[COUNTER_DATA_LAG] = lag->MilliSeconds(); + } } void TController::Handle(TEvService::TEvRunWorker::TPtr& ev, const TActorContext& ctx) { @@ -443,6 +503,7 @@ TWorkerInfo* TController::GetOrCreateWorker(const TWorkerId& id, NKikimrReplicat auto it = Workers.find(id); if (it == Workers.end()) { it = Workers.emplace(id, cmd).first; + TabletCounters->Simple()[COUNTER_WORKERS] = Workers.size(); } auto replication = Find(id.ReplicationId()); @@ -456,6 +517,9 @@ TWorkerInfo* TController::GetOrCreateWorker(const TWorkerId& id, NKikimrReplicat } void TController::ScheduleProcessQueues() { + TabletCounters->Simple()[COUNTER_BOOT_QUEUE] = BootQueue.size(); + TabletCounters->Simple()[COUNTER_STOP_QUEUE] = StopQueue.size(); + if (ProcessQueuesScheduled || (!BootQueue && !StopQueue)) { return; } @@ -609,6 +673,7 @@ void TController::RemoveWorker(const TWorkerId& id, const TActorContext& ctx) { RemoveQueue.erase(id); Workers.erase(id); + TabletCounters->Simple()[COUNTER_WORKERS] = Workers.size(); auto replication = Find(id.ReplicationId()); if (!replication) { diff --git a/ydb/core/tx/replication/controller/controller_impl.h b/ydb/core/tx/replication/controller/controller_impl.h index 927d2d5bf528..d6791b3ed303 100644 --- a/ydb/core/tx/replication/controller/controller_impl.h +++ b/ydb/core/tx/replication/controller/controller_impl.h @@ -12,11 +12,13 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -83,6 +85,8 @@ class TController void Handle(TEvPrivate::TEvProcessQueues::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvRemoveWorker::TPtr& ev, const TActorContext& ctx); void 
Handle(TEvPrivate::TEvDescribeTargetsResult::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRequestCreateStream::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRequestDropStream::TPtr& ev, const TActorContext& ctx); void Handle(TEvDiscovery::TEvDiscoveryData::TPtr& ev, const TActorContext& ctx); void Handle(TEvDiscovery::TEvError::TPtr& ev, const TActorContext& ctx); void Handle(TEvService::TEvStatus::TPtr& ev, const TActorContext& ctx); @@ -103,6 +107,8 @@ class TController void RemoveWorker(const TWorkerId& id, const TActorContext& ctx); bool MaybeRemoveWorker(const TWorkerId& id, const TActorContext& ctx); void UpdateLag(const TWorkerId& id, TDuration lag); + void ProcessCreateStreamQueue(const TActorContext& ctx); + void ProcessDropStreamQueue(const TActorContext& ctx); // local transactions class TTxInitSchema; @@ -162,6 +168,8 @@ class TController private: const TTabletLogPrefix LogPrefix; + THolder TabletCountersPtr; + TTabletCountersBase* TabletCounters; TSysParams SysParams; THashMap Replications; @@ -178,6 +186,13 @@ class TController bool ProcessQueuesScheduled = false; static constexpr ui32 ProcessBatchLimit = 100; + // create stream limiter + TDeque RequestedCreateStream; + THashSet InflightCreateStream; + // drop stream limiter + TDeque RequestedDropStream; + THashSet InflightDropStream; + }; // TController } diff --git a/ydb/core/tx/replication/controller/dst_alterer.cpp b/ydb/core/tx/replication/controller/dst_alterer.cpp index cb436259fae2..c03314f1d59e 100644 --- a/ydb/core/tx/replication/controller/dst_alterer.cpp +++ b/ydb/core/tx/replication/controller/dst_alterer.cpp @@ -41,6 +41,7 @@ class TDstAlterer: public TActorBootstrapped { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: tx.SetOperationType(NKikimrSchemeOp::ESchemeOpAlterTable); PathIdFromPathId(DstPathId, tx.MutableAlterTable()->MutablePathId()); 
tx.MutableAlterTable()->MutableReplicationConfig()->SetMode( diff --git a/ydb/core/tx/replication/controller/dst_creator.cpp b/ydb/core/tx/replication/controller/dst_creator.cpp index 4b43cbad4fd5..cabb8c757334 100644 --- a/ydb/core/tx/replication/controller/dst_creator.cpp +++ b/ydb/core/tx/replication/controller/dst_creator.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -116,6 +118,8 @@ class TDstCreator: public TActorBootstrapped { .WithKeyShardBoundary(true))); } break; + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } } @@ -128,7 +132,7 @@ class TDstCreator: public TActorBootstrapped { } } - NKikimrScheme::EStatus ConvertStatus(NYdb::EStatus status) { + static NKikimrScheme::EStatus ConvertStatus(NYdb::EStatus status) { switch (status) { case NYdb::EStatus::SUCCESS: return NKikimrScheme::StatusSuccess; @@ -165,8 +169,20 @@ class TDstCreator: public TActorBootstrapped { Ydb::Table::CreateTableRequest scheme; result.GetTableDescription().SerializeTo(scheme); - // Disable index support until other replicator code be ready to process index replication - scheme.mutable_indexes()->Clear(); + + // filter out unsupported index types + auto& indexes = *scheme.mutable_indexes(); + for (auto it = indexes.begin(); it != indexes.end();) { + switch (it->type_case()) { + case Ydb::Table::TableIndex::kGlobalIndex: + case Ydb::Table::TableIndex::kGlobalUniqueIndex: + ++it; + continue; + default: + it = indexes.erase(it); + break; + } + } Ydb::StatusIds::StatusCode status; TString error; @@ -182,30 +198,37 @@ class TDstCreator: public TActorBootstrapped { TxBody.SetWorkingDir(pathPair.first); - NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; + NKikimrSchemeOp::TTableDescription* desc = nullptr; if (scheme.indexes_size()) { + NeedToCheck = true; TxBody.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateIndexedTable); - tableDesc = TxBody.MutableCreateIndexedTable()->MutableTableDescription(); 
TxBody.SetInternal(true); + desc = TxBody.MutableCreateIndexedTable()->MutableTableDescription(); + if (!FillIndexDescription(*TxBody.MutableCreateIndexedTable(), scheme, status, error)) { + return Error(NKikimrScheme::StatusSchemeError, error); + } } else { TxBody.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateTable); - tableDesc = TxBody.MutableCreateTable(); + desc = TxBody.MutableCreateTable(); } - Ydb::StatusIds::StatusCode dummyCode; + Y_ABORT_UNLESS(desc); + desc->SetName(pathPair.second); - if (!FillIndexDescription(*TxBody.MutableCreateIndexedTable(), scheme, dummyCode, error)) { - return Error(NKikimrScheme::StatusSchemeError, error); + FillReplicationConfig(*desc->MutableReplicationConfig()); + if (scheme.indexes_size()) { + for (auto& index : *TxBody.MutableCreateIndexedTable()->MutableIndexDescription()) { + FillReplicationConfig(*index.MutableIndexImplTableDescription()->MutableReplicationConfig()); + } } - tableDesc->SetName(pathPair.second); + AllocateTxId(); + } + static void FillReplicationConfig(NKikimrSchemeOp::TTableReplicationConfig& replicationConfig) { // TODO: support other modes - auto& replicationConfig = *tableDesc->MutableReplicationConfig(); replicationConfig.SetMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_READ_ONLY); replicationConfig.SetConsistency(NKikimrSchemeOp::TTableReplicationConfig::CONSISTENCY_WEAK); - - AllocateTxId(); } void AllocateTxId() { @@ -257,7 +280,9 @@ class TDstCreator: public TActorBootstrapped { switch (record.GetStatus()) { case NKikimrScheme::StatusAccepted: - DstPathId = TPathId(SchemeShardId, record.GetPathId()); + if (!NeedToCheck) { + DstPathId = TPathId(SchemeShardId, record.GetPathId()); + } Y_DEBUG_ABORT_UNLESS(TxId == record.GetTxId()); return SubscribeTx(record.GetTxId()); case NKikimrScheme::StatusMultipleModifications: @@ -338,6 +363,8 @@ class TDstCreator: public TActorBootstrapped { switch (Kind) { case TReplication::ETargetKind::Table: return CheckTableScheme(desc.GetTable(), 
error); + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } } @@ -366,21 +393,30 @@ class TDstCreator: public TActorBootstrapped { return false; } - const auto& expected = TxBody.GetCreateTable(); + const NKikimrSchemeOp::TIndexedTableCreationConfig* indexedDesc = nullptr; + const NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; + if (TxBody.GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexedTable) { + indexedDesc = &TxBody.GetCreateIndexedTable(); + tableDesc = &indexedDesc->GetTableDescription(); + } else { + tableDesc = &TxBody.GetCreateTable(); + } + + Y_ABORT_UNLESS(tableDesc); // check key - if (expected.KeyColumnNamesSize() != got.KeyColumnNamesSize()) { + if (tableDesc->KeyColumnNamesSize() != got.KeyColumnNamesSize()) { error = TStringBuilder() << "Key columns size mismatch" - << ": expected: " << expected.KeyColumnNamesSize() + << ": expected: " << tableDesc->KeyColumnNamesSize() << ", got: " << got.KeyColumnNamesSize(); return false; } - for (ui32 i = 0; i < expected.KeyColumnNamesSize(); ++i) { - if (expected.GetKeyColumnNames(i) != got.GetKeyColumnNames(i)) { + for (ui32 i = 0; i < tableDesc->KeyColumnNamesSize(); ++i) { + if (tableDesc->GetKeyColumnNames(i) != got.GetKeyColumnNames(i)) { error = TStringBuilder() << "Key column name mismatch" << ": position: " << i - << ", expected: " << expected.GetKeyColumnNames(i) + << ", expected: " << tableDesc->GetKeyColumnNames(i) << ", got: " << got.GetKeyColumnNames(i); return false; } @@ -392,14 +428,14 @@ class TDstCreator: public TActorBootstrapped { columns.emplace(column.GetName(), column.GetType()); } - if (expected.ColumnsSize() != columns.size()) { + if (tableDesc->ColumnsSize() != columns.size()) { error = TStringBuilder() << "Columns size mismatch" - << ": expected: " << expected.ColumnsSize() + << ": expected: " << tableDesc->ColumnsSize() << ", got: " << columns.size(); return false; } - for (const auto& column : expected.GetColumns()) { + for (const auto& 
column : tableDesc->GetColumns()) { auto it = columns.find(column.GetName()); if (it == columns.end()) { error = TStringBuilder() << "Cannot find column" @@ -422,14 +458,25 @@ class TDstCreator: public TActorBootstrapped { indexes.emplace(index.GetName(), &index); } - if (expected.TableIndexesSize() != indexes.size()) { + if (!indexedDesc) { + if (!indexes.empty()) { + error = TStringBuilder() << "Indexes size mismatch" + << ": expected: " << 0 + << ", got: " << indexes.size(); + return false; + } + + return true; + } + + if (indexedDesc->IndexDescriptionSize() != indexes.size()) { error = TStringBuilder() << "Indexes size mismatch" - << ": expected: " << expected.TableIndexesSize() + << ": expected: " << indexedDesc->IndexDescriptionSize() << ", got: " << indexes.size(); return false; } - for (const auto& index : expected.GetTableIndexes()) { + for (const auto& index : indexedDesc->GetIndexDescription()) { auto it = indexes.find(index.GetName()); if (it == indexes.end()) { error = TStringBuilder() << "Cannot find index" @@ -487,6 +534,36 @@ class TDstCreator: public TActorBootstrapped { return true; } + void SubscribeDstPath() { + Subscriber = Register(CreateSchemeBoardSubscriber(SelfId(), DstPath)); + Become(&TThis::StateSubscribeDstPath); + } + + STATEFN(StateSubscribeDstPath) { + switch (ev->GetTypeRewrite()) { + hFunc(TSchemeBoardEvents::TEvNotifyUpdate, Handle); + default: + return StateBase(ev); + } + } + + void Handle(TSchemeBoardEvents::TEvNotifyUpdate::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + + const auto& desc = ev->Get()->DescribeSchemeResult; + if (desc.GetStatus() != NKikimrScheme::StatusSuccess) { + return; + } + + const auto& entryDesc = desc.GetPathDescription().GetSelf(); + if (!entryDesc.HasCreateFinished() || !entryDesc.GetCreateFinished()) { + return; + } + + DstPathId = ev->Get()->PathId; + return Success(); + } + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { LOG_T("Handle " << ev->Get()->ToString()); @@ -525,6 
+602,12 @@ class TDstCreator: public TActorBootstrapped { Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup); } + void PassAway() override { + if (const auto& actorId = std::exchange(Subscriber, {})) { + Send(actorId, new TEvents::TEvPoison()); + } + } + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::REPLICATION_CONTROLLER_DST_CREATOR; @@ -554,7 +637,13 @@ class TDstCreator: public TActorBootstrapped { } void Bootstrap() { - Resolve(PathId); + switch (Kind) { + case TReplication::ETargetKind::Table: + return Resolve(PathId); + case TReplication::ETargetKind::IndexTable: + // indexed table will be created along with its indexes + return SubscribeDstPath(); + } } STATEFN(StateBase) { @@ -586,6 +675,7 @@ class TDstCreator: public TActorBootstrapped { TActorId PipeCache; bool NeedToCheck = false; TPathId DstPathId; + TActorId Subscriber; }; // TDstCreator diff --git a/ydb/core/tx/replication/controller/dst_creator_ut.cpp b/ydb/core/tx/replication/controller/dst_creator_ut.cpp index d7fab275eb28..805cd2b1e3b3 100644 --- a/ydb/core/tx/replication/controller/dst_creator_ut.cpp +++ b/ydb/core/tx/replication/controller/dst_creator_ut.cpp @@ -62,8 +62,16 @@ Y_UNIT_TEST_SUITE(DstCreator) { CheckTableReplica(tableDesc, replicatedDesc); } - void WithSyncIndex(const TString& replicatedPath) { - TEnv env; + Y_UNIT_TEST(Basic) { + Basic("/Root/Replicated"); + } + + Y_UNIT_TEST(WithIntermediateDir) { + Basic("/Root/Dir/Replicated"); + } + + void WithIndex(const TString& replicatedPath, NKikimrSchemeOp::EIndexType indexType) { + TEnv env(TFeatureFlags().SetEnableChangefeedsOnIndexTables(true)); env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); const auto tableDesc = TTestTableDescription{ @@ -79,25 +87,45 @@ Y_UNIT_TEST_SUITE(DstCreator) { const TString indexName = "index_by_value"; env.CreateTableWithIndex("/Root", *MakeTableDescription(tableDesc), - indexName, 
TVector{"value"}, NKikimrSchemeOp::EIndexTypeGlobal, - TVector{}, TDuration::Seconds(5000)); + indexName, TVector{"value"}, indexType); env.GetRuntime().Register(CreateDstCreator( env.GetSender(), env.GetSchemeshardId("/Root/Table"), env.GetYdbProxy(), env.GetPathId("/Root"), 1 /* rid */, 1 /* tid */, TReplication::ETargetKind::Table, "/Root/Table", replicatedPath )); - - auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); - UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + { + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + } auto desc = env.GetDescription(replicatedPath); const auto& replicatedDesc = desc.GetPathDescription().GetTable(); CheckTableReplica(tableDesc, replicatedDesc); + switch (indexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + UNIT_ASSERT_VALUES_EQUAL(replicatedDesc.TableIndexesSize(), 1); + break; + default: + UNIT_ASSERT_VALUES_EQUAL(replicatedDesc.TableIndexesSize(), 0); + return; + } + + env.GetRuntime().Register(CreateDstCreator( + env.GetSender(), env.GetSchemeshardId("/Root/Table"), env.GetYdbProxy(), env.GetPathId("/Root"), + 1 /* rid */, 2 /* tid */, TReplication::ETargetKind::IndexTable, + "/Root/Table/" + indexName + "/indexImplTable", replicatedPath + "/" + indexName + "/indexImplTable" + )); + { + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + } + { auto desc = env.GetDescription(replicatedPath + "/" + indexName); UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetName(), indexName); - UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetType(), NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); + UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetType(), indexType); } { @@ -106,25 +134,19 @@ 
Y_UNIT_TEST_SUITE(DstCreator) { const auto& indexTableDesc = desc.GetPathDescription().GetTable(); UNIT_ASSERT_VALUES_EQUAL(indexTableDesc.KeyColumnNamesSize(), 2); } - } - - - Y_UNIT_TEST(Basic) { - Basic("/Root/Replicated"); } - Y_UNIT_TEST(WithIntermediateDir) { - Basic("/Root/Dir/Replicated"); - } -/* Y_UNIT_TEST(WithSyncIndex) { - WithSyncIndex("/Root/Replicated"); + WithIndex("/Root/Replicated", NKikimrSchemeOp::EIndexTypeGlobal); + } + + Y_UNIT_TEST(WithSyncIndexAndIntermediateDir) { + WithIndex("/Root/Dir/Replicated", NKikimrSchemeOp::EIndexTypeGlobal); } - Y_UNIT_TEST(WithSyncIndexWithIntermediateDir) { - WithSyncIndex("/Root/Dir/Replicated"); + Y_UNIT_TEST(WithAsyncIndex) { + WithIndex("/Root/Replicated", NKikimrSchemeOp::EIndexTypeGlobalAsync); } -*/ Y_UNIT_TEST(SameOwner) { TEnv env; diff --git a/ydb/core/tx/replication/controller/dst_remover.cpp b/ydb/core/tx/replication/controller/dst_remover.cpp index 2d0ec2d3b72f..538eddd812ff 100644 --- a/ydb/core/tx/replication/controller/dst_remover.cpp +++ b/ydb/core/tx/replication/controller/dst_remover.cpp @@ -43,6 +43,8 @@ class TDstRemover: public TActorBootstrapped { case TReplication::ETargetKind::Table: tx.SetOperationType(NKikimrSchemeOp::ESchemeOpDropTable); break; + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } Send(PipeCache, new TEvPipeCache::TEvForward(ev.Release(), SchemeShardId, true)); @@ -156,7 +158,13 @@ class TDstRemover: public TActorBootstrapped { if (!DstPathId) { Success(); } else { - AllocateTxId(); + switch (Kind) { + case TReplication::ETargetKind::Table: + return AllocateTxId(); + case TReplication::ETargetKind::IndexTable: + // indexed table will be removed along with its indexes + return Success(); + } } } diff --git a/ydb/core/tx/replication/controller/private_events.cpp b/ydb/core/tx/replication/controller/private_events.cpp index f562331cc26d..12807487c856 100644 --- a/ydb/core/tx/replication/controller/private_events.cpp +++ 
b/ydb/core/tx/replication/controller/private_events.cpp @@ -4,6 +4,20 @@ namespace NKikimr::NReplication::NController { +TEvPrivate::TEvDiscoveryTargetsResult::TAddEntry::TAddEntry( + const TString& srcPath, const TString& dstPath, TReplication::ETargetKind kind) + : SrcPath(srcPath) + , DstPath(dstPath) + , Kind(kind) +{ +} + +TEvPrivate::TEvDiscoveryTargetsResult::TFailedEntry::TFailedEntry(const TString& srcPath, const NYdb::TStatus& error) + : SrcPath(srcPath) + , Error(error) +{ +} + TEvPrivate::TEvDiscoveryTargetsResult::TEvDiscoveryTargetsResult(ui64 rid, TVector&& toAdd, TVector&& toDel) : ReplicationId(rid) , ToAdd(std::move(toAdd)) @@ -179,11 +193,11 @@ TString TEvPrivate::TEvDescribeTargetsResult::ToString() const { } Y_DECLARE_OUT_SPEC(, NKikimr::NReplication::NController::TEvPrivate::TEvDiscoveryTargetsResult::TAddEntry, stream, value) { - stream << value.first.Name << " (" << value.first.Type << ")"; + stream << value.SrcPath << " (" << value.Kind << ")"; } Y_DECLARE_OUT_SPEC(, NKikimr::NReplication::NController::TEvPrivate::TEvDiscoveryTargetsResult::TFailedEntry, stream, value) { - stream << value.first << ": " << value.second.GetStatus() << " ("; - value.second.GetIssues().PrintTo(stream, true); + stream << value.SrcPath << ": " << value.Error.GetStatus() << " ("; + value.Error.GetIssues().PrintTo(stream, true); stream << ")"; } diff --git a/ydb/core/tx/replication/controller/private_events.h b/ydb/core/tx/replication/controller/private_events.h index 7383d6f7ffc0..ebf083f97b0b 100644 --- a/ydb/core/tx/replication/controller/private_events.h +++ b/ydb/core/tx/replication/controller/private_events.h @@ -1,6 +1,7 @@ #pragma once -#include +#include "replication.h" + #include #include @@ -31,6 +32,10 @@ struct TEvPrivate { EvAlterDstResult, EvRemoveWorker, EvDescribeTargetsResult, + EvRequestCreateStream, + EvAllowCreateStream, + EvRequestDropStream, + EvAllowDropStream, EvEnd, }; @@ -38,8 +43,20 @@ struct TEvPrivate { static_assert(EvEnd < 
EventSpaceEnd(TKikimrEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE)"); struct TEvDiscoveryTargetsResult: public TEventLocal { - using TAddEntry = std::pair; // src, dst - using TFailedEntry = std::pair; // src, error + struct TAddEntry { + TString SrcPath; + TString DstPath; + TReplication::ETargetKind Kind; + + explicit TAddEntry(const TString& srcPath, const TString& dstPath, TReplication::ETargetKind kind); + }; + + struct TFailedEntry { + TString SrcPath; + NYdb::TStatus Error; + + explicit TFailedEntry(const TString& srcPath, const NYdb::TStatus& error); + }; const ui64 ReplicationId; TVector ToAdd; @@ -208,6 +225,18 @@ struct TEvPrivate { TString ToString() const override; }; + struct TEvRequestCreateStream: public TEventLocal { + }; + + struct TEvAllowCreateStream: public TEventLocal { + }; + + struct TEvRequestDropStream: public TEventLocal { + }; + + struct TEvAllowDropStream: public TEventLocal { + }; + }; // TEvPrivate } diff --git a/ydb/core/tx/replication/controller/replication.cpp b/ydb/core/tx/replication/controller/replication.cpp index 983f89fa4eb2..9acc0f7f9b7a 100644 --- a/ydb/core/tx/replication/controller/replication.cpp +++ b/ydb/core/tx/replication/controller/replication.cpp @@ -42,7 +42,9 @@ class TReplication::TImpl: public TLagProvider { ITarget* CreateTarget(TReplication* self, ui64 id, ETargetKind kind, Args&&... 
args) const { switch (kind) { case ETargetKind::Table: - return new TTableTarget(self, id, std::forward(args)...); + return new TTargetTable(self, id, std::forward(args)...); + case ETargetKind::IndexTable: + return new TTargetIndexTable(self, id, std::forward(args)...); } } @@ -113,19 +115,22 @@ class TReplication::TImpl: public TLagProvider { if (!YdbProxy && !(State == EState::Removing && !Targets)) { THolder ydbProxy; const auto& params = Config.GetSrcConnectionParams(); + const auto& endpoint = params.GetEndpoint(); + const auto& database = params.GetDatabase(); + const bool ssl = params.GetEnableSsl(); switch (params.GetCredentialsCase()) { case NKikimrReplication::TConnectionParams::kStaticCredentials: if (!params.GetStaticCredentials().HasPassword()) { return ResolveSecret(params.GetStaticCredentials().GetPasswordSecretName(), ctx); } - ydbProxy.Reset(CreateYdbProxy(params.GetEndpoint(), params.GetDatabase(), params.GetStaticCredentials())); + ydbProxy.Reset(CreateYdbProxy(endpoint, database, ssl, params.GetStaticCredentials())); break; case NKikimrReplication::TConnectionParams::kOAuthToken: if (!params.GetOAuthToken().HasToken()) { return ResolveSecret(params.GetOAuthToken().GetTokenSecretName(), ctx); } - ydbProxy.Reset(CreateYdbProxy(params.GetEndpoint(), params.GetDatabase(), params.GetOAuthToken().GetToken())); + ydbProxy.Reset(CreateYdbProxy(endpoint, database, ssl, params.GetOAuthToken().GetToken())); break; default: ErrorState(TStringBuilder() << "Unexpected credentials: " << params.GetCredentialsCase()); diff --git a/ydb/core/tx/replication/controller/replication.h b/ydb/core/tx/replication/controller/replication.h index afdcfccebe16..c4e64436bb6d 100644 --- a/ydb/core/tx/replication/controller/replication.h +++ b/ydb/core/tx/replication/controller/replication.h @@ -32,6 +32,7 @@ class TReplication: public TSimpleRefCount { enum class ETargetKind: ui8 { Table, + IndexTable, }; enum class EDstState: ui8 { diff --git 
a/ydb/core/tx/replication/controller/stream_creator.cpp b/ydb/core/tx/replication/controller/stream_creator.cpp index 2e295fab12ed..cd8cb06d8e7c 100644 --- a/ydb/core/tx/replication/controller/stream_creator.cpp +++ b/ydb/core/tx/replication/controller/stream_creator.cpp @@ -4,7 +4,9 @@ #include "target_with_stream.h" #include "util.h" +#include #include +#include #include #include #include @@ -16,16 +18,38 @@ namespace NKikimr::NReplication::NController { class TStreamCreator: public TActorBootstrapped { - static NYdb::NTable::TChangefeedDescription MakeChangefeed(const TString& name, const NJson::TJsonMap& attrs) { + static NYdb::NTable::TChangefeedDescription MakeChangefeed( + const TString& name, const TDuration& retentionPeriod, const NJson::TJsonMap& attrs) + { using namespace NYdb::NTable; return TChangefeedDescription(name, EChangefeedMode::Updates, EChangefeedFormat::Json) + .WithRetentionPeriod(retentionPeriod) .WithInitialScan() .AddAttribute("__async_replication", NJson::WriteJson(attrs, false)); } + void RequestPermission() { + Send(Parent, new TEvPrivate::TEvRequestCreateStream()); + Become(&TThis::StateRequestPermission); + } + + STATEFN(StateRequestPermission) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvAllowCreateStream, Handle); + default: + return StateBase(ev); + } + } + + void Handle(TEvPrivate::TEvAllowCreateStream::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + CreateStream(); + } + void CreateStream() { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: Send(YdbProxy, new TEvYdbProxy::TEvAlterTableRequest(SrcPath, NYdb::NTable::TAlterTableSettings() .AppendAddChangefeeds(Changefeed))); break; @@ -64,8 +88,17 @@ class TStreamCreator: public TActorBootstrapped { } } + TString BuildStreamPath() const { + switch (Kind) { + case TReplication::ETargetKind::Table: + return CanonizePath(ChildPath(SplitPath(SrcPath), Changefeed.GetName())); + case 
TReplication::ETargetKind::IndexTable: + return CanonizePath(ChildPath(SplitPath(SrcPath), {"indexImplTable", Changefeed.GetName()})); + } + } + void CreateConsumer() { - const auto streamPath = CanonizePath(ChildPath(SplitPath(SrcPath), Changefeed.GetName())); + const auto streamPath = BuildStreamPath(); const auto settings = NYdb::NTopic::TAlterTopicSettings() .BeginAddConsumer() .ConsumerName(ReplicationConsumerName) @@ -88,6 +121,10 @@ class TStreamCreator: public TActorBootstrapped { LOG_T("Handle " << ev->Get()->ToString()); auto& result = ev->Get()->Result; + if (result.GetStatus() == NYdb::EStatus::ALREADY_EXISTS) { + return Reply(NYdb::TStatus(NYdb::EStatus::SUCCESS, NYql::TIssues())); + } + if (!result.IsSuccess()) { if (IsRetryableError(result)) { LOG_D("Retry CreateConsumer"); @@ -123,14 +160,15 @@ class TStreamCreator: public TActorBootstrapped { TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, - const TString& streamName) + const TString& streamName, + const TDuration& streamRetentionPeriod) : Parent(parent) , YdbProxy(proxy) , ReplicationId(rid) , TargetId(tid) , Kind(kind) , SrcPath(srcPath) - , Changefeed(MakeChangefeed(streamName, NJson::TJsonMap{ + , Changefeed(MakeChangefeed(streamName, streamRetentionPeriod, NJson::TJsonMap{ {"path", dstPath}, {"id", ToString(rid)}, })) @@ -139,7 +177,7 @@ class TStreamCreator: public TActorBootstrapped { } void Bootstrap() { - CreateStream(); + RequestPermission(); } STATEFN(StateBase) { @@ -165,13 +203,15 @@ IActor* CreateStreamCreator(TReplication* replication, ui64 targetId, const TAct Y_ABORT_UNLESS(target); return CreateStreamCreator(ctx.SelfID, replication->GetYdbProxy(), replication->GetId(), target->GetId(), target->GetKind(), - target->GetSrcPath(), target->GetDstPath(), target->GetStreamName()); + target->GetSrcPath(), target->GetDstPath(), target->GetStreamName(), + TDuration::Seconds(AppData()->ReplicationConfig.GetRetentionPeriodSeconds())); } IActor* 
CreateStreamCreator(const TActorId& parent, const TActorId& proxy, ui64 rid, ui64 tid, - TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, const TString& streamName) + TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, + const TString& streamName, const TDuration& streamRetentionPeriod) { - return new TStreamCreator(parent, proxy, rid, tid, kind, srcPath, dstPath, streamName); + return new TStreamCreator(parent, proxy, rid, tid, kind, srcPath, dstPath, streamName, streamRetentionPeriod); } } diff --git a/ydb/core/tx/replication/controller/stream_creator.h b/ydb/core/tx/replication/controller/stream_creator.h index 94eca13b4554..1eca930efb6b 100644 --- a/ydb/core/tx/replication/controller/stream_creator.h +++ b/ydb/core/tx/replication/controller/stream_creator.h @@ -6,6 +6,7 @@ namespace NKikimr::NReplication::NController { IActor* CreateStreamCreator(TReplication* replication, ui64 targetId, const TActorContext& ctx); IActor* CreateStreamCreator(const TActorId& parent, const TActorId& proxy, ui64 rid, ui64 tid, - TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, const TString& streamName); + TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, + const TString& streamName, const TDuration& streamRetentionPeriod); } diff --git a/ydb/core/tx/replication/controller/stream_remover.cpp b/ydb/core/tx/replication/controller/stream_remover.cpp index 6a30ac851801..4f75dc27a69a 100644 --- a/ydb/core/tx/replication/controller/stream_remover.cpp +++ b/ydb/core/tx/replication/controller/stream_remover.cpp @@ -10,9 +10,28 @@ namespace NKikimr::NReplication::NController { class TStreamRemover: public TActorBootstrapped { + void RequestPermission() { + Send(Parent, new TEvPrivate::TEvRequestDropStream()); + Become(&TThis::StateRequestPermission); + } + + STATEFN(StateRequestPermission) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvAllowDropStream, 
Handle); + default: + return StateBase(ev); + } + } + + void Handle(TEvPrivate::TEvAllowDropStream::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + DropStream(); + } + void DropStream() { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: Send(YdbProxy, new TEvYdbProxy::TEvAlterTableRequest(SrcPath, NYdb::NTable::TAlterTableSettings() .AppendDropChangefeeds(StreamName))); break; @@ -25,7 +44,8 @@ class TStreamRemover: public TActorBootstrapped { switch (ev->GetTypeRewrite()) { hFunc(TEvYdbProxy::TEvAlterTableResponse, Handle); sFunc(TEvents::TEvWakeup, DropStream); - sFunc(TEvents::TEvPoison, PassAway); + default: + return StateBase(ev); } } @@ -76,7 +96,13 @@ class TStreamRemover: public TActorBootstrapped { } void Bootstrap() { - DropStream(); + RequestPermission(); + } + + STATEFN(StateBase) { + switch (ev->GetTypeRewrite()) { + sFunc(TEvents::TEvPoison, PassAway); + } } private: diff --git a/ydb/core/tx/replication/controller/target_discoverer.cpp b/ydb/core/tx/replication/controller/target_discoverer.cpp index c5f956555cac..a29fafea1ffd 100644 --- a/ydb/core/tx/replication/controller/target_discoverer.cpp +++ b/ydb/core/tx/replication/controller/target_discoverer.cpp @@ -3,11 +3,11 @@ #include "target_discoverer.h" #include "util.h" +#include +#include #include #include -#include - #include #include @@ -25,7 +25,7 @@ class TTargetDiscoverer: public TActorBootstrapped { auto it = Pending.find(ev->Cookie); if (it == Pending.end()) { - LOG_W("Unknown describe response" + LOG_W("Unknown describe path response" << ": cookie# " << ev->Cookie); return; } @@ -35,37 +35,95 @@ class TTargetDiscoverer: public TActorBootstrapped { const auto& result = ev->Get()->Result; if (result.IsSuccess()) { - LOG_D("Describe succeeded" + LOG_D("Describe path succeeded" << ": path# " << path.first); - auto entry = result.GetEntry(); + const auto& entry = result.GetEntry(); switch (entry.Type) { case 
NYdb::NScheme::ESchemeEntryType::SubDomain: case NYdb::NScheme::ESchemeEntryType::Directory: Pending.erase(it); return ListDirectory(path); + case NYdb::NScheme::ESchemeEntryType::Table: + return DescribeTable(ev->Cookie); default: break; } - entry.Name = path.first; // replace by full path + LOG_W("Unsupported entry type" + << ": path# " << path.first + << ", type# " << entry.Type); - if (const auto kind = TryTargetKindFromEntryType(entry.Type)) { - LOG_I("Add target" - << ": path# " << path.first - << ", kind# " << kind); - ToAdd.emplace_back(std::move(entry), path.second); + NYql::TIssues issues; + issues.AddIssue(TStringBuilder() << "Unsupported entry type: " << entry.Type); + Failed.emplace_back(path.first, NYdb::TStatus(NYdb::EStatus::UNSUPPORTED, std::move(issues))); + } else { + LOG_E("Describe path failed" + << ": path# " << path.first + << ", status# " << result.GetStatus() + << ", issues# " << result.GetIssues().ToOneLineString()); + + if (IsRetryableError(result)) { + return RetryDescribe(*it); } else { - LOG_W("Unsupported entry type" - << ": path# " << path.first - << ", type# " << entry.Type); + Failed.emplace_back(path.first, result); + } + } + + Pending.erase(it); + MaybeReply(); + } + + void DescribeTable(ui32 idx) { + Y_ABORT_UNLESS(idx < Paths.size()); + Send(YdbProxy, new TEvYdbProxy::TEvDescribeTableRequest(Paths.at(idx).first, {}), 0, idx); + Pending.insert(idx); + } - NYql::TIssues issues; - issues.AddIssue(TStringBuilder() << "Unsupported entry type: " << entry.Type); - Failed.emplace_back(path.first, NYdb::TStatus(NYdb::EStatus::UNSUPPORTED, std::move(issues))); + void Handle(TEvYdbProxy::TEvDescribeTableResponse::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + + auto it = Pending.find(ev->Cookie); + if (it == Pending.end()) { + LOG_W("Unknown describe table response" + << ": cookie# " << ev->Cookie); + return; + } + + Y_ABORT_UNLESS(*it < Paths.size()); + const auto& path = Paths.at(*it); + + const auto& result = 
ev->Get()->Result; + if (result.IsSuccess()) { + LOG_D("Describe table succeeded" + << ": path# " << path.first); + + const auto& target = ToAdd.emplace_back(path.first, path.second, TReplication::ETargetKind::Table); + LOG_I("Add target" + << ": srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath + << ", kind# " << target.Kind); + + for (const auto& index : result.GetTableDescription().GetIndexDescriptions()) { + switch (index.GetIndexType()) { + case NYdb::NTable::EIndexType::GlobalSync: + case NYdb::NTable::EIndexType::GlobalUnique: + break; + default: + continue; + } + + const auto& target = ToAdd.emplace_back( + CanonizePath(ChildPath(SplitPath(path.first), index.GetIndexName())), + CanonizePath(ChildPath(SplitPath(path.second), {index.GetIndexName(), "indexImplTable"})), + TReplication::ETargetKind::IndexTable); + LOG_I("Add target" + << ": srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath + << ", kind# " << target.Kind); } } else { - LOG_E("Describe failed" + LOG_E("Describe table failed" << ": path# " << path.first << ", status# " << result.GetStatus() << ", issues# " << result.GetIssues().ToOneLineString()); @@ -114,7 +172,8 @@ class TTargetDiscoverer: public TActorBootstrapped { return entry.Name.StartsWith("~") || entry.Name.StartsWith(".sys") - || entry.Name.StartsWith(".metadata"); + || entry.Name.StartsWith(".metadata") + || entry.Name.StartsWith("export-"); } void Handle(TEvYdbProxy::TEvListDirectoryResponse::TPtr& ev) { @@ -143,13 +202,13 @@ class TTargetDiscoverer: public TActorBootstrapped { path.second + '/' + child.Name)); } break; + case NYdb::NScheme::ESchemeEntryType::Table: + Paths.emplace_back( + path.first + '/' + child.Name, + path.second + '/' + child.Name); + DescribeTable(Paths.size() - 1); + break; default: - if (TryTargetKindFromEntryType(child.Type)) { - Paths.emplace_back( - path.first + '/' + child.Name, - path.second + '/' + child.Name); - DescribePath(Paths.size() - 1); - } break; } } @@ -225,6 
+284,7 @@ class TTargetDiscoverer: public TActorBootstrapped { switch (ev->GetTypeRewrite()) { hFunc(TEvYdbProxy::TEvDescribePathResponse, Handle); hFunc(TEvYdbProxy::TEvListDirectoryResponse, Handle); + hFunc(TEvYdbProxy::TEvDescribeTableResponse, Handle); sFunc(TEvents::TEvWakeup, Retry); sFunc(TEvents::TEvPoison, PassAway); } diff --git a/ydb/core/tx/replication/controller/target_discoverer_ut.cpp b/ydb/core/tx/replication/controller/target_discoverer_ut.cpp new file mode 100644 index 000000000000..70bd0f863c26 --- /dev/null +++ b/ydb/core/tx/replication/controller/target_discoverer_ut.cpp @@ -0,0 +1,135 @@ +#include "private_events.h" +#include "target_discoverer.h" + +#include +#include + +#include + +namespace NKikimr::NReplication::NController { + +Y_UNIT_TEST_SUITE(TargetDiscoverer) { + using namespace NTestHelpers; + + TTestTableDescription DummyTable() { + return TTestTableDescription{ + .Name = "Table", + .KeyColumns = {"key"}, + .Columns = { + {.Name = "key", .Type = "Uint32"}, + {.Name = "value", .Type = "Uint32"}, + }, + .ReplicationConfig = Nothing(), + }; + } + + Y_UNIT_TEST(Basic) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.CreateTable("/Root", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).DstPath, "/Root/Replicated/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).Kind, TReplication::ETargetKind::Table); + } + + Y_UNIT_TEST(IndexedTable) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + 
env.CreateTableWithIndex("/Root", *MakeTableDescription(DummyTable()), + "Index", TVector{"value"}, NKikimrSchemeOp::EIndexTypeGlobal); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).SrcPath, "/Root/Table/Index"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).DstPath, "/Root/Replicated/Table/Index/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).Kind, TReplication::ETargetKind::IndexTable); + } + + Y_UNIT_TEST(Negative) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root/Table", "/Root/ReplicatedTable"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(!ev->Get()->IsSuccess()); + + const auto& failed = ev->Get()->Failed; + UNIT_ASSERT_VALUES_EQUAL(failed.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(failed.at(0).SrcPath, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(failed.at(0).Error.GetStatus(), NYdb::EStatus::SCHEME_ERROR); + } + + Y_UNIT_TEST(Dirs) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.MkDir("/Root", "Dir"); + env.CreateTable("/Root/Dir", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, 
"/Root/Dir/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).DstPath, "/Root/Replicated/Dir/Table"); + } + + Y_UNIT_TEST(SystemObjects) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.CreateTable("/Root", *MakeTableDescription(DummyTable())); + env.MkDir("/Root", "export-100500"); + env.CreateTable("/Root/export-100500", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, "/Root/Table"); + } +} + +} diff --git a/ydb/core/tx/replication/controller/target_table.cpp b/ydb/core/tx/replication/controller/target_table.cpp index d44b699bf0d0..6201bc29fbfa 100644 --- a/ydb/core/tx/replication/controller/target_table.cpp +++ b/ydb/core/tx/replication/controller/target_table.cpp @@ -102,16 +102,35 @@ class TTableWorkerRegistar: public TActorBootstrapped { }; // TTableWorkerRegistar -TTableTarget::TTableTarget(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) - : TTargetWithStream(replication, ETargetKind::Table, id, srcPath, dstPath) +TTargetTableBase::TTargetTableBase(TReplication* replication, ETargetKind finalKind, + ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetWithStream(replication, finalKind, id, srcPath, dstPath) { } -IActor* TTableTarget::CreateWorkerRegistar(const TActorContext& ctx) const { +IActor* TTargetTableBase::CreateWorkerRegistar(const TActorContext& ctx) const { auto replication = GetReplication(); return new TTableWorkerRegistar(ctx.SelfID, replication->GetYdbProxy(), replication->GetConfig().GetSrcConnectionParams(), replication->GetId(), GetId(), - 
CanonizePath(ChildPath(SplitPath(GetSrcPath()), GetStreamName())), GetDstPathId()); + BuildStreamPath(), GetDstPathId()); +} + +TTargetTable::TTargetTable(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetTableBase(replication, ETargetKind::Table, id, srcPath, dstPath) +{ +} + +TString TTargetTable::BuildStreamPath() const { + return CanonizePath(ChildPath(SplitPath(GetSrcPath()), GetStreamName())); +} + +TTargetIndexTable::TTargetIndexTable(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetTableBase(replication, ETargetKind::IndexTable, id, srcPath, dstPath) +{ +} + +TString TTargetIndexTable::BuildStreamPath() const { + return CanonizePath(ChildPath(SplitPath(GetSrcPath()), {"indexImplTable", GetStreamName()})); } } diff --git a/ydb/core/tx/replication/controller/target_table.h b/ydb/core/tx/replication/controller/target_table.h index 08b45d7b0153..30f57d5db31a 100644 --- a/ydb/core/tx/replication/controller/target_table.h +++ b/ydb/core/tx/replication/controller/target_table.h @@ -4,14 +4,32 @@ namespace NKikimr::NReplication::NController { -class TTableTarget: public TTargetWithStream { +class TTargetTableBase: public TTargetWithStream { public: - explicit TTableTarget(TReplication* replication, + explicit TTargetTableBase(TReplication* replication, ETargetKind finalKind, ui64 id, const TString& srcPath, const TString& dstPath); protected: IActor* CreateWorkerRegistar(const TActorContext& ctx) const override; + virtual TString BuildStreamPath() const = 0; +}; -}; // TTableTarget +class TTargetTable: public TTargetTableBase { +public: + explicit TTargetTable(TReplication* replication, + ui64 id, const TString& srcPath, const TString& dstPath); + +protected: + TString BuildStreamPath() const override; +}; + +class TTargetIndexTable: public TTargetTableBase { +public: + explicit TTargetIndexTable(TReplication* replication, + ui64 id, const TString& srcPath, const TString& 
dstPath); + +protected: + TString BuildStreamPath() const override; +}; } diff --git a/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp b/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp index 686de8fb261e..7b75428e2c71 100644 --- a/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp +++ b/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp @@ -43,23 +43,19 @@ class TController::TTxDiscoveryTargetsResult: public TTxBase { if (Ev->Get()->IsSuccess()) { for (const auto& target : Ev->Get()->ToAdd) { - const auto kind = TargetKindFromEntryType(target.first.Type); - const auto& srcPath = target.first.Name; - const auto& dstPath = target.second; - - const auto tid = Replication->AddTarget(kind, srcPath, dstPath); + const auto tid = Replication->AddTarget(target.Kind, target.SrcPath, target.DstPath); db.Table().Key(rid, tid).Update( - NIceDb::TUpdate(kind), - NIceDb::TUpdate(srcPath), - NIceDb::TUpdate(dstPath) + NIceDb::TUpdate(target.Kind), + NIceDb::TUpdate(target.SrcPath), + NIceDb::TUpdate(target.DstPath) ); CLOG_N(ctx, "Add target" << ": rid# " << rid << ", tid# " << tid - << ", kind# " << kind - << ", srcPath# " << srcPath - << ", dstPath# " << dstPath); + << ", kind# " << target.Kind + << ", srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath); } } else { const auto error = JoinSeq(", ", Ev->Get()->Failed); diff --git a/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make b/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make new file mode 100644 index 000000000000..08eec946a5b4 --- /dev/null +++ b/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make @@ -0,0 +1,20 @@ +UNITTEST_FOR(ydb/core/tx/replication/controller) + +FORK_SUBTESTS() + +SIZE(MEDIUM) + +TIMEOUT(600) + +PEERDIR( + ydb/core/tx/replication/ut_helpers + library/cpp/testing/unittest +) + +SRCS( + target_discoverer_ut.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git 
a/ydb/core/tx/replication/controller/util.h b/ydb/core/tx/replication/controller/util.h index 0963fa19d044..d0b3afba3199 100644 --- a/ydb/core/tx/replication/controller/util.h +++ b/ydb/core/tx/replication/controller/util.h @@ -1,47 +1,12 @@ #pragma once -#include "replication.h" - -#include #include -#include - #include -#include #include namespace NKikimr::NReplication::NController { -inline TMaybe TryTargetKindFromEntryType(NYdb::NScheme::ESchemeEntryType type) { - switch (type) { - case NYdb::NScheme::ESchemeEntryType::Table: - return TReplication::ETargetKind::Table; - case NYdb::NScheme::ESchemeEntryType::Unknown: - case NYdb::NScheme::ESchemeEntryType::Directory: - case NYdb::NScheme::ESchemeEntryType::PqGroup: - case NYdb::NScheme::ESchemeEntryType::SubDomain: - case NYdb::NScheme::ESchemeEntryType::RtmrVolume: - case NYdb::NScheme::ESchemeEntryType::BlockStoreVolume: - case NYdb::NScheme::ESchemeEntryType::CoordinationNode: - case NYdb::NScheme::ESchemeEntryType::Sequence: - case NYdb::NScheme::ESchemeEntryType::Replication: - case NYdb::NScheme::ESchemeEntryType::ColumnTable: - case NYdb::NScheme::ESchemeEntryType::ColumnStore: - case NYdb::NScheme::ESchemeEntryType::Topic: - case NYdb::NScheme::ESchemeEntryType::ExternalTable: - case NYdb::NScheme::ESchemeEntryType::ExternalDataSource: - case NYdb::NScheme::ESchemeEntryType::View: - return Nothing(); - } -} - -inline TReplication::ETargetKind TargetKindFromEntryType(NYdb::NScheme::ESchemeEntryType type) { - auto res = TryTargetKindFromEntryType(type); - Y_VERIFY_S(res, "Unexpected entry type: " << static_cast(type)); - return *res; -} - inline TString& TruncatedIssue(TString& issue) { static constexpr ui32 sizeLimit = 2_KB; static constexpr TStringBuf ellipsis = "..."; diff --git a/ydb/core/tx/replication/controller/ya.make b/ydb/core/tx/replication/controller/ya.make index 52bdfbbabdcb..d779aa7fa500 100644 --- a/ydb/core/tx/replication/controller/ya.make +++ b/ydb/core/tx/replication/controller/ya.make 
@@ -5,9 +5,11 @@ PEERDIR( ydb/core/discovery ydb/core/engine/minikql ydb/core/protos + ydb/core/tablet ydb/core/tablet_flat ydb/core/tx/replication/common ydb/core/tx/replication/ydb_proxy + ydb/core/tx/scheme_board ydb/core/util ydb/core/ydb_convert ydb/services/metadata @@ -59,4 +61,5 @@ END() RECURSE_FOR_TESTS( ut_dst_creator + ut_target_discoverer ) diff --git a/ydb/core/tx/replication/service/service.cpp b/ydb/core/tx/replication/service/service.cpp index a8ea5d64ef5f..0e399990b268 100644 --- a/ydb/core/tx/replication/service/service.cpp +++ b/ydb/core/tx/replication/service/service.cpp @@ -125,9 +125,9 @@ class TSessionInfo { }; // TSessionInfo -struct TCredentialsKey: std::tuple { - explicit TCredentialsKey(const TString& endpoint, const TString& database, const TString& user) - : std::tuple(endpoint, database, user) +struct TCredentialsKey: std::tuple { + explicit TCredentialsKey(const TString& endpoint, const TString& database, bool ssl, const TString& user) + : std::tuple(endpoint, database, ssl, user) { } @@ -139,12 +139,20 @@ struct TCredentialsKey: std::tuple { return std::get<1>(*this); } + bool EnableSsl() const { + return std::get<2>(*this); + } + static TCredentialsKey FromParams(const NKikimrReplication::TConnectionParams& params) { + const auto& endpoint = params.GetEndpoint(); + const auto& database = params.GetDatabase(); + const bool ssl = params.GetEnableSsl(); + switch (params.GetCredentialsCase()) { case NKikimrReplication::TConnectionParams::kStaticCredentials: - return TCredentialsKey(params.GetEndpoint(), params.GetDatabase(), params.GetStaticCredentials().GetUser()); + return TCredentialsKey(endpoint, database, ssl, params.GetStaticCredentials().GetUser()); case NKikimrReplication::TConnectionParams::kOAuthToken: - return TCredentialsKey(params.GetEndpoint(), params.GetDatabase(), params.GetOAuthToken().GetToken() /* TODO */); + return TCredentialsKey(endpoint, database, ssl, params.GetOAuthToken().GetToken()); default: 
Y_ABORT("Unexpected credentials"); } @@ -155,7 +163,7 @@ struct TCredentialsKey: std::tuple { } // NKikimr::NReplication::NService template <> -struct THash : THash> {}; +struct THash : THash> {}; namespace NKikimr::NReplication { @@ -212,7 +220,7 @@ class TReplicationService: public TActorBootstrapped { const TActorId& GetOrCreateYdbProxy(TCredentialsKey&& key, Args&&... args) { auto it = YdbProxies.find(key); if (it == YdbProxies.end()) { - auto ydbProxy = Register(CreateYdbProxy(key.Endpoint(), key.Database(), std::forward(args)...)); + auto ydbProxy = Register(CreateYdbProxy(key.Endpoint(), key.Database(), key.EnableSsl(), std::forward(args)...)); auto res = YdbProxies.emplace(std::move(key), std::move(ydbProxy)); Y_ABORT_UNLESS(res.second); it = res.first; diff --git a/ydb/core/tx/replication/service/table_writer_impl.h b/ydb/core/tx/replication/service/table_writer_impl.h index 1fd77232fb2d..475aa3c351a2 100644 --- a/ydb/core/tx/replication/service/table_writer_impl.h +++ b/ydb/core/tx/replication/service/table_writer_impl.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -278,16 +279,6 @@ class TLocalTableWriter return Check(&TSchemeCacheHelpers::CheckEntryKind, &TThis::LogCritAndLeave, entry, expected); } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(::Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); - } - - return result; - } - void Registered(TActorSystem*, const TActorId&) override { this->ChangeServer = this->SelfId(); } @@ -348,6 +339,12 @@ class TLocalTableWriter return; } + if (TableVersion && TableVersion == entry.Self->Info.GetVersion().GetGeneralVersion()) { + Y_ABORT_UNLESS(Initialized); + Resolving = false; + return this->CreateSenders(); + } + auto schema = MakeIntrusive(); if (entry.Self && entry.Self->Info.HasVersion()) { schema->Version = entry.Self->Info.GetVersion().GetTableSchemaVersion(); @@ -415,11 +412,9 @@ 
class TLocalTableWriter return LogWarnAndRetry("Empty partitions"); } - const bool versionChanged = !TableVersion || TableVersion != entry.GeneralVersion; TableVersion = entry.GeneralVersion; - KeyDesc = std::move(entry.KeyDescription); - this->CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged); + this->CreateSenders(NChangeExchange::MakePartitionIds(KeyDesc->GetPartitions())); if (!Initialized) { this->Send(Worker, new TEvWorker::TEvHandshake()); diff --git a/ydb/core/tx/replication/service/table_writer_ut.cpp b/ydb/core/tx/replication/service/table_writer_ut.cpp index 65c2c710e2a9..5b1b3de7b169 100644 --- a/ydb/core/tx/replication/service/table_writer_ut.cpp +++ b/ydb/core/tx/replication/service/table_writer_ut.cpp @@ -39,10 +39,7 @@ Y_UNIT_TEST_SUITE(LocalTableWriter) { } Y_UNIT_TEST(SupportedTypes) { - auto featureFlags = TFeatureFlags(); - featureFlags.SetEnableTableDatetime64(true); - - TEnv env(featureFlags); + TEnv env(TFeatureFlags().SetEnableTableDatetime64(true)); env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_SERVICE, NLog::PRI_DEBUG); env.CreateTable("/Root", *MakeTableDescription(TTestTableDescription{ diff --git a/ydb/core/tx/replication/ut_helpers/test_env.h b/ydb/core/tx/replication/ut_helpers/test_env.h index f0aec854575f..e51542c99513 100644 --- a/ydb/core/tx/replication/ut_helpers/test_env.h +++ b/ydb/core/tx/replication/ut_helpers/test_env.h @@ -10,6 +10,9 @@ namespace NKikimr::NReplication::NTestHelpers { +class TFeatureFlags: public TTestFeatureFlagsHolder { +}; + template class TEnv { static constexpr char DomainName[] = "Root"; @@ -32,7 +35,7 @@ class TEnv { Database = "/" + ToString(DomainName); YdbProxy = Server.GetRuntime()->Register(CreateYdbProxy( - Endpoint, UseDatabase ? Database : "", std::forward(args)...)); + Endpoint, UseDatabase ? 
Database : "", false /* ssl */, std::forward(args)...)); Sender = Server.GetRuntime()->AllocateEdgeActor(); } @@ -63,7 +66,7 @@ class TEnv { TEnv(const TFeatureFlags& featureFlags, bool init = true) : Settings(Tests::TServerSettings(PortManager.GetPort(), {}, MakePqConfig()) .SetDomainName(DomainName) - .SetFeatureFlags(featureFlags) + .SetFeatureFlags(featureFlags.FeatureFlags) ) , Server(Settings) , Client(Settings) @@ -160,7 +163,12 @@ class TEnv { auto CreateTableWithIndex(Args&&... args) { return Client.CreateTableWithUniformShardedIndex(std::forward(args)...); } - + + template + auto MkDir(Args&&... args) { + return Client.MkDir(std::forward(args)...); + } + void SendAsync(const TActorId& recipient, IEventBase* ev) { Server.GetRuntime()->Send(new IEventHandle(recipient, Sender, ev)); } diff --git a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp index ccd65e9cc62e..a92aa13aec37 100644 --- a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp +++ b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp @@ -419,20 +419,21 @@ class TYdbProxy: public TBaseProxyActor { Call(ev, &TTopicClient::CommitOffset); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database) { + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl) { return TCommonClientSettings() .DiscoveryEndpoint(endpoint) .DiscoveryMode(EDiscoveryMode::Async) - .Database(database); + .Database(database) + .SslCredentials(ssl); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, const TString& token) { - return MakeSettings(endpoint, database) + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl, const TString& token) { + return MakeSettings(endpoint, database, ssl) .AuthToken(token); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, const 
TStaticCredentials& credentials) { - return MakeSettings(endpoint, database) + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl, const TStaticCredentials& credentials) { + return MakeSettings(endpoint, database, ssl) .CredentialsProviderFactory(CreateLoginCredentialsProviderFactory({ .User = credentials.GetUser(), .Password = credentials.GetPassword(), @@ -485,16 +486,16 @@ class TYdbProxy: public TBaseProxyActor { }; // TYdbProxy -IActor* CreateYdbProxy(const TString& endpoint, const TString& database) { - return new TYdbProxy(endpoint, database); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl) { + return new TYdbProxy(endpoint, database, ssl); } -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TString& token) { - return new TYdbProxy(endpoint, database, token); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TString& token) { + return new TYdbProxy(endpoint, database, ssl, token); } -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TStaticCredentials& credentials) { - return new TYdbProxy(endpoint, database, credentials); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TStaticCredentials& credentials) { + return new TYdbProxy(endpoint, database, ssl, credentials); } } diff --git a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h index 8eba5e9a00a2..9b9c13244679 100644 --- a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h +++ b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h @@ -255,9 +255,9 @@ struct TEvYdbProxy { #pragma pop_macro("RemoveDirectory") -IActor* CreateYdbProxy(const TString& endpoint, const TString& database); -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TString& token); -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, 
+IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TString& token); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const NKikimrReplication::TStaticCredentials& credentials); } diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 07062b4ce553..0c0c2061a3c3 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -13,9 +13,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -26,6 +26,8 @@ #include #include #include +#include + #include #include @@ -898,8 +900,6 @@ class TSchemeCache: public TMonitorableActor { default: return false; } - case NKikimrSchemeOp::EPathTypeTableIndex: - return true; default: return false; } @@ -978,6 +978,62 @@ class TSchemeCache: public TMonitorableActor { return partitions; } + static void FillTopicPartitioning( + const NKikimrSchemeOp::TPersQueueGroupDescription& pqDesc, + TVector& schema, + TVector& partitioning) + { + const auto& pqConfig = pqDesc.GetPQTabletConfig(); + if (pqConfig.GetPartitionKeySchema().empty()) { + return; + } + + schema.reserve(pqConfig.PartitionKeySchemaSize()); + for (const auto& keySchema : pqConfig.GetPartitionKeySchema()) { + // TODO: support pg types + schema.push_back(NScheme::TTypeInfo(keySchema.GetTypeId())); + } + + partitioning.reserve(pqDesc.PartitionsSize()); + for (const auto& partition : pqDesc.GetPartitions()) { + auto keyRange = NPQ::TPartitionKeyRange::Parse(partition.GetKeyRange()); + Y_ABORT_UNLESS(!keyRange.FromBound || keyRange.FromBound->GetCells().size() == schema.size()); + Y_ABORT_UNLESS(!keyRange.ToBound || keyRange.ToBound->GetCells().size() == schema.size()); + + auto& info = partitioning.emplace_back(partition.GetPartitionId()); + if (keyRange.ToBound) { + info.Range = 
NKikimr::TKeyDesc::TPartitionRangeInfo{ + .EndKeyPrefix = *keyRange.ToBound, + }; + } else { + info.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{}; + } + } + + Sort(partitioning.begin(), partitioning.end(), [&schema](const auto& lhs, const auto& rhs) { + Y_ABORT_UNLESS(lhs.Range && rhs.Range); + Y_ABORT_UNLESS(lhs.Range->EndKeyPrefix || rhs.Range->EndKeyPrefix); + + if (!lhs.Range->EndKeyPrefix) { + return false; + } + + if (!rhs.Range->EndKeyPrefix) { + return true; + } + + Y_ABORT_UNLESS(lhs.Range->EndKeyPrefix && rhs.Range->EndKeyPrefix); + + const int compares = CompareTypedCellVectors( + lhs.Range->EndKeyPrefix.GetCells().data(), + rhs.Range->EndKeyPrefix.GetCells().data(), + schema.data(), schema.size() + ); + + return (compares < 0); + }); + } + bool IsSysTable() const { return Kind == TNavigate::KindTable && PathId.OwnerId == TSysTables::SysSchemeShard; } @@ -1486,6 +1542,7 @@ class TSchemeCache: public TMonitorableActor { if (Created) { NPQ::Migrate(*pathDesc.MutablePersQueueGroup()->MutablePQTabletConfig()); FillInfo(Kind, PQGroupInfo, std::move(*pathDesc.MutablePersQueueGroup())); + FillTopicPartitioning(PQGroupInfo->Description, PQGroupInfo->Schema, PQGroupInfo->Partitioning); } break; case NKikimrSchemeOp::EPathTypeCdcStream: @@ -2570,14 +2627,12 @@ class TSchemeCache: public TMonitorableActor { if (entry.RequestType == TNavigate::TEntry::ERequestType::ByPath) { auto pathExtractor = [this](TNavigate::TEntry& entry) { + NSysView::ISystemViewResolver::TSystemViewPath sysViewPath; if (AppData()->FeatureFlags.GetEnableSystemViews() - && (entry.Operation == TNavigate::OpPath || entry.Operation == TNavigate::OpTable)) + && SystemViewResolver->IsSystemViewPath(entry.Path, sysViewPath)) { - NSysView::ISystemViewResolver::TSystemViewPath sysViewPath; - if (SystemViewResolver->IsSystemViewPath(entry.Path, sysViewPath)) { - entry.TableId.SysViewInfo = sysViewPath.ViewName; - return CanonizePath(sysViewPath.Parent); - } + entry.TableId.SysViewInfo = 
sysViewPath.ViewName; + return CanonizePath(sysViewPath.Parent); } TString path = CanonizePath(entry.Path); diff --git a/ydb/core/tx/scheme_board/ya.make b/ydb/core/tx/scheme_board/ya.make index 22f66c217f09..ffb2121ca4c9 100644 --- a/ydb/core/tx/scheme_board/ya.make +++ b/ydb/core/tx/scheme_board/ya.make @@ -4,6 +4,7 @@ PEERDIR( ydb/library/actors/core ydb/core/base ydb/core/mon + ydb/core/persqueue/partition_key_range ydb/core/protos ydb/core/sys_view/common ydb/core/tx/scheme_cache diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index b81736613daa..5a819b8fe896 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -66,6 +66,10 @@ struct TDomainInfo : public TAtomicRefCount { if (descr.HasServerlessComputeResourcesMode()) { ServerlessComputeResourcesMode = descr.GetServerlessComputeResourcesMode(); } + + if (descr.HasSharedHive()) { + SharedHiveId = descr.GetSharedHive(); + } } inline ui64 GetVersion() const { @@ -80,6 +84,14 @@ struct TDomainInfo : public TAtomicRefCount { } } + inline ui64 ExtractHive() const { + if (IsServerless()) { + return SharedHiveId; + } else { + return Params.GetHive(); + } + } + inline bool IsServerless() const { return DomainKey != ResourcesDomainKey; } @@ -89,6 +101,7 @@ struct TDomainInfo : public TAtomicRefCount { NKikimrSubDomains::TProcessingParams Params; TCoordinators Coordinators; TMaybeServerlessComputeResourcesMode ServerlessComputeResourcesMode; + ui64 SharedHiveId = 0; TString ToString() const; @@ -177,6 +190,8 @@ struct TSchemeCacheNavigate { struct TPQGroupInfo : public TAtomicRefCount { EKind Kind = KindUnknown; NKikimrSchemeOp::TPersQueueGroupDescription Description; + TVector Schema; + TVector Partitioning; }; struct TRtmrVolumeInfo : public TAtomicRefCount { diff --git a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h index 1a56abe419e1..06c2b8939d22 100644 
--- a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h +++ b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h @@ -21,6 +21,9 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< YDB_READONLY_DEF(TTxChainData, TxData); YDB_READONLY(ui32, StepForExecute, 0); std::optional CurrentTxId; + + static const inline TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetStaticClassName()); + protected: virtual TConclusion> DoCreateActor(const NKikimr::NOlap::NBackground::TStartContext& context) const override; virtual TConclusionStatus DoDeserializeFromProto(const TProtoLogic& proto) override { @@ -48,6 +51,8 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< return result; } public: + TTxChainSession() = default; + TTxChainSession(const TTxChainData& data) : TxData(data) { diff --git a/ydb/core/tx/schemeshard/olap/columns/update.cpp b/ydb/core/tx/schemeshard/olap/columns/update.cpp index 83e300176007..c66da237c712 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.cpp +++ b/ydb/core/tx/schemeshard/olap/columns/update.cpp @@ -119,6 +119,11 @@ namespace NKikimr::NSchemeShard { serializer.DeserializeFromProto(columnSchema.GetCompression()).Validate(); Serializer = serializer; } + if (columnSchema.HasDataAccessorConstructor()) { + NArrow::NAccessor::TConstructorContainer container; + AFL_VERIFY(container.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())); + AccessorConstructor = container; + } if (columnSchema.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnSchema.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); @@ -140,6 +145,9 @@ namespace NKikimr::NSchemeShard { if (Serializer) { Serializer->SerializeToProto(*columnSchema.MutableSerializer()); } + if (AccessorConstructor) { + *columnSchema.MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } if (DictionaryEncoding) { 
*columnSchema.MutableDictionaryEncoding() = DictionaryEncoding->SerializeToProto(); } @@ -160,6 +168,14 @@ namespace NKikimr::NSchemeShard { return false; } } + if (!!diffColumn.GetAccessorConstructor()) { + auto conclusion = diffColumn.GetAccessorConstructor()->BuildConstructor(); + if (conclusion.IsFail()) { + errors.AddError(conclusion.GetErrorMessage()); + return false; + } + AccessorConstructor = conclusion.DetachResult(); + } if (diffColumn.GetStorageId()) { StorageId = *diffColumn.GetStorageId(); } diff --git a/ydb/core/tx/schemeshard/olap/columns/update.h b/ydb/core/tx/schemeshard/olap/columns/update.h index ec463a69c7c2..84a728829d6e 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.h +++ b/ydb/core/tx/schemeshard/olap/columns/update.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ class TOlapColumnDiff { YDB_READONLY_DEF(NArrow::NDictionary::TEncodingDiff, DictionaryEncoding); YDB_READONLY_DEF(std::optional, StorageId); YDB_READONLY_DEF(std::optional, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TRequestedConstructorContainer, AccessorConstructor); public: bool ParseFromRequest(const NKikimrSchemeOp::TOlapColumnDiff& columnSchema, IErrorCollector& errors) { Name = columnSchema.GetName(); @@ -30,6 +32,12 @@ class TOlapColumnDiff { if (columnSchema.HasDefaultValue()) { DefaultValue = columnSchema.GetDefaultValue(); } + if (columnSchema.HasDataAccessorConstructor()) { + if (!AccessorConstructor.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())) { + errors.AddError("cannot parse accessor constructor from proto"); + return false; + } + } if (columnSchema.HasSerializer()) { if (!Serializer.DeserializeFromProto(columnSchema.GetSerializer())) { errors.AddError("cannot parse serializer diff from proto"); @@ -55,6 +63,7 @@ class TOlapColumnAdd { YDB_READONLY_DEF(std::optional, Serializer); YDB_READONLY_DEF(std::optional, DictionaryEncoding); 
YDB_READONLY_DEF(NOlap::TColumnDefaultScalarValue, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TConstructorContainer, AccessorConstructor); public: TOlapColumnAdd(const std::optional& keyOrder) : KeyOrder(keyOrder) { diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp index 0f31bf0e2ede..8b00178a459c 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp @@ -1,19 +1,18 @@ #include "schema.h" #include +#include namespace NKikimr::NSchemeShard { void TOlapIndexSchema::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& indexSchema) const { indexSchema.SetId(Id); indexSchema.SetName(Name); - indexSchema.SetStorageId(StorageId); IndexMeta.SerializeToProto(indexSchema); } void TOlapIndexSchema::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& indexSchema) { Id = indexSchema.GetId(); Name = indexSchema.GetName(); - StorageId = indexSchema.GetStorageId(); AFL_VERIFY(IndexMeta.DeserializeFromProto(indexSchema))("incorrect_proto", indexSchema.DebugString()); } @@ -24,9 +23,6 @@ bool TOlapIndexSchema::ApplyUpdate(const TOlapSchema& currentSchema, const TOlap errors.AddError("different index classes: " + upsert.GetIndexConstructor().GetClassName() + " vs " + IndexMeta.GetClassName()); return false; } - if (upsert.GetStorageId()) { - StorageId = *upsert.GetStorageId(); - } auto object = upsert.GetIndexConstructor()->CreateIndexMeta(GetId(), GetName(), currentSchema, errors); if (!object) { return false; diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.h b/ydb/core/tx/schemeshard/olap/indexes/schema.h index 1aa302ecb826..630016fe96a5 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.h +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.h @@ -10,7 +10,6 @@ class TOlapIndexSchema { using TBase = TOlapIndexUpsert; YDB_READONLY(ui32, Id, Max()); YDB_READONLY_DEF(TString, Name); - YDB_READONLY_DEF(TString, StorageId); 
YDB_READONLY_DEF(NBackgroundTasks::TInterfaceProtoContainer, IndexMeta); public: TOlapIndexSchema() = default; diff --git a/ydb/core/tx/schemeshard/olap/layout/layout.h b/ydb/core/tx/schemeshard/olap/layout/layout.h index b3e338af1014..087194ceb6c2 100644 --- a/ydb/core/tx/schemeshard/olap/layout/layout.h +++ b/ydb/core/tx/schemeshard/olap/layout/layout.h @@ -15,48 +15,19 @@ template class TLayoutIdSet { private: ui64 Hash = 0; - std::set Elements; + THashSet Elements; public: TLayoutIdSet() = default; TLayoutIdSet(const TSetElement elem) { AddId(elem); } - typename std::set::const_iterator begin() const { - return Elements.begin(); - } - - typename std::set::const_iterator end() const { - return Elements.end(); - } - size_t Size() const { return Elements.size(); } - std::vector GetIdsVector() const { - return std::vector(Elements.begin(), Elements.end()); - } - - const std::set& GetIds() const { - return Elements; - } - - std::set GetIds(const ui32 count) const { - std::set result; - ui32 idx = 0; - for (auto&& i : Elements) { - if (++idx > count) { - return result; - } - result.emplace(i); - } - return result; - } - - std::vector GetIdsVector(const ui32 count) const { - std::set result = GetIds(count); - return std::vector(result.begin(), result.end()); + bool HasId(const TSetElement& id) const { + return Elements.contains(id); } bool AddId(const TSetElement& id) { @@ -131,7 +102,7 @@ class TColumnTablesLayout { void RemoveGroupsWithPathId(const TPathId& pathId) { const auto pred = [&](const TTablesGroup& item) { - return item.GetTableIds().GetIds().contains(pathId); + return item.GetTableIds().HasId(pathId); }; Groups.erase(std::remove_if(Groups.begin(), Groups.end(), pred), Groups.end()); } diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.cpp b/ydb/core/tx/schemeshard/olap/manager/manager.cpp index 0f8b13cd0104..72a8c93c5f1c 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.cpp +++ b/ydb/core/tx/schemeshard/olap/manager/manager.cpp @@ -29,12 
+29,12 @@ void TTablesStorage::OnRemoveObject(const TPathId& pathId, TColumnTableInfo::TPt } } -const std::set& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { +const THashSet& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { auto it = PathsByTieringId.find(tieringId); if (it != PathsByTieringId.end()) { return it->second; } else { - return Default>(); + return Default>(); } } diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.h b/ydb/core/tx/schemeshard/olap/manager/manager.h index d4ff264ec874..0873a12da22d 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.h +++ b/ydb/core/tx/schemeshard/olap/manager/manager.h @@ -9,7 +9,7 @@ namespace NKikimr::NSchemeShard { class TTablesStorage { private: THashMap Tables; - THashMap> PathsByTieringId; + THashMap> PathsByTieringId; THashMap TablesByShard; void OnAddObject(const TPathId& pathId, TColumnTableInfo::TPtr object); @@ -20,7 +20,7 @@ class TTablesStorage { TColumnTablesLayout GetTablesLayout(const std::vector& tabletIds) const; - const std::set& GetTablesWithTiering(const TString& tieringId) const; + const THashSet& GetTablesWithTiering(const TString& tieringId) const; class TTableReadGuard { protected: @@ -104,7 +104,9 @@ class TTablesStorage { TTableCreatedGuard BuildNew(const TPathId& id, TColumnTableInfo::TPtr object); TTableExtractedGuard TakeVerified(const TPathId& id); TTableExtractedGuard TakeAlterVerified(const TPathId& id); - + bool empty() const { + return Tables.empty(); + } bool contains(const TPathId& id) const { return Tables.contains(id); } diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make b/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make index 66a40695a604..bb4459a412fd 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make +++ b/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make @@ -12,6 +12,7 @@ PEERDIR( ydb/library/accessor ydb/core/protos ydb/library/actors/wilson + 
ydb/library/formats/arrow ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h index c33f18eb031b..fd10245bc284 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h +++ b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -19,6 +20,16 @@ class TColumnTableUpdate: public ISSEntityUpdate { return NKikimrTxColumnShard::ETransactionKind::TX_KIND_SCHEMA; } virtual TConclusionStatus DoInitializeImpl(const TUpdateInitializationContext& context) = 0; + + bool IsAlterCompression(const TUpdateInitializationContext& context) const { + for (const auto& alterColumn : context.GetModification()->GetAlterColumnTable().GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + return false; + } + protected: virtual TConclusionStatus DoStartImpl(const TUpdateStartContext& /*context*/) { return TConclusionStatus::Success(); @@ -27,6 +38,9 @@ class TColumnTableUpdate: public ISSEntityUpdate { return TConclusionStatus::Success(); } virtual TConclusionStatus DoInitialize(const TUpdateInitializationContext& context) override final { + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression(context)) { + return TConclusionStatus::Fail("Compression is disabled for OLAP tables"); + } if (!context.GetModification()->HasAlterColumnTable() && !context.GetModification()->HasAlterTable()) { return TConclusionStatus::Fail("no update data"); } @@ -52,6 +66,17 @@ class TColumnTableUpdate: public ISSEntityUpdate { return result; } + bool CheckTargetSchema(const TOlapSchema& targetSchema) { + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, column]: targetSchema.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || 
(column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + return false; + } + } + } + return true; + } + public: }; diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp index 119dc04e00c4..1173535cb0fe 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp @@ -30,7 +30,7 @@ NKikimr::TConclusionStatus TInStoreShardsTransfer::DoInitializeImpl(const TUpdat for (auto&& i : alter.GetSourceTabletIds()) { destinationSession.MutableTransferContext()->AddSourceTabletIds(i); } - DestinationSessions.emplace_back(destinationSession); + AFL_VERIFY(DestinationSessions.emplace(destinationSession.GetTransferContext().GetDestinationTabletId(), destinationSession).second); AFL_VERIFY(ShardIdsUsage.emplace(alter.GetDestinationTabletId()).second); } const auto& inStoreOriginal = context.GetOriginalEntityAsVerified(); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h index 5b19aa69fb99..0207e3528dc9 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h +++ b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h @@ -10,7 +10,7 @@ namespace NKikimr::NSchemeShard::NOlap::NAlter { class TInStoreShardsTransfer: public TInStoreTableUpdate { private: using TBase = TInStoreTableUpdate; - std::vector DestinationSessions; + THashMap DestinationSessions; std::shared_ptr TargetInStoreTable; std::set ShardIdsUsage; @@ -29,13 +29,9 @@ class TInStoreShardsTransfer: public TInStoreTableUpdate { virtual TConclusionStatus DoInitializeImpl(const TUpdateInitializationContext& context) override; virtual TString DoGetShardTxBodyString(const ui64 tabletId, const 
TMessageSeqNo& /*seqNo*/) const override { - for (auto&& i : DestinationSessions) { - if (i.GetTransferContext().GetDestinationTabletId() == tabletId) { - return i.SerializeAsString(); - } - } - AFL_VERIFY(false); - return ""; + auto it = DestinationSessions.find(tabletId); + AFL_VERIFY(it != DestinationSessions.end()); + return it->second.SerializeAsString(); } virtual std::set DoGetShardIds() const override { diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp index b94ff1888af2..2902534fbb24 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp @@ -1,6 +1,7 @@ #include "update.h" #include #include +#include namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -36,6 +37,24 @@ NKikimr::TConclusionStatus TStandaloneSchemaUpdate::DoInitializeImpl(const TUpda return TConclusionStatus::Fail("schema update error: " + collector->GetErrorMessage() + ". in alter constructor STANDALONE_UPDATE"); } } + + const TString& parentPathStr = context.GetModification()->GetWorkingDir(); + if (parentPathStr) { // Not empty only if called from Propose, not from ProgressState + NSchemeShard::TPath parentPath = NSchemeShard::TPath::Resolve(parentPathStr, context.GetSSOperationContext()->SS); + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (targetSchema.GetColumns().GetColumns().size() > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ": new: " << targetSchema.GetColumns().GetColumns().size() + << ". 
Limit: " << limits.MaxColumnTableColumns; + return TConclusionStatus::Fail(errStr); + } + } + + if (!CheckTargetSchema(targetSchema)) { + return TConclusionStatus::Fail("schema update error: sparsed columns are disabled"); + } auto description = originalTable.GetTableInfoVerified().Description; targetSchema.Serialize(*description.MutableSchema()); auto ttl = originalTable.GetTableTTLOptional() ? *originalTable.GetTableTTLOptional() : TOlapTTL(); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp index 446b43017821..57f05068b162 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp @@ -1,6 +1,9 @@ #include #include #include +#include + +#include "checks.h" namespace { @@ -434,6 +437,18 @@ class TAlterOlapStore: public TSubOperation { } } + bool IsAlterCompression() const { + const auto& alter = Transaction.GetAlterColumnStore(); + for (const auto& alterSchema : alter.GetAlterSchemaPresets()) { + for (const auto& alterColumn : alterSchema.GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + } + return false; + } + public: using TSubOperation::TSubOperation; @@ -458,7 +473,13 @@ class TAlterOlapStore: public TSubOperation { return result; } - TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name); + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Compression is disabled for OLAP tables"); + return result; + } + + TPath parentPath = TPath::Resolve(parentPathStr, context.SS); + TPath path = parentPath.Dive(name); { TPath::TChecker checks = path.Check(); checks @@ -504,6 +525,26 @@ class TAlterOlapStore: public TSubOperation { if (!alterData) { return result; } + + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, preset]: 
alterData->SchemaPresets) { + for (auto& [_, column]: preset.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || (column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + result->SetError(NKikimrScheme::StatusSchemeError,"schema update error: sparsed columns are disabled"); + return result; + } + } + } + } + + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, alterData, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + storeInfo->AlterData = alterData; NIceDb::TNiceDb db(context.GetDB()); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp index 6dff78961fff..4fb76b4a75a0 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp @@ -265,6 +265,19 @@ class TAlterColumnTable: public TSubOperation { auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), ui64(ssId)); + const bool isAlterSharding = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasReshardColumnTable(); + if (isAlterSharding && !AppData()->FeatureFlags.GetEnableAlterShardingInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Alter sharding is disabled for OLAP tables"); + return result; + } + + const bool hasTiering = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasAlterTtlSettings() && + Transaction.GetAlterColumnTable().GetAlterTtlSettings().HasUseTiering(); + if (hasTiering && HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Tiering functionality is disabled for OLAP tables"); + return result; + } + const TString& 
parentPathStr = Transaction.GetWorkingDir(); const TString& name = Transaction.HasAlterColumnTable() ? Transaction.GetAlterColumnTable().GetName() : Transaction.GetAlterTable().GetName(); LOG_NOTICE_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/olap/operations/checks.h b/ydb/core/tx/schemeshard/olap/operations/checks.h new file mode 100644 index 000000000000..7a2e58fa807d --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/operations/checks.h @@ -0,0 +1,19 @@ +#pragma once + +namespace NKikimr::NSchemeShard::NOlap { + inline bool CheckLimits(const TSchemeLimits& limits, TOlapStoreInfo::TPtr alterData, TString& errStr) { + for (auto& [_, preset]: alterData->SchemaPresets) { + ui64 columnCount = preset.GetColumns().GetColumns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". Limit: " << limits.MaxColumnTableColumns; + return false; + } + } + return true; + } +} + + diff --git a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp index 137c42d8fd4c..d66f3bed7d44 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp @@ -7,6 +7,8 @@ #include #include +#include "checks.h" + using namespace NKikimr; using namespace NKikimr::NSchemeShard; @@ -327,12 +329,10 @@ class TCreateOlapStore: public TSubOperation { TEvSchemeShard::EStatus status = NKikimrScheme::StatusAccepted; auto result = MakeHolder(status, ui64(OperationId.GetTxId()), ui64(ssId)); - if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { - if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard()) { - result->SetError(NKikimrScheme::StatusPreconditionFailed, - "OLAP schema operations are not supported"); - return result; - } + if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard() && 
context.SS->OlapStores.empty()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, + "OLAP schema operations are not supported"); + return result; } NSchemeShard::TPath parentPath = NSchemeShard::TPath::Resolve(parentPathStr, context.SS); @@ -396,12 +396,20 @@ class TCreateOlapStore: public TSubOperation { return result; } + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + TProposeErrorCollector errors(*result); TOlapStoreInfo::TPtr storeInfo = std::make_shared(); if (!storeInfo->ParseFromRequest(createDescription, errors)) { return result; } + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, storeInfo, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + // Construct channels bindings for columnshards TChannelsBindings channelsBindings; if (!context.SS->GetOlapChannelsBindings(dstPath.GetPathIdForDomain(), storeInfo->GetStorageConfig(), channelsBindings, errStr)) { diff --git a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp index 0f4c491b76f1..0c155d68d761 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp @@ -582,12 +582,10 @@ class TCreateColumnTable: public TSubOperation { TEvSchemeShard::EStatus status = NKikimrScheme::StatusAccepted; auto result = MakeHolder(status, ui64(opTxId), ui64(ssId)); - if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { - if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard()) { - result->SetError(NKikimrScheme::StatusPreconditionFailed, - "OLAP schema operations are not supported"); - return result; - } + if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard() && context.SS->ColumnTables.empty()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, + "OLAP schema operations are not supported"); + return result; } 
if (createDescription.GetSharding().GetColumnShards().size()) { @@ -683,11 +681,23 @@ class TCreateColumnTable: public TSubOperation { TProposeErrorCollector errors(*result); TColumnTableInfo::TPtr tableInfo; bool needUpdateObject = false; + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (storeInfo) { TOlapPresetConstructor tableConstructor(*storeInfo); tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); needUpdateObject = tableConstructor.GetNeedUpdateObject(); } else { + ui64 columnCount = createDescription.schema().columns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". Limit: " << limits.MaxColumnTableColumns; + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } TOlapTableConstructor tableConstructor; tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); } diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.cpp b/ydb/core/tx/schemeshard/olap/schema/schema.cpp index 3f6749641ec3..dd1889779c1e 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/schema.cpp @@ -1,6 +1,5 @@ #include "schema.h" #include -#include namespace NKikimr::NSchemeShard { @@ -83,14 +82,6 @@ bool TOlapSchema::ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycl errors.AddError("Incorrect ttl column - not found in scheme"); return false; } - if (!Statistics.GetByIdOptional(NOlap::NStatistics::EType::Max, {column->GetId()})) { - TOlapStatisticsModification modification; - NOlap::NStatistics::TConstructorContainer container(std::make_shared(column->GetName())); - modification.AddUpsert("__TTL_PROVIDER::" + TGUID::CreateTimebased().AsUuidString(), container); - if (!Statistics.ApplyUpdate(*this, modification, errors)) { - return false; - } - } return 
ValidateColumnTableTtl(ttl.GetEnabled(), {}, Columns.GetColumns(), Columns.GetColumnsByName(), errors); } case TTtlProto::kDisabled: @@ -110,10 +101,6 @@ bool TOlapSchema::Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& return false; } - if (!Statistics.ApplyUpdate(*this, schemaUpdate.GetStatistics(), errors)) { - return false; - } - if (!Options.ApplyUpdate(schemaUpdate.GetOptions(), errors)) { return false; } @@ -140,7 +127,6 @@ void TOlapSchema::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& ta Columns.Parse(tableSchema); Indexes.Parse(tableSchema); Options.Parse(tableSchema); - Statistics.Parse(tableSchema); } void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) const { @@ -154,7 +140,6 @@ void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) Columns.Serialize(resultLocal); Indexes.Serialize(resultLocal); Options.Serialize(resultLocal); - Statistics.Serialize(resultLocal); std::swap(resultLocal, tableSchemaExt); } @@ -171,10 +156,6 @@ bool TOlapSchema::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, return false; } - if (!Statistics.Validate(opSchema, errors)) { - return false; - } - if (opSchema.GetEngine() != Engine) { errors.AddError("Specified schema engine does not match schema preset"); return false; diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.h b/ydb/core/tx/schemeshard/olap/schema/schema.h index b840f97ca616..f800750341fa 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.h +++ b/ydb/core/tx/schemeshard/olap/schema/schema.h @@ -1,11 +1,9 @@ #pragma once #include #include -#include #include #include #include -#include #include "update.h" namespace NKikimr::NSchemeShard { @@ -16,16 +14,11 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsDescription, Columns); YDB_READONLY_DEF(TOlapIndexesDescription, Indexes); YDB_READONLY_DEF(TOlapOptionsDescription, Options); - mutable TOlapStatisticsDescription Statistics; 
YDB_READONLY(ui32, NextColumnId, 1); YDB_READONLY(ui32, Version, 0); public: - const TOlapStatisticsDescription& GetStatistics() const { - return Statistics; - } - bool Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors); void ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); diff --git a/ydb/core/tx/schemeshard/olap/schema/update.cpp b/ydb/core/tx/schemeshard/olap/schema/update.cpp index b78161394b78..3b0087e3b756 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/update.cpp @@ -23,10 +23,6 @@ namespace NKikimr::NSchemeShard { return false; } - if (!Statistics.Parse(alterRequest, errors)) { - return false; - } - if (!Options.Parse(alterRequest, errors)) { return false; } diff --git a/ydb/core/tx/schemeshard/olap/schema/update.h b/ydb/core/tx/schemeshard/olap/schema/update.h index d61b97749a5d..0cd98c09b3c1 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.h +++ b/ydb/core/tx/schemeshard/olap/schema/update.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include #include #include @@ -11,7 +10,6 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsUpdate, Columns); YDB_READONLY_DEF(TOlapIndexesUpdate, Indexes); YDB_READONLY_DEF(TOlapOptionsUpdate, Options); - YDB_READONLY_DEF(TOlapStatisticsModification, Statistics); YDB_READONLY_OPT(NKikimrSchemeOp::EColumnTableEngine, Engine); public: bool Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema, IErrorCollector& errors, bool allowNullKeys = false); diff --git a/ydb/core/tx/schemeshard/olap/schema/ya.make b/ydb/core/tx/schemeshard/olap/schema/ya.make index 03fae68d790d..76b2d2d1c801 100644 --- a/ydb/core/tx/schemeshard/olap/schema/ya.make +++ b/ydb/core/tx/schemeshard/olap/schema/ya.make @@ -9,8 +9,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/columns ydb/core/tx/schemeshard/olap/indexes ydb/core/tx/schemeshard/olap/options - ydb/core/tx/schemeshard/olap/statistics - 
ydb/core/tx/columnshard/engines/scheme/statistics/max ydb/core/tx/schemeshard/common ) diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp b/ydb/core/tx/schemeshard/olap/statistics/schema.cpp deleted file mode 100644 index af6f9e711d05..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "schema.h" -#include - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsSchema::SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - Operator.SerializeToProto(proto); -} - -bool TOlapStatisticsSchema::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - AFL_VERIFY(Operator.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsSchema::ApplyUpdate(const TOlapSchema& /*currentSchema*/, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors) { - AFL_VERIFY(upsert.GetName() == Operator.GetName()); - AFL_VERIFY(!!upsert.GetConstructor()); - if (upsert.GetConstructor().GetClassName() != Operator.GetClassName()) { - errors.AddError("different index classes: " + upsert.GetConstructor().GetClassName() + " vs " + Operator.GetClassName()); - return false; - } - errors.AddError("cannot modify statistics calculation for " + Operator.GetName() + ". 
not implemented currently."); - return false; -} - -bool TOlapStatisticsDescription::ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors) { - for (auto&& stat : schemaUpdate.GetUpsert()) { - auto* current = MutableByNameOptional(stat.GetName()); - if (current) { - if (!current->ApplyUpdate(currentSchema, stat, errors)) { - return false; - } - } else { - auto meta = stat.GetConstructor()->CreateOperator(stat.GetName(), currentSchema); - if (!meta) { - errors.AddError(meta.GetErrorMessage()); - return false; - } - TOlapStatisticsSchema object(meta.DetachResult()); - Y_ABORT_UNLESS(ObjectsByName.emplace(stat.GetName(), std::move(object)).second); - } - } - - for (const auto& name : schemaUpdate.GetDrop()) { - auto info = GetByNameOptional(name); - if (!info) { - errors.AddError(NKikimrScheme::StatusSchemeError, TStringBuilder() << "Unknown stat for drop: " << name); - return false; - } - AFL_VERIFY(ObjectsByName.erase(name)); - } - - return true; -} - -void TOlapStatisticsDescription::Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { - for (const auto& proto : tableSchema.GetStatistics()) { - TOlapStatisticsSchema object; - AFL_VERIFY(object.DeserializeFromProto(proto)); - AFL_VERIFY(ObjectsByName.emplace(proto.GetName(), std::move(object)).second); - } -} - -void TOlapStatisticsDescription::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { - for (const auto& object : ObjectsByName) { - object.second.SerializeToProto(*tableSchema.AddStatistics()); - } -} - -bool TOlapStatisticsDescription::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { - THashSet usedObjects; - for (const auto& proto : opSchema.GetStatistics()) { - if (proto.GetName().empty()) { - errors.AddError("Statistic cannot have an empty name"); - return false; - } - - const TString& name = proto.GetName(); - if (!GetByNameOptional(name)) { - errors.AddError("Stat '" 
+ name + "' does not match schema preset"); - return false; - } - - if (!usedObjects.emplace(proto.GetName()).second) { - errors.AddError("Column '" + name + "' is specified multiple times"); - return false; - } - } - return true; -} - -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.h b/ydb/core/tx/schemeshard/olap/statistics/schema.h deleted file mode 100644 index 37a79fc17fdd..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once -#include "update.h" - -namespace NKikimr::NSchemeShard { - -class TOlapSchema; - -class TOlapStatisticsSchema { -private: - YDB_READONLY_DEF(NOlap::NStatistics::TOperatorContainer, Operator); -public: - TOlapStatisticsSchema() = default; - - TOlapStatisticsSchema(const NOlap::NStatistics::TOperatorContainer& container) - : Operator(container) - { - AFL_VERIFY(container.GetName()); - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const; - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); -}; - -class TOlapStatisticsDescription { -public: - using TObjectsByName = THashMap; - -private: - YDB_READONLY_DEF(TObjectsByName, ObjectsByName); -public: - const TOlapStatisticsSchema* GetByIdOptional(const NOlap::NStatistics::EType type, const std::vector& entityIds) const noexcept { - for (auto&& i : ObjectsByName) { - if (!i.second.GetOperator()) { - continue; - } - if (i.second.GetOperator()->GetIdentifier() != NOlap::NStatistics::TIdentifier(type, entityIds)) { - continue; - } - return &i.second; - } - return nullptr; - } - - const TOlapStatisticsSchema* GetByNameOptional(const TString& name) const noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - const TOlapStatisticsSchema& 
GetByNameVerified(const TString& name) const noexcept { - auto object = GetByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - TOlapStatisticsSchema* MutableByNameOptional(const TString& name) noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - TOlapStatisticsSchema& MutableByNameVerified(const TString& name) noexcept { - auto* object = MutableByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors); - - void Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); - void Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const; - bool Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const; -}; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.cpp b/ydb/core/tx/schemeshard/olap/statistics/update.cpp deleted file mode 100644 index 1c82c07c300c..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "update.h" - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsUpsert::SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const { - requestedProto.SetName(Name); - Constructor.SerializeToProto(requestedProto); -} - -bool TOlapStatisticsUpsert::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - Name = proto.GetName(); - AFL_VERIFY(Constructor.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsModification::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { - for (const auto& name : alterRequest.GetDropStatistics()) { - if (!Drop.emplace(name).second) { - errors.AddError(NKikimrScheme::StatusInvalidParameter, "Duplicated 
statistics for drop"); - return false; - } - } - TSet upsertNames; - for (auto& schema : alterRequest.GetUpsertStatistics()) { - TOlapStatisticsUpsert stat; - AFL_VERIFY(stat.DeserializeFromProto(schema)); - if (!upsertNames.emplace(stat.GetName()).second) { - errors.AddError(NKikimrScheme::StatusAlreadyExists, TStringBuilder() << "stat '" << stat.GetName() << "' duplication for add"); - return false; - } - Upsert.emplace_back(std::move(stat)); - } - return true; -} -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.h b/ydb/core/tx/schemeshard/olap/statistics/update.h deleted file mode 100644 index 96558928acf3..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NSchemeShard { - - class TOlapStatisticsUpsert { - private: - YDB_READONLY_DEF(TString, Name); - protected: - NOlap::NStatistics::TConstructorContainer Constructor; - public: - TOlapStatisticsUpsert() = default; - TOlapStatisticsUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer& constructor) - : Name(name) - , Constructor(constructor) - { - - } - - const NOlap::NStatistics::TConstructorContainer& GetConstructor() const { - return Constructor; - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto); - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const; - }; - - class TOlapStatisticsModification { - private: - YDB_READONLY_DEF(TVector, Upsert); - YDB_READONLY_DEF(TSet, Drop); - public: - void AddUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer container) { - Upsert.emplace_back(TOlapStatisticsUpsert(name, container)); - } - - bool Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors); - }; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/ya.make 
b/ydb/core/tx/schemeshard/olap/statistics/ya.make deleted file mode 100644 index 3f4902454ef1..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - schema.cpp - update.cpp -) - -PEERDIR( - ydb/services/bg_tasks/abstract - ydb/core/tx/schemeshard/olap/common - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/protos -) - -END() diff --git a/ydb/core/tx/schemeshard/olap/table/table.h b/ydb/core/tx/schemeshard/olap/table/table.h index e37ede6c3eab..a092e175e25d 100644 --- a/ydb/core/tx/schemeshard/olap/table/table.h +++ b/ydb/core/tx/schemeshard/olap/table/table.h @@ -99,8 +99,9 @@ struct TColumnTableInfo { Stats.UpdateShardStats(shardIdx, newStats); } - void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - Stats.UpdateTableStats(pathId, newStats); + void UpdateTableStats(const TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + Stats.TableStats[pathId].Aggregated.PartCount = GetColumnShards().size(); + Stats.UpdateTableStats(shardIdx, pathId, newStats); } TConclusion> BuildEntity(const TPathId& pathId, const NOlap::NAlter::TEntityInitializationContext& iContext) const; diff --git a/ydb/core/tx/schemeshard/olap/ttl/ya.make b/ydb/core/tx/schemeshard/olap/ttl/ya.make index 0eb0e83c9a22..8aea246ebddf 100644 --- a/ydb/core/tx/schemeshard/olap/ttl/ya.make +++ b/ydb/core/tx/schemeshard/olap/ttl/ya.make @@ -6,6 +6,7 @@ SRCS( ) PEERDIR( + ydb/core/base ydb/core/protos ) diff --git a/ydb/core/tx/schemeshard/olap/ya.make b/ydb/core/tx/schemeshard/olap/ya.make index d41824702a58..4fde54f9fbd0 100644 --- a/ydb/core/tx/schemeshard/olap/ya.make +++ b/ydb/core/tx/schemeshard/olap/ya.make @@ -7,7 +7,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/schema ydb/core/tx/schemeshard/olap/common ydb/core/tx/schemeshard/olap/operations - ydb/core/tx/schemeshard/olap/statistics ydb/core/tx/schemeshard/olap/options ydb/core/tx/schemeshard/olap/layout 
ydb/core/tx/schemeshard/olap/manager diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 0b77b71a9656..2226cc2576c3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -1242,6 +1242,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { .MaxPathElementLength = rowSet.template GetValueOrDefault(defaults.MaxPathElementLength), .ExtraPathSymbolsAllowed = rowSet.template GetValueOrDefault(defaults.ExtraPathSymbolsAllowed), .MaxTableColumns = rowSet.template GetValueOrDefault(defaults.MaxTableColumns), + .MaxColumnTableColumns = rowSet.template GetValueOrDefault(defaults.MaxColumnTableColumns), .MaxTableColumnNameLength = rowSet.template GetValueOrDefault(defaults.MaxTableColumnNameLength), .MaxTableKeyColumns = rowSet.template GetValueOrDefault(defaults.MaxTableKeyColumns), .MaxTableIndices = rowSet.template GetValueOrDefault(defaults.MaxTableIndices), @@ -1825,7 +1826,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { Y_ABORT_UNLESS(parseOk); if (tableInfo->IsAsyncReplica()) { - Self->PathsById.at(pathId)->SetAsyncReplica(); + Self->PathsById.at(pathId)->SetAsyncReplica(true); } } @@ -1905,6 +1906,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { auto& view = Self->Views[pathId] = new TViewInfo(); view->AlterVersion = rowset.GetValue(); view->QueryText = rowset.GetValue(); + Y_PROTOBUF_SUPPRESS_NODISCARD view->CapturedContext.ParseFromString( + rowset.GetValue() + ); Self->IncrementPathDbRefCount(pathId); if (!rowset.Next()) { @@ -2475,8 +2479,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { Y_ABORT_UNLESS(it != Self->Topics.end()); Y_ABORT_UNLESS(it->second); TTopicInfo::TPtr pqGroup = it->second; - if (pqInfo->AlterVersion <= pqGroup->AlterVersion) + if (pqInfo->AlterVersion <= pqGroup->AlterVersion) { ++pqGroup->TotalPartitionCount; + if (pqInfo->Status == NKikimrPQ::ETopicPartitionStatus::Active) { + 
++pqGroup->ActivePartitionCount; + } + } if (pqInfo->PqId >= pqGroup->NextPartitionId) { pqGroup->NextPartitionId = pqInfo->PqId + 1; pqGroup->TotalGroupCount = pqInfo->PqId + 1; @@ -4058,7 +4066,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { if (path->IsPQGroup()) { auto pqGroup = Self->Topics.at(path->PathId); - auto delta = pqGroup->AlterData ? pqGroup->AlterData->TotalPartitionCount : pqGroup->TotalPartitionCount; + auto partitionDelta = pqGroup->AlterData ? pqGroup->AlterData->TotalPartitionCount : pqGroup->TotalPartitionCount; + auto activePartitionDelta = pqGroup->AlterData ? pqGroup->AlterData->ActivePartitionCount : pqGroup->ActivePartitionCount; + auto tabletConfig = pqGroup->AlterData ? (pqGroup->AlterData->TabletConfig.empty() ? pqGroup->TabletConfig : pqGroup->AlterData->TabletConfig) : pqGroup->TabletConfig; NKikimrPQ::TPQTabletConfig config; @@ -4066,12 +4076,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); Y_ABORT_UNLESS(parseOk); - const PQGroupReserve reserve(config, delta); + const PQGroupReserve reserve(config, activePartitionDelta); - inclusiveDomainInfo->IncPQPartitionsInside(delta); + inclusiveDomainInfo->IncPQPartitionsInside(partitionDelta); inclusiveDomainInfo->IncPQReservedStorage(reserve.Storage); - Self->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(delta); + Self->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionDelta); Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(reserve.Throughput); Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(reserve.Storage); } @@ -4193,8 +4203,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { TString settings = rowset.GetValue(); auto domainPathId = TPathId(rowset.GetValueOrDefault(selfId), rowset.GetValue()); + TString peerName = rowset.GetValueOrDefault(); - TExportInfo::TPtr exportInfo = new TExportInfo(id, uid, kind, settings, 
domainPathId); + TExportInfo::TPtr exportInfo = new TExportInfo(id, uid, kind, settings, domainPathId, peerName); if (rowset.HaveValue()) { exportInfo->UserSID = rowset.GetValue(); @@ -4291,11 +4302,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { TImportInfo::EKind kind = static_cast(rowset.GetValue()); auto domainPathId = TPathId(rowset.GetValue(), rowset.GetValue()); + TString peerName = rowset.GetValueOrDefault(); Ydb::Import::ImportFromS3Settings settings; Y_ABORT_UNLESS(ParseFromStringNoSizeLimit(settings, rowset.GetValue())); - TImportInfo::TPtr importInfo = new TImportInfo(id, uid, kind, settings, domainPathId); + TImportInfo::TPtr importInfo = new TImportInfo(id, uid, kind, settings, domainPathId, peerName); if (rowset.HaveValue()) { importInfo->UserSID = rowset.GetValue(); diff --git a/ydb/core/tx/schemeshard/schemeshard__login.cpp b/ydb/core/tx/schemeshard/schemeshard__login.cpp index 85d2f54a4892..0fa53652e545 100644 --- a/ydb/core/tx/schemeshard/schemeshard__login.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__login.cpp @@ -1,5 +1,8 @@ -#include "schemeshard_impl.h" #include +#include + +#include "schemeshard_audit_log.h" +#include "schemeshard_impl.h" namespace NKikimr { namespace NSchemeShard { @@ -19,10 +22,16 @@ struct TSchemeShard::TTxLogin : TSchemeShard::TRwTxBase { TTxType GetTxType() const override { return TXTYPE_LOGIN; } NLogin::TLoginProvider::TLoginUserRequest GetLoginRequest() const { + const auto& record(Request->Get()->Record); return { - .User = Request->Get()->Record.GetUser(), - .Password = Request->Get()->Record.GetPassword(), - .ExternalAuth = Request->Get()->Record.GetExternalAuth() + .User = record.GetUser(), + .Password = record.GetPassword(), + .Options = { + .ExpiresAfter = record.HasExpiresAfterMs() + ? 
std::chrono::milliseconds(record.GetExpiresAfterMs()) + : std::chrono::system_clock::duration::zero() + }, + .ExternalAuth = record.GetExternalAuth(), }; } @@ -66,15 +75,22 @@ struct TSchemeShard::TTxLogin : TSchemeShard::TRwTxBase { Self->PublishToSchemeBoard(TTxId(), {SubDomainPathId}, ctx); } - NLogin::TLoginProvider::TLoginUserResponse LoginResponse = Self->LoginProvider.LoginUser(GetLoginRequest()); THolder result = MakeHolder(); - if (LoginResponse.Error) { - result->Record.SetError(LoginResponse.Error); - } - if (LoginResponse.Token) { - result->Record.SetToken(LoginResponse.Token); + const auto& loginRequest = GetLoginRequest(); + if (loginRequest.ExternalAuth || AppData(ctx)->AuthConfig.GetEnableLoginAuthentication()) { + NLogin::TLoginProvider::TLoginUserResponse LoginResponse = Self->LoginProvider.LoginUser(loginRequest); + if (LoginResponse.Error) { + result->Record.SetError(LoginResponse.Error); + } + if (LoginResponse.Token) { + result->Record.SetToken(LoginResponse.Token); + } + } else { + result->Record.SetError("Login authentication is disabled"); } + AuditLogLogin(Request->Get()->Record, result->Record, Self); + LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxLogin DoComplete" << ", result: " << result->Record.ShortDebugString() diff --git a/ydb/core/tx/schemeshard/schemeshard__operation.cpp b/ydb/core/tx/schemeshard/schemeshard__operation.cpp index ee7de50fa0a1..0888afa2706d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation.cpp @@ -104,14 +104,12 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request } TOperation::TPtr operation = new TOperation(txId); - Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose for (const auto& transaction : record.GetTransaction()) { auto quotaResult = operation->ConsumeQuota(transaction, context); if (quotaResult.Status != NKikimrScheme::StatusSuccess) { response.Reset(new 
TProposeResponse(quotaResult.Status, ui64(txId), ui64(selfId))); response->SetError(quotaResult.Status, quotaResult.Reason); - Operations.erase(txId); return std::move(response); } } @@ -131,7 +129,6 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request if (splitResult.Status != NKikimrScheme::StatusSuccess) { response.Reset(new TProposeResponse(splitResult.Status, ui64(txId), ui64(selfId))); response->SetError(splitResult.Status, splitResult.Reason); - Operations.erase(txId); return std::move(response); } @@ -140,11 +137,15 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request const TString owner = record.HasOwner() ? record.GetOwner() : BUILTIN_ACL_ROOT; + bool prevProposeUndoSafe = true; + + Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose + for (const auto& transaction : transactions) { auto parts = operation->ConstructParts(transaction, context); if (parts.size() > 1) { - // les't allow altering impl index tables as part of consistent operation + // allow altering impl index tables as part of consistent operation context.IsAllowedPrivateTables = true; } @@ -198,18 +199,21 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request << ", with reason: " << response->Record.GetReason() << ", tx message: " << SecureDebugString(record)); - context.OnComplete = {}; // recreate - context.DbChanges = {}; + AbortOperationPropose(txId, context); - for (auto& toAbort : operation->Parts) { - toAbort->AbortPropose(context); - } + return std::move(response); + } - context.MemChanges.UnDo(context.SS); - context.OnComplete.ApplyOnExecute(context.SS, context.GetTxc(), context.Ctx); - Operations.erase(txId); + // Check suboperations for undo safety. Log first unsafe suboperation in the schema transaction. 
+ if (prevProposeUndoSafe && !context.IsUndoChangesSafe()) { + prevProposeUndoSafe = false; - return std::move(response); + LOG_WARN_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Operation part proposed ok, but propose itself is undo unsafe" + << ", suboperation type: " << NKikimrSchemeOp::EOperationType_Name(part->GetTransaction().GetOperationType()) + << ", opId: " << part->GetOperationId() + << ", at schemeshard: " << selfId + ); } } } @@ -217,6 +221,55 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request return std::move(response); } +void TSchemeShard::AbortOperationPropose(const TTxId txId, TOperationContext& context) { + Y_ABORT_UNLESS(Operations.contains(txId)); + TOperation::TPtr operation = Operations.at(txId); + + // Drop operation side effects, undo memory changes + // (Local db changes were already applied) + context.OnComplete = {}; + context.DbChanges = {}; + + for (auto& i : operation->Parts) { + i->AbortPropose(context); + } + + context.MemChanges.UnDo(context.SS); + + // And remove aborted operation from existence + Operations.erase(txId); +} + +void AbortOperation(TOperationContext& context, const TTxId txId, const TString& reason) { + LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" + << ", txId: " << txId + << ", operation is rejected and all changes reverted" + << ", " << reason + << ", at schemeshard: " << context.SS->SelfTabletId() + ); + + context.GetTxc().DB.RollbackChanges(); + context.SS->AbortOperationPropose(txId, context); +} + +bool IsCommitRedoSizeOverLimit(TString* reason, TOperationContext& context) { + // MaxCommitRedoMB is the ICB control shared with NTabletFlatExecutor::TExecutor. 
+ // We subtract from MaxCommitRedoMB additional 1MB for anything extra + // that executor/tablet may (or may not) add under the hood + const ui64 limitBytes = (context.SS->MaxCommitRedoMB - 1) << 20; // MB to bytes + const ui64 commitRedoBytes = context.GetTxc().DB.GetCommitRedoBytes(); + if (commitRedoBytes >= limitBytes) { + *reason = TStringBuilder() + << "local tx commit redo size generated by IgniteOperation() is more than allowed limit: " + << "commit redo size " << commitRedoBytes + << ", limit " << limitBytes + << ", excess " << (commitRedoBytes - limitBytes) + ; + return true; + } + return false; +} + struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransactionBase { using TBase = NTabletFlatExecutor::TTransactionBase; @@ -236,6 +289,7 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti bool Execute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override { TTabletId selfId = Self->SelfTabletId(); + auto txId = TTxId(Request->Get()->Record.GetTxId()); LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" @@ -246,7 +300,6 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti auto [userToken, tokenParseError] = ParseUserToken(Request->Get()->Record.GetUserToken()); if (tokenParseError) { - auto txId = Request->Get()->Record.GetTxId(); Response = MakeHolder(NKikimrScheme::StatusInvalidParameter, ui64(txId), ui64(selfId), "Failed to parse user token"); return true; } @@ -258,10 +311,52 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti TStorageChanges dbChanges; TOperationContext context{Self, txc, ctx, OnComplete, memChanges, dbChanges, std::move(userToken)}; + //NOTE: Successful IgniteOperation will leave created operation in Self->Operations and accumulated changes in the context. + // Unsuccessful IgniteOperation will leave no operation and context will also be clean. 
Response = Self->IgniteOperation(*Request->Get(), context); - OnComplete.ApplyOnExecute(Self, txc, ctx); + //NOTE: Successfully created operation also must be checked for the size of this local tx. + // + // Limitation on a commit redo size of local transactions is imposed at the tablet executor level + // (See ydb/core/tablet_flat/flat_executor.cpp, NTabletFlatExecutor::TExecutor::ExecuteTransaction()). + // And a tablet violating that limit is considered broken and will be stopped unconditionally and immediately. + // + // So even if operation was ignited successfully, it's local tx size still must be checked + // as a precaution measure to avoid infinite loop of schemeshard restarting, attempting to propose + // persisted operation again, hitting commit redo size limit and restarting again. + // + // On unsuccessful check, local tx should be rolled back, operation should be rejected and + // all accumulated changes dropped or reverted. + // + + // Actually build commit redo (dbChanges could be empty) dbChanges.Apply(Self, txc, ctx); + + if (Self->Operations.contains(txId)) { + Y_ABORT_UNLESS(Response->IsDone() || Response->IsAccepted() || Response->IsConditionalAccepted()); + + // Check local tx commit redo size + TString reason; + if (IsCommitRedoSizeOverLimit(&reason, context)) { + Response = MakeHolder(NKikimrScheme::StatusSchemeError, ui64(txId), ui64(selfId), reason); + + AbortOperation(context, txId, reason); + + if (!context.IsUndoChangesSafe()) { + LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" + << ", opId: " << txId + << ", operation should be rejected and all changes be reverted" + << ", but context.IsUndoChangesSafe is false, which means some direct writes have been done" + << ", message: " << SecureDebugString(Request->Get()->Record) + << ", at schemeshard: " << context.SS->SelfTabletId() + ); + } + } + } + + // Apply accumulated changes (changes could be empty) + OnComplete.ApplyOnExecute(Self, txc, 
ctx); + return true; } @@ -1168,7 +1263,7 @@ TVector TOperation::ConstructParts(const TTxTransaction& tx case NKikimrSchemeOp::EOperationType::ESchemeOpCreateSubDomain: return {CreateSubDomain(NextPartId(), tx)}; case NKikimrSchemeOp::EOperationType::ESchemeOpAlterSubDomain: - return {CreateCompatibleSubdomainAlter(context.SS, NextPartId(), tx)}; + return CreateCompatibleSubdomainAlter(NextPartId(), tx, context); case NKikimrSchemeOp::EOperationType::ESchemeOpDropSubDomain: return {CreateDropSubdomain(NextPartId(), tx)}; case NKikimrSchemeOp::EOperationType::ESchemeOpForceDropSubDomain: diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp index 4efa03b17955..9113d5d8edc9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp @@ -61,7 +61,7 @@ class TPropose: public TSubOperationState { NIceDb::TNiceDb db(context.GetDB()); context.SS->PersistCdcStream(db, pathId); - context.SS->CdcStreams[pathId] = stream->AlterData; + context.SS->CdcStreams[pathId]->FinishAlter(); context.SS->ClearDescribePathCaches(path); context.OnComplete.PublishToSchemeBoard(OperationId, pathId); @@ -143,9 +143,12 @@ class TAlterCdcStream: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -370,10 +373,13 @@ class TAlterCdcStreamAtTable: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -476,10 
+482,10 @@ class TAlterCdcStreamAtTable: public TSubOperation { } // anonymous std::variant DoAlterStreamPathChecks( - const TOperationId& opId, - const TPath& workingDirPath, - const TString& tableName, - const TString& streamName) + const TOperationId& opId, + const TPath& workingDirPath, + const TString& tableName, + const TString& streamName) { const auto tablePath = workingDirPath.Child(tableName); { @@ -492,9 +498,12 @@ std::variant DoAlterStreamPathChecks( .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -521,27 +530,24 @@ std::variant DoAlterStreamPathChecks( } void DoAlterStream( - const NKikimrSchemeOp::TAlterCdcStream& op, - const TOperationId& opId, - const TPath& workingDirPath, - const TPath& tablePath, - TVector& result) + TVector& result, + const NKikimrSchemeOp::TAlterCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath) { { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterCdcStreamImpl); outTx.MutableAlterCdcStream()->CopyFrom(op); - if (op.HasGetReady()) { outTx.MutableLockGuard()->SetOwnerTxId(op.GetGetReady().GetLockTxId()); } result.push_back(CreateAlterCdcStreamImpl(NextPartId(opId, result), outTx)); } - { auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterCdcStreamAtTable); outTx.MutableAlterCdcStream()->CopyFrom(op); - if (op.HasGetReady()) { outTx.MutableLockGuard()->SetOwnerTxId(op.GetGetReady().GetLockTxId()); } @@ -601,7 +607,7 @@ TVector CreateAlterCdcStream(TOperationId opId, const TTxTr TVector result; - DoAlterStream(op, opId, workingDirPath, tablePath, result); + DoAlterStream(result, op, opId, workingDirPath, tablePath); if (op.HasGetReady()) { auto outTx = 
TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropLock); @@ -613,6 +619,14 @@ TVector CreateAlterCdcStream(TOperationId opId, const TTxTr result.push_back(DropLock(NextPartId(opId, result), outTx)); } + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); + } + return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h index 198d5ae35cc9..6154ee05ed17 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h @@ -1,7 +1,7 @@ #pragma once -#include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_common.h" +#include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_part.h" #include "schemeshard_impl.h" @@ -17,10 +17,10 @@ std::variant DoAlterStreamPathChecks( const TString& streamName); void DoAlterStream( + TVector& result, const NKikimrSchemeOp::TAlterCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, - const TPath& tablePath, - TVector& result); + const TPath& tablePath); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp index 24f9c7c0d0f8..69883a91a28a 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp @@ -1,9 +1,8 @@ -#include "schemeshard__operation_part.h" +#include "schemeshard__operation_alter_cdc_stream.h" #include "schemeshard__operation_common.h" +#include "schemeshard__operation_part.h" #include "schemeshard_impl.h" -#include "schemeshard__operation_alter_cdc_stream.h" - #include #include @@ -83,7 +82,7 @@ TVector CreateAlterContinuousBackup(TOperationId opId, cons const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription schema; - context.SS->DescribeTable(table, typeRegistry, true, false, &schema); + context.SS->DescribeTable(table, typeRegistry, true, &schema); schema.MutablePartitionConfig()->CopyFrom(table->TableDescription.GetPartitionConfig()); TString errStr; @@ -111,7 +110,7 @@ TVector CreateAlterContinuousBackup(TOperationId opId, cons TVector result; - NCdc::DoAlterStream(alterCdcStreamOp, opId, workingDirPath, tablePath, result); + NCdc::DoAlterStream(result, alterCdcStreamOp, opId, workingDirPath, tablePath); if (cbOp.GetActionCase() == NKikimrSchemeOp::TAlterContinuousBackup::kTakeIncrementalBackup) { DoCreateIncBackupTable(opId, backupTablePath, schema, result); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp index c67f26b3cea2..1009c5dff470 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp @@ -213,6 +213,13 @@ class TAlterExternalDataSource : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled 
for serverless domains. Please contact your system administrator to enable it"); + return result; + } + } + const TPath parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalDataSource::IsParentPathValid( result, parentPath, Transaction, /* isCreate */ false)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp index ad563b01c900..54f63ca0db57 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp @@ -308,6 +308,13 @@ class TAlterExternalTable: public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const auto parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalTable::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp index 5b67ca14030c..6b330b329df0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp @@ -814,7 +814,7 @@ class TSyncHive: public TSubOperationState { } bool HandleReply(TEvHive::TEvUpdateDomainReply::TPtr& ev, TOperationContext& context) override { - const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin()); + const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin()); LOG_I(DebugHint() << "HandleReply TEvUpdateDomainReply" << ", from hive: " << hive); @@ -936,21 +936,23 @@ class TAlterExtSubDomain: public TSubOperation { // Create or derive alter. // (We could have always created new alter from a current subdomainInfo but // we need to take into account possible version increase from CreateHive suboperation.) 
- auto createAlterFrom = [&inputSettings, &delta](auto prototype) { + auto createAlterFrom = [&inputSettings](auto prototype, const TStoragePools& additionalPools) { return MakeIntrusive( *prototype, inputSettings.GetPlanResolution(), inputSettings.GetTimeCastBucketsPerMediator(), - delta.StoragePoolsAdded + additionalPools ); }; TSubDomainInfo::TPtr alter = [&delta, &subdomainInfo, &createAlterFrom, &context]() { if (delta.AddExternalHive && context.SS->EnableAlterDatabaseCreateHiveFirst) { Y_ABORT_UNLESS(subdomainInfo->GetAlter()); - return createAlterFrom(subdomainInfo->GetAlter()); + //NOTE: existing alter already has all storage pools that combined operation wanted to add, + // should not add them second time when deriving alter from alter + return createAlterFrom(subdomainInfo->GetAlter(), {}); } else { Y_ABORT_UNLESS(!subdomainInfo->GetAlter()); - return createAlterFrom(subdomainInfo); + return createAlterFrom(subdomainInfo, delta.StoragePoolsAdded); } }(); @@ -1084,7 +1086,13 @@ ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState } TVector CreateCompatibleAlterExtSubDomain(TOperationId id, const TTxTransaction& tx, TOperationContext& context) { - Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain); + //NOTE: Accepting ESchemeOpAlterSubDomain operation for an ExtSubDomain is a special compatibility case + // for those old subdomains that at the time went through migration to a separate tenants. + // Console tablet holds records about types of the subdomains but they hadn't been updated + // at the migration time. So Console still thinks that old subdomains are plain subdomains + // whereas they had been migrated to the extsubdomains. + // This compatibility case should be upholded until Console records would be updated. 
+ Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain || tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterSubDomain); LOG_I("CreateCompatibleAlterExtSubDomain, opId " << id << ", feature flag EnableAlterDatabaseCreateHiveFirst " << context.SS->EnableAlterDatabaseCreateHiveFirst diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp index 9e4bf359c59c..0415c18c18b0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp @@ -140,8 +140,11 @@ class TAlterTableIndex: public TSubOperation { .NotDeleted() .NotUnderDeleting() .IsCommonSensePath() - .IsTable() - .NotAsyncReplicaTable(); + .IsTable(); + + if (!Transaction.GetInternal()) { + checks.NotAsyncReplicaTable(); + } if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp index a4a5fd6d2533..48f9f400bdb2 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp @@ -1,6 +1,7 @@ #include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" #include "schemeshard_impl.h" +#include namespace { @@ -15,7 +16,9 @@ class TAlterLogin: public TSubOperationBase { NIceDb::TNiceDb db(context.GetTxc().DB); // do not track is there are direct writes happen TTabletId ssId = context.SS->SelfTabletId(); auto result = MakeHolder(OperationId.GetTxId(), ssId); - if (Transaction.GetWorkingDir() != context.SS->LoginProvider.Audience) { + if (!AppData()->AuthConfig.GetEnableLoginAuthentication()) { + result->SetStatus(NKikimrScheme::StatusPreconditionFailed, "Login authentication is disabled"); + } else if (Transaction.GetWorkingDir() != 
context.SS->LoginProvider.Audience) { result->SetStatus(NKikimrScheme::StatusPreconditionFailed, "Wrong working dir"); } else { const NKikimrConfig::TDomainsConfig::TSecurityConfig& securityConfig = context.SS->GetDomainsConfig().GetSecurityConfig(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp index dac4b58742ce..7e07965f5def 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp @@ -136,6 +136,10 @@ class TAlterPQ: public TSubOperation { return nullptr; } + if (!alterConfig.HasPartitionStrategy() && tabletConfig->HasPartitionStrategy()) { + alterConfig.MutablePartitionStrategy()->CopyFrom(tabletConfig->GetPartitionStrategy()); + } + if (alterConfig.GetPartitionConfig().HasLifetimeSeconds()) { const auto lifetimeSeconds = alterConfig.GetPartitionConfig().GetLifetimeSeconds(); if (lifetimeSeconds <= 0 || (ui32)lifetimeSeconds > TSchemeShard::MaxPQLifetimeSeconds) { @@ -177,12 +181,8 @@ class TAlterPQ: public TSubOperation { if (alterConfig.HasPartitionStrategy() && !NPQ::SplitMergeEnabled(alterConfig) && tabletConfig->HasPartitionStrategy() && NPQ::SplitMergeEnabled(*tabletConfig)) { - if (!alterConfig.GetPartitionStrategy().HasMaxPartitionCount() || 0 != alterConfig.GetPartitionStrategy().GetMaxPartitionCount()) { - errStr = TStringBuilder() << "Can`t disable auto partitioning. Disabling auto partitioning is a destructive operation, " - << "after which all partitions will become active and the message order guarantee will be violated. 
" - << "If you are sure of this, then set max_active_partitions to 0."; - return nullptr; - } + errStr = TStringBuilder() << "Can`t disable auto partitioning."; + return nullptr; } if (!alterConfig.HasPartitionStrategy() && tabletConfig->HasPartitionStrategy()) { @@ -577,10 +577,13 @@ class TAlterPQ: public TSubOperation { return result; } + alterData->ActivePartitionCount = topic->ActivePartitionCount; + bool splitMergeEnabled = AppData()->FeatureFlags.GetEnableTopicSplitMerge() && NKikimr::NPQ::SplitMergeEnabled(tabletConfig) && NKikimr::NPQ::SplitMergeEnabled(newTabletConfig); + THashSet involvedPartitions; if (splitMergeEnabled) { auto Hex = [](const auto& value) { return HexText(TBasicStringBuf(value)); @@ -589,10 +592,9 @@ class TAlterPQ: public TSubOperation { ui32 nextId = topic->NextPartitionId; ui32 nextGroupId = topic->TotalGroupCount; - THashSet involvedPartitions; - for (const auto& split : alter.GetSplit()) { alterData->TotalGroupCount += 2; + ++alterData->ActivePartitionCount; const auto splittedPartitionId = split.GetPartition(); if (!topic->Partitions.contains(splittedPartitionId)) { @@ -654,6 +656,7 @@ class TAlterPQ: public TSubOperation { } for (const auto& merge : alter.GetMerge()) { alterData->TotalGroupCount += 1; + --alterData->ActivePartitionCount; const auto partitionId = merge.GetPartition(); if (!topic->Partitions.contains(partitionId)) { @@ -737,6 +740,10 @@ class TAlterPQ: public TSubOperation { } alterData->TotalPartitionCount = topic->TotalPartitionCount + alterData->PartitionsToAdd.size(); + if (!splitMergeEnabled) { + alterData->ActivePartitionCount = alterData->TotalPartitionCount; + } + alterData->NextPartitionId = topic->NextPartitionId; for (const auto& p : alterData->PartitionsToAdd) { if (p.GroupId == 0 || p.GroupId > alterData->TotalGroupCount) { @@ -780,10 +787,11 @@ class TAlterPQ: public TSubOperation { return result; } - const PQGroupReserve reserve(newTabletConfig, alterData->TotalPartitionCount); - const PQGroupReserve 
oldReserve(tabletConfig, topic->TotalPartitionCount); + const PQGroupReserve reserve(newTabletConfig, alterData->ActivePartitionCount); + const PQGroupReserve reserveForCheckLimit(newTabletConfig, alterData->ActivePartitionCount + involvedPartitions.size()); + const PQGroupReserve oldReserve(tabletConfig, topic->ActivePartitionCount); - const ui64 storageToReserve = reserve.Storage > oldReserve.Storage ? reserve.Storage - oldReserve.Storage : 0; + const ui64 storageToReserve = reserveForCheckLimit.Storage > oldReserve.Storage ? reserveForCheckLimit.Storage - oldReserve.Storage : 0; { TPath::TChecker checks = path.Check(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp index 863533fe7aa2..011291768d6d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp @@ -129,6 +129,13 @@ class TAlterResourcePool : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(context.SS->SelfTabletId())); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableResourcePoolsOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Resource pools are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath& parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NResourcePool::IsParentPathValid(result, parentPath)); @@ -142,6 +149,7 @@ class TAlterResourcePool : public TSubOperation { Y_ABORT_UNLESS(oldResourcePoolInfo); const TResourcePoolInfo::TPtr resourcePoolInfo = NResourcePool::ModifyResourcePool(resourcePoolDescription, oldResourcePoolInfo); Y_ABORT_UNLESS(resourcePoolInfo); + RETURN_RESULT_UNLESS(NResourcePool::IsResourcePoolInfoValid(result, resourcePoolInfo)); result->SetPathId(dstPath.Base()->PathId.LocalPathId); const TPathElement::TPtr resourcePool = ReplaceResourcePoolPathElement(dstPath); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp index f512165507a5..3ce17bb6b85d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp @@ -365,6 +365,10 @@ class TPropose: public TSubOperationState { TTableInfo::TPtr table = context.SS->Tables.at(pathId); table->FinishAlter(); + if (!table->IsAsyncReplica()) { + path->SetAsyncReplica(false); + } + auto ttlIt = context.SS->TTLEnabledTables.find(pathId); if (table->IsTTLEnabled() && ttlIt == context.SS->TTLEnabledTables.end()) { context.SS->TTLEnabledTables[pathId] = table; @@ -519,8 +523,10 @@ class TAlterTable: public TSubOperation { .IsTable() .NotUnderOperation(); - if (!Transaction.GetInternal()) { - checks.NotAsyncReplicaTable(); + if (checks && !Transaction.GetInternal()) { + checks + .NotAsyncReplicaTable() + .NotBackupTable(); } if (!context.IsAllowedPrivateTables) { @@ -722,6 +728,10 @@ TVector CreateConsistentAlterTable(TOperationId id, const T return {CreateAlterTable(id, tx)}; } + if (path.IsBackupTable()) { + return {CreateAlterTable(id, tx)}; + } + TPath parent = path.Parent(); if (!parent.IsTableIndex()) { @@ 
-731,7 +741,7 @@ TVector CreateConsistentAlterTable(TOperationId id, const T // Admins can alter indexImplTable unconditionally. // Regular users can only alter allowed fields. if (!IsSuperUser(context.UserToken.Get()) - && (!CheckAllowedFields(alter, {"Name", "PartitionConfig"}) + && (!CheckAllowedFields(alter, {"Name", "PathId", "PartitionConfig", "ReplicationConfig"}) || (alter.HasPartitionConfig() && !CheckAllowedFields(alter.GetPartitionConfig(), {"PartitioningPolicy"}) ) @@ -744,6 +754,7 @@ TVector CreateConsistentAlterTable(TOperationId id, const T { auto tableIndexAltering = TransactionTemplate(parent.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + tableIndexAltering.SetInternal(tx.GetInternal()); auto alterIndex = tableIndexAltering.MutableAlterTableIndex(); alterIndex->SetName(parent.LeafName()); alterIndex->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp index 371e5acaf297..bdfd80bc99a7 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp @@ -695,7 +695,7 @@ THolder TConfigureParts::MakeEvProposeTrans const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -703,7 +703,7 @@ THolder TConfigureParts::MakeEvProposeTrans TTxState::ETxType txType, const TOperationContext& context) { - auto event = MakeHolder(); + auto event = MakeHolder(); event->Record.SetTxId(ui64(txId)); ActorIdToProto(context.SS->SelfId(), event->Record.MutableSourceActor()); @@ -719,7 +719,7 @@ THolder TConfigureParts::MakeEvProposeTrans databasePath); if (bootstrapConfig) { Y_ABORT_UNLESS(txType == TTxState::TxCreatePQGroup); - 
event->Record.MutableConfig()->MutableBootstrapConfig()->CopyFrom(*bootstrapConfig); + event->PreSerializedData += bootstrapConfig->GetPreSerializedProposeTransaction(); } LOG_DEBUG_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, @@ -734,7 +734,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -742,7 +742,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId TTxState::ETxType txType, const TOperationContext& context) { - auto event = MakeHolder(); + auto event = MakeHolder(); event->Record.SetTxId(ui64(txId)); MakePQTabletConfig(context, @@ -757,7 +757,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId databasePath); if (bootstrapConfig) { Y_ABORT_UNLESS(txType == TTxState::TxCreatePQGroup); - event->Record.MutableBootstrapConfig()->CopyFrom(*bootstrapConfig); + event->PreSerializedData += bootstrapConfig->GetPreSerializedUpdateConfig(); } LOG_DEBUG_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common.h b/ydb/core/tx/schemeshard/schemeshard__operation_common.h index 14ec42f43168..33813a3f9b10 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common.h @@ -552,6 +552,54 @@ class TDone: public TSubOperationState { namespace NPQState { +class TBootstrapConfigWrapper: public NKikimrPQ::TBootstrapConfig { + struct TSerializedProposeTransaction { + TString Value; + + static TSerializedProposeTransaction Serialize(const NKikimrPQ::TBootstrapConfig& value) { + NKikimrPQ::TEvProposeTransaction record; + record.MutableConfig()->MutableBootstrapConfig()->CopyFrom(value); + return {record.SerializeAsString()}; + } + }; + + struct TSerializedUpdateConfig { + TString Value; + + static 
TSerializedUpdateConfig Serialize(const NKikimrPQ::TBootstrapConfig& value) { + NKikimrPQ::TUpdateConfig record; + record.MutableBootstrapConfig()->CopyFrom(value); + return {record.SerializeAsString()}; + } + }; + + mutable std::optional> PreSerialized; + + template + const TString& Get() const { + if (!PreSerialized) { + PreSerialized.emplace(T::Serialize(*this)); + } + + const auto* value = std::get_if(&PreSerialized.value()); + Y_ABORT_UNLESS(value); + + return value->Value; + } + +public: + const TString& GetPreSerializedProposeTransaction() const { + return Get(); + } + + const TString& GetPreSerializedUpdateConfig() const { + return Get(); + } +}; + class TConfigureParts: public TSubOperationState { private: TOperationId OperationId; @@ -627,7 +675,6 @@ class TConfigureParts: public TSubOperationState { return false; } - bool ProgressState(TOperationContext& context) override { TTabletId ssId = context.SS->SelfTabletId(); @@ -669,7 +716,7 @@ class TConfigureParts: public TSubOperationState { TString databasePath = TPath::Init(context.SS->RootPathId(), context.SS).PathString(); auto topicPath = TPath::Init(txState->TargetPathId, context.SS); - std::optional bootstrapConfig; + std::optional bootstrapConfig; if (txState->TxType == TTxState::TxCreatePQGroup && topicPath.Parent().IsCdcStream()) { bootstrapConfig.emplace(); @@ -881,20 +928,37 @@ class TConfigureParts: public TSubOperationState { config.SetVersion(pqGroup.AlterData->AlterVersion); } + THashSet linkedPartitions; + for(const auto& pq : pqShard.Partitions) { config.AddPartitionIds(pq->PqId); auto& partition = *config.AddPartitions(); FillPartition(partition, pq.Get(), 0); + + linkedPartitions.insert(pq->PqId); + linkedPartitions.insert(pq->ParentPartitionIds.begin(), pq->ParentPartitionIds.end()); + linkedPartitions.insert(pq->ChildPartitionIds.begin(), pq->ChildPartitionIds.end()); + for (auto c : pq->ChildPartitionIds) { + auto it = pqGroup.Partitions.find(c); + if (it == pqGroup.Partitions.end()) { 
+ continue; + } + linkedPartitions.insert(it->second->ParentPartitionIds.begin(), it->second->ParentPartitionIds.end()); + } } - for(const auto& p : pqGroup.Shards) { - const auto& pqShard = p.second; - const auto& tabletId = context.SS->ShardInfos[p.first].TabletID; - for (const auto& pq : pqShard->Partitions) { - auto& partition = *config.AddAllPartitions(); - FillPartition(partition, pq.Get(), ui64(tabletId)); + for(auto lp : linkedPartitions) { + auto it = pqGroup.Partitions.find(lp); + if (it == pqGroup.Partitions.end()) { + continue; } + + auto* partitionInfo = it->second; + const auto& tabletId = context.SS->ShardInfos[partitionInfo->ShardIdx].TabletID; + + auto& partition = *config.AddAllPartitions(); + FillPartition(partition, partitionInfo, ui64(tabletId)); } } @@ -918,7 +982,7 @@ class TConfigureParts: public TSubOperationState { const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -931,7 +995,7 @@ class TConfigureParts: public TSubOperationState { const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common_external_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_common_external_table.cpp index 8bed0aa2df56..86dcd1493373 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common_external_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common_external_table.cpp @@ -113,6 +113,14 @@ std::pair> CreateExternalTable( } auto typeName = NMiniKQL::AdaptLegacyYqlType(col.GetType()); + if (typeName == "Decimal(22,9)"sv) { + // + // typename is reformatted as above + // should discard 
(SCALE,PRECISION) + // they are validated to be (22,9) + // + typeName = "Decimal"sv; + } const NScheme::IType* type = typeRegistry->GetType(typeName); NScheme::TTypeInfo typeInfo; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.cpp index a7d86f9a9035..5179d835d472 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.cpp @@ -1,6 +1,8 @@ #include "schemeshard__operation_common_resource_pool.h" #include "schemeshard_impl.h" +#include + namespace NKikimr::NSchemeShard::NResourcePool { @@ -34,7 +36,7 @@ TPath::TChecker IsParentPathValid(const TPath& parentPath) { } bool IsParentPathValid(const THolder& result, const TPath& parentPath) { - const TString& resourcePoolsDir = JoinPath({parentPath.GetDomainPathString(), ".resource_pools"}); + const TString& resourcePoolsDir = JoinPath({parentPath.GetDomainPathString(), ".metadata/workload_manager/pools"}); if (parentPath.PathString() != resourcePoolsDir) { result->SetError(NKikimrScheme::EStatus::StatusSchemeError, TStringBuilder() << "Resource pools shoud be placed in " << resourcePoolsDir); return false; @@ -90,6 +92,17 @@ bool IsDescriptionValid(const THolder& result, const NKikimrSc return true; } +bool IsResourcePoolInfoValid(const THolder& result, const TResourcePoolInfo::TPtr& info) { + try { + NKikimr::NResourcePool::TPoolSettings settings(info->Properties.GetProperties()); + settings.Validate(); + } catch (...) 
{ + result->SetError(NKikimrScheme::StatusSchemeError, CurrentExceptionMessage()); + return false; + } + return true; +} + TTxState& CreateTransaction(const TOperationId& operationId, const TOperationContext& context, const TPathId& resourcePoolPathId, TTxState::ETxType txType) { Y_ABORT_UNLESS(!context.SS->FindTx(operationId)); TTxState& txState = context.SS->CreateTx(operationId, txType, resourcePoolPathId); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.h b/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.h index 94909926290f..c784de0c7c63 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common_resource_pool.h @@ -24,6 +24,8 @@ bool IsApplyIfChecksPassed(const TTxTransaction& transaction, const THolder& result, const NKikimrSchemeOp::TResourcePoolDescription& description); +bool IsResourcePoolInfoValid(const THolder& result, const TResourcePoolInfo::TPtr& info); + TTxState& CreateTransaction(const TOperationId& operationId, const TOperationContext& context, const TPathId& resourcePoolPathId, TTxState::ETxType txType); void RegisterParentPathDependencies(const TOperationId& operationId, const TOperationContext& context, const TPath& parentPath); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index 417e52cc18da..2bf68f59488c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -14,7 +14,7 @@ void PrepareScheme(NKikimrSchemeOp::TTableDescription* schema, const TString& na const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription completedSchema; - context.SS->DescribeTable(srcTableInfo, typeRegistry, true, false, &completedSchema); + context.SS->DescribeTable(srcTableInfo, typeRegistry, 
true, &completedSchema); completedSchema.SetName(name); //inherit all from Src except PartitionConfig, PartitionConfig could be altered diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp index a615d6b68a4a..55994586f433 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp @@ -105,38 +105,22 @@ class TNewCdcStream: public TSubOperation { } } - TString BuildWorkingDir() const { - if (Transaction.GetCreateCdcStream().HasIndexName()) { - return Transaction.GetWorkingDir() + "/" - + Transaction.GetCreateCdcStream().GetIndexName() + "/indexImplTable"; - } else { - return Transaction.GetWorkingDir(); - } - } - public: using TSubOperation::TSubOperation; THolder Propose(const TString& owner, TOperationContext& context) override { + const auto& workingDir = Transaction.GetWorkingDir(); const auto& op = Transaction.GetCreateCdcStream(); const auto& streamDesc = op.GetStreamDescription(); const auto& streamName = streamDesc.GetName(); const auto acceptExisted = !Transaction.GetFailOnExist(); - auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); - - if (op.HasAllIndexes()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Illigal part operation with all indexes flag"); - return result; - } - - const auto& workingDir = BuildWorkingDir(); - LOG_N("TNewCdcStream Propose" << ": opId# " << OperationId << ", stream# " << workingDir << "/" << streamName); + auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); + const auto tablePath = TPath::Resolve(workingDir, context.SS); { const auto checks = tablePath.Check(); @@ -146,15 +130,13 @@ class TNewCdcStream: public TSubOperation { .IsAtLocalSchemeShard() .IsResolved() .NotDeleted() + .IsTable() + 
.NotBackupTable() .NotAsyncReplicaTable() .NotUnderDeleting(); - if (op.HasIndexName() && op.GetIndexName()) { - checks.IsInsideTableIndexPath(); - } else { - checks - .IsTable() - .IsCommonSensePath(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); } if (!checks) { @@ -529,35 +511,17 @@ class TNewCdcStreamAtTable: public TSubOperation { } THolder Propose(const TString&, TOperationContext& context) override { - auto workingDir = Transaction.GetWorkingDir(); + const auto& workingDir = Transaction.GetWorkingDir(); const auto& op = Transaction.GetCreateCdcStream(); - auto tableName = op.GetTableName(); + const auto& tableName = op.GetTableName(); const auto& streamName = op.GetStreamDescription().GetName(); - auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); - bool isIndexTable = false; - - if (op.HasAllIndexes()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Illigal part operation with all indexes flag"); - return result; - } - - if (op.HasIndexName()) { - if (!op.GetIndexName()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Unexpected empty index name"); - return result; - } - isIndexTable = true; - workingDir += ("/" + tableName + "/" + op.GetIndexName()); - tableName = "indexImplTable"; - } - LOG_N("TNewCdcStreamAtTable Propose" << ": opId# " << OperationId << ", stream# " << workingDir << "/" << tableName << "/" << streamName); + auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); + const auto workingDirPath = TPath::Resolve(workingDir, context.SS); { const auto checks = workingDirPath.Check(); @@ -569,9 +533,7 @@ class TNewCdcStreamAtTable: public TSubOperation { .IsLikeDirectory() .NotUnderDeleting(); - if (isIndexTable) { - checks.IsInsideTableIndexPath(); - } else { + if (checks && !workingDirPath.IsTableIndex()) { checks.IsCommonSensePath(); } @@ -595,7 +557,7 @@ class 
TNewCdcStreamAtTable: public TSubOperation { .NotUnderDeleting(); if (checks) { - if (!isIndexTable) { + if (!tablePath.IsInsideTableIndexPath()) { checks.IsCommonSensePath(); } if (InitialScan) { @@ -679,27 +641,34 @@ class TNewCdcStreamAtTable: public TSubOperation { private: const bool InitialScan; + }; // TNewCdcStreamAtTable -void DoCreateLock(const TOperationId opId, const TPath& workingDirPath, const TPath& tablePath, bool allowIndexImplLock, - TVector& result) +void DoCreateLock( + TVector& result, + const TOperationId opId, + const TPath& workingDirPath, + const TPath& tablePath) { - auto outTx = TransactionTemplate(workingDirPath.PathString(), - NKikimrSchemeOp::EOperationType::ESchemeOpCreateLock); + auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateLock); outTx.SetFailOnExist(false); outTx.SetInternal(true); - auto cfg = outTx.MutableLockConfig(); - cfg->SetName(tablePath.LeafName()); - cfg->SetAllowIndexImplLock(allowIndexImplLock); + outTx.MutableLockConfig()->SetName(tablePath.LeafName()); result.push_back(CreateLock(NextPartId(opId, result), outTx)); } } // anonymous -void DoCreatePqPart(const TOperationId& opId, const TPath& streamPath, const TString& streamName, - const TIntrusivePtr table, const NKikimrSchemeOp::TCreateCdcStream& op, - const TVector& boundaries, const bool acceptExisted, TVector& result) +void DoCreatePqPart( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + const TPath& streamPath, + const TString& streamName, + TTableInfo::TCPtr table, + const TVector& boundaries, + const bool acceptExisted) { auto outTx = TransactionTemplate(streamPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreatePersQueueGroup); outTx.SetFailOnExist(!acceptExisted); @@ -752,34 +721,37 @@ void DoCreatePqPart(const TOperationId& opId, const TPath& streamPath, const TSt result.push_back(CreateNewPQ(NextPartId(opId, result), outTx)); } -void 
FillModifySchemaForCdc(NKikimrSchemeOp::TModifyScheme& outTx, const NKikimrSchemeOp::TCreateCdcStream& op, - const TOperationId& opId, const TString& indexName, bool acceptExisted, bool initialScan) +static void FillModifySchemaForCdc( + NKikimrSchemeOp::TModifyScheme& outTx, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + bool acceptExisted, + bool initialScan) { outTx.SetFailOnExist(!acceptExisted); outTx.MutableCreateCdcStream()->CopyFrom(op); - if (indexName) { - outTx.MutableCreateCdcStream()->SetIndexName(indexName); - } else { - outTx.MutableCreateCdcStream()->ClearIndexMode(); - } - if (initialScan) { outTx.MutableLockGuard()->SetOwnerTxId(ui64(opId.GetTxId())); } } -void DoCreateStream(const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, - const bool acceptExisted, const bool initialScan, const TString& indexName, TVector& result) +void DoCreateStream( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath, + const bool acceptExisted, + const bool initialScan) { { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateCdcStreamImpl); - FillModifySchemaForCdc(outTx, op, opId, indexName, acceptExisted, initialScan); + FillModifySchemaForCdc(outTx, op, opId, acceptExisted, initialScan); result.push_back(CreateNewCdcStreamImpl(NextPartId(opId, result), outTx)); } - { auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateCdcStreamAtTable); - FillModifySchemaForCdc(outTx, op, opId, indexName, acceptExisted, initialScan); + FillModifySchemaForCdc(outTx, op, opId, acceptExisted, initialScan); result.push_back(CreateNewCdcStreamAtTable(NextPartId(opId, result), outTx, initialScan)); } } @@ -826,10 +798,24 @@ ISubOperation::TPtr RejectOnTablePathChecks(const 
TOperationId& opId, const TPat .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks) { + if (!tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } else { + if (!tablePath.Parent().IsTableIndex(NKikimrSchemeOp::EIndexTypeGlobal)) { + return CreateReject(opId, NKikimrScheme::StatusPreconditionFailed, + "Cannot add changefeed to index table"); + } + if (!AppData()->FeatureFlags.GetEnableChangefeedsOnIndexTables()) { + return CreateReject(opId, NKikimrScheme::StatusPreconditionFailed, + "Changefeed on index table is not supported yet"); + } + } + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -837,19 +823,7 @@ ISubOperation::TPtr RejectOnTablePathChecks(const TOperationId& opId, const TPat return nullptr; } -void CalcBoundaries(const TTableInfo& table, TVector& boundaries) { - const auto& partitions = table.GetPartitions(); - boundaries.reserve(partitions.size() - 1); - - for (ui32 i = 0; i < partitions.size(); ++i) { - const auto& partition = partitions.at(i); - if (i != partitions.size() - 1) { - boundaries.push_back(partition.EndOfRange); - } - } -} - -bool FillBoundaries(const TTableInfo& table, const ::NKikimrSchemeOp::TCreateCdcStream& op, TVector& boundaries, TString& errStr) { +bool FillBoundaries(const TTableInfo& table, const NKikimrSchemeOp::TCreateCdcStream& op, TVector& boundaries, TString& errStr) { if (op.HasTopicPartitions()) { const auto& keyColumns = table.KeyColumnIds; const auto& columns = table.Columns; @@ -862,8 +836,17 @@ bool FillBoundaries(const TTableInfo& table, const ::NKikimrSchemeOp::TCreateCdc return false; } } else { - CalcBoundaries(table, boundaries); + const auto& partitions = table.GetPartitions(); + boundaries.reserve(partitions.size() - 1); + + for (ui32 i = 0; i < partitions.size(); ++i) { + const auto& partition = partitions.at(i); + if (i != partitions.size() - 1) { + 
boundaries.push_back(partition.EndOfRange); + } + } } + return true; } @@ -921,7 +904,6 @@ TVector CreateNewCdcStream(TOperationId opId, const TTxTran const auto& tableName = op.GetTableName(); const auto& streamDesc = op.GetStreamDescription(); const auto& streamName = streamDesc.GetName(); - const auto workingDirPath = TPath::Resolve(tx.GetWorkingDir(), context.SS); const auto checksResult = DoNewStreamPathChecks(opId, workingDirPath, tableName, streamName, acceptExisted); @@ -971,76 +953,35 @@ TVector CreateNewCdcStream(TOperationId opId, const TTxTran << "Initial scan is not supported yet")}; } - if (op.HasTopicPartitions()) { - if (op.GetTopicPartitions() <= 0) { - return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, "Topic partitions count must be greater than 0")}; - } - } - - std::vector candidates; - - if (op.GetIndexModeCase() == NKikimrSchemeOp::TCreateCdcStream::kAllIndexes) { - candidates.reserve(tablePath->GetChildren().size()); - for (const auto& child : tablePath->GetChildren()) { - candidates.emplace_back(child.first); - } - } else if (op.GetIndexModeCase() == NKikimrSchemeOp::TCreateCdcStream::kIndexName) { - auto it = tablePath->GetChildren().find(op.GetIndexName()); - if (it == tablePath->GetChildren().end()) { - return {CreateReject(opId, NKikimrScheme::StatusSchemeError, - "requested particular path hasn't been found")}; - } - candidates.emplace_back(it->first); + if (op.HasTopicPartitions() && op.GetTopicPartitions() <= 0) { + return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, "Topic partitions count must be greater than 0")}; } TVector result; - for (const auto& name : candidates) { - const TPath indexPath = tablePath.Child(name); - if (!indexPath.IsTableIndex() || indexPath.IsDeleted()) { - continue; - } - - const TPath indexImplPath = indexPath.Child("indexImplTable"); - if (!indexImplPath) { - return {CreateReject(opId, NKikimrScheme::StatusSchemeError, - "indexImplTable hasn't been found")}; - } - - 
Y_ABORT_UNLESS(context.SS->Tables.contains(tablePath.Base()->PathId)); - auto indexImplTable = context.SS->Tables.at(indexImplPath.Base()->PathId); - - const TPath indexStreamPath = indexImplPath.Child(streamName); - if (auto reject = RejectOnCdcChecks(opId, indexStreamPath, acceptExisted)) { - return {reject}; - } - - if (initialScan) { - DoCreateLock(opId, indexPath, indexImplPath, true, result); - } - - TVector boundaries; - if (!FillBoundaries(*indexImplTable, op, boundaries, errStr)) { - return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, errStr)}; - } - - DoCreateStream(op, opId, workingDirPath, tablePath, acceptExisted, initialScan, name, result); - DoCreatePqPart(opId, indexStreamPath, streamName, indexImplTable, op, boundaries, acceptExisted, result); + if (initialScan) { + DoCreateLock(result, opId, workingDirPath, tablePath); } - if (initialScan) { - DoCreateLock(opId, workingDirPath, tablePath, false, result); + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); } Y_ABORT_UNLESS(context.SS->Tables.contains(tablePath.Base()->PathId)); auto table = context.SS->Tables.at(tablePath.Base()->PathId); + TVector boundaries; if (!FillBoundaries(*table, op, boundaries, errStr)) { return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, errStr)}; } - DoCreateStream(op, opId, workingDirPath, tablePath, acceptExisted, initialScan, {}, result); - DoCreatePqPart(opId, streamPath, streamName, table, op, boundaries, acceptExisted, result); + DoCreateStream(result, op, opId, workingDirPath, tablePath, acceptExisted, initialScan); + DoCreatePqPart(result, op, opId, streamPath, streamName, table, 
boundaries, acceptExisted); + return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h index 11a921d84168..635e57a28b63 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h @@ -1,7 +1,7 @@ #pragma once -#include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" +#include "schemeshard__operation_part.h" #include "schemeshard_impl.h" #include @@ -22,23 +22,22 @@ std::variant DoNewStreamPathChecks( bool acceptExisted); void DoCreateStream( + TVector& result, const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, const bool acceptExisted, - const bool initialScan, - const TString& indexName, - TVector& result); + const bool initialScan); void DoCreatePqPart( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& streamPath, const TString& streamName, - const TIntrusivePtr table, - const NKikimrSchemeOp::TCreateCdcStream& op, + TTableInfo::TCPtr table, const TVector& boundaries, - const bool acceptExisted, - TVector& result); + const bool acceptExisted); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp index 6bb280316138..ab2c187d24bf 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp @@ -68,8 +68,8 @@ TVector CreateNewContinuousBackup(TOperationId opId, const TVector result; - NCdc::DoCreateStream(createCdcStreamOp, opId, workingDirPath, tablePath, acceptExisted, false, {}, result); - NCdc::DoCreatePqPart(opId, streamPath, 
NBackup::CB_CDC_STREAM_NAME, table, createCdcStreamOp, boundaries, acceptExisted, result); + NCdc::DoCreateStream(result, createCdcStreamOp, opId, workingDirPath, tablePath, acceptExisted, false); + NCdc::DoCreatePqPart(result, createCdcStreamOp, opId, streamPath, NBackup::CB_CDC_STREAM_NAME, table, boundaries, acceptExisted); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp index 6ecbfd3c4b85..2c52d5d486d7 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp @@ -239,6 +239,13 @@ class TCreateExternalDataSource : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalDataSource::IsParentPathValid( result, parentPath, Transaction, /* isCreate */ true)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp index 1268f15956d9..c4e0a5e11a4b 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp @@ -308,6 +308,13 @@ class TCreateExternalTable: public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const auto parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalTable::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp index f5c2d464031b..3e37d408681e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp @@ -139,7 +139,8 @@ class TCreateTableIndex: public TSubOperation { .NotDeleted() .NotUnderDeleting() .IsCommonSensePath() - .IsTable(); + .IsTable() + .NotBackupTable(); if (!internal) { checks.NotAsyncReplicaTable(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp index bd7ad540099d..c119f253f47f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp @@ -127,9 +127,7 @@ class TCreateLock: public TSubOperation { .IsLikeDirectory() .FailOnRestrictedCreateInTempZone(); - if (op.GetAllowIndexImplLock()) { - checks.IsInsideTableIndexPath(); - } else { + if (checks && !parentPath.IsTableIndex()) { checks.IsCommonSensePath(); } @@ -151,7 +149,7 @@ class TCreateLock: public TSubOperation { .IsTable() .NotAsyncReplicaTable(); - if (!op.GetAllowIndexImplLock()) { + if (checks && !parentPath.IsTableIndex()) { checks.IsCommonSensePath(); } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp index a83c459588bd..4cd1303a1fa3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp @@ -155,6 +155,7 @@ TTopicInfo::TPtr CreatePersQueueGroup(TOperationContext& context, 
pqGroupInfo->TotalGroupCount = partitionCount; pqGroupInfo->TotalPartitionCount = partitionCount; + pqGroupInfo->ActivePartitionCount = partitionCount; ui32 tabletCount = pqGroupInfo->ExpectedShardCount(); if (tabletCount > TSchemeShard::MaxPQGroupTabletsCount) { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp index 29c5aa399e72..fba996674b60 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp @@ -155,6 +155,13 @@ class TCreateResourcePool : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(context.SS->SelfTabletId())); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableResourcePoolsOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Resource pools are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath& parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NResourcePool::IsParentPathValid(result, parentPath)); @@ -166,6 +173,7 @@ class TCreateResourcePool : public TSubOperation { const TResourcePoolInfo::TPtr resourcePoolInfo = NResourcePool::CreateResourcePool(resourcePoolDescription, 1); Y_ABORT_UNLESS(resourcePoolInfo); + RETURN_RESULT_UNLESS(NResourcePool::IsResourcePoolInfoValid(result, resourcePoolInfo)); AddPathInSchemeShard(result, dstPath, owner); const TPathElement::TPtr resourcePool = CreateResourcePoolPathElement(dstPath); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp index 499e54b4fe15..3e42ac8cf6ff 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp @@ -376,6 +376,7 @@ class TCreateSequence : public TSubOperation { if (checks) { if (parentPath->IsTable()) { + checks.NotBackupTable(); // allow immediately inside a normal table if (parentPath.IsUnderOperation()) { checks.IsUnderTheSameOperation(OperationId.GetTxId()); // allowed only as part of consistent operations diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp index 029e72fe4c3f..545ef3642c6e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp @@ -641,7 +641,7 @@ class TCreateTable: public TSubOperation { Y_ABORT_UNLESS(tableInfo->GetPartitions().back().EndOfRange.empty(), "End of last range must be +INF"); if (tableInfo->IsAsyncReplica()) { - newTable->SetAsyncReplica(); + newTable->SetAsyncReplica(true); } context.SS->Tables[newTable->PathId] = tableInfo; diff --git 
a/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp index e4f6e69922ba..0e172572f778 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp @@ -48,7 +48,7 @@ class TPropose: public TSubOperationState { Y_ABORT_UNLESS(txState->TxType == TTxState::TxCreateView); context.SS->TabletCounters->Simple()[COUNTER_VIEW_COUNT].Add(1); - + const auto pathId = txState->TargetPathId; auto path = TPath::Init(pathId, context.SS); @@ -68,6 +68,7 @@ TViewInfo::TPtr CreateView(const NKikimrSchemeOp::TViewDescription& desc) { TViewInfo::TPtr viewInfo = new TViewInfo; viewInfo->AlterVersion = 1; viewInfo->QueryText = desc.GetQueryText(); + viewInfo->CapturedContext = desc.GetCapturedContext(); return viewInfo; } @@ -109,7 +110,7 @@ class TCreateView: public TSubOperation { const auto& viewDescription = Transaction.GetCreateView(); const TString& name = viewDescription.GetName(); - + LOG_N("TCreateView Propose" << ", path: " << parentPathStr << "/" << name << ", opId: " << OperationId diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp index 07d9bd17e0f7..1654ca58bdba 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp @@ -150,10 +150,13 @@ class TDropCdcStream: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .IsUnderOperation() .IsUnderTheSameOperation(OperationId.GetTxId()); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -332,10 +335,13 @@ class TDropCdcStreamAtTable: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - 
.IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -438,10 +444,10 @@ class TDropCdcStreamAtTable: public TSubOperation { } // anonymous std::variant DoDropStreamPathChecks( - const TOperationId& opId, - const TPath& workingDirPath, - const TString& tableName, - const TString& streamName) + const TOperationId& opId, + const TPath& workingDirPath, + const TString& tableName, + const TString& streamName) { const auto tablePath = workingDirPath.Child(tableName); { @@ -454,10 +460,13 @@ std::variant DoDropStreamPathChecks( .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -485,10 +494,11 @@ std::variant DoDropStreamPathChecks( } ISubOperation::TPtr DoDropStreamChecks( - const TOperationId& opId, - const TPath& tablePath, - const TTxId lockTxId, - TOperationContext& context) { + const TOperationId& opId, + const TPath& tablePath, + const TTxId lockTxId, + TOperationContext& context) +{ TString errStr; if (!context.SS->CheckLocks(tablePath.Base()->PathId, lockTxId, errStr)) { @@ -499,14 +509,14 @@ ISubOperation::TPtr DoDropStreamChecks( } void DoDropStream( - const NKikimrSchemeOp::TDropCdcStream& op, - const TOperationId& opId, - const TPath& workingDirPath, - const TPath& tablePath, - const TPath& streamPath, - const TTxId lockTxId, - TOperationContext& context, - TVector& result) + TVector& result, + const NKikimrSchemeOp::TDropCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath, + const TPath& streamPath, + const TTxId lockTxId, + TOperationContext& context) { { auto outTx = 
TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamAtTable); @@ -529,6 +539,14 @@ void DoDropStream( result.push_back(DropLock(NextPartId(opId, result), outTx)); } + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); + } + { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); outTx.MutableDrop()->SetName(streamPath.Base()->Name); @@ -615,7 +633,7 @@ TVector CreateDropCdcStream(TOperationId opId, const TTxTra TVector result; - DoDropStream(op, opId, workingDirPath, tablePath, streamPath, lockTxId, context, result); + DoDropStream(result, op, opId, workingDirPath, tablePath, streamPath, lockTxId, context); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h index ec4720da71c0..12be7102684c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h @@ -1,8 +1,8 @@ #pragma once +#include "schemeshard__operation_common.h" #include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_part.h" -#include "schemeshard__operation_common.h" #include "schemeshard_impl.h" #include @@ -23,13 +23,13 @@ ISubOperation::TPtr DoDropStreamChecks( TOperationContext& context); void DoDropStream( + TVector& result, const NKikimrSchemeOp::TDropCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, const TPath& streamPath, const TTxId 
lockTxId, - TOperationContext& context, - TVector& result); + TOperationContext& context); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp index 9e7ec8ac7e43..e0e882b84d67 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp @@ -1,8 +1,7 @@ -#include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" -#include "schemeshard_impl.h" - #include "schemeshard__operation_drop_cdc_stream.h" +#include "schemeshard__operation_part.h" +#include "schemeshard_impl.h" #include @@ -40,7 +39,7 @@ TVector CreateDropContinuousBackup(TOperationId opId, const TVector result; - NCdc::DoDropStream(dropCdcStreamOp, opId, workingDirPath, tablePath, streamPath, InvalidTxId, context, result); + NCdc::DoDropStream(result, dropCdcStreamOp, opId, workingDirPath, tablePath, streamPath, InvalidTxId, context); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp index fb268a8b0619..a0d6ff35513f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp @@ -473,21 +473,22 @@ TVector CreateDropIndex(TOperationId nextId, const TTxTrans result.push_back(CreateDropTableIndex(NextPartId(nextId, result), indexDropping)); } - for (const auto& items: indexPath.Base()->GetChildren()) { - Y_ABORT_UNLESS(context.SS->PathsById.contains(items.second)); - auto implPath = context.SS->PathsById.at(items.second); - if (implPath->Dropped()) { + for (const auto& [childName, childPathId] : indexPath.Base()->GetChildren()) { + TPath child = indexPath.Child(childName); + if (child.IsDeleted()) { continue; } - auto implTable = 
context.SS->PathsById.at(items.second); - Y_ABORT_UNLESS(implTable->IsTable()); + Y_ABORT_UNLESS(child.Base()->IsTable()); auto implTableDropping = TransactionTemplate(indexPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); auto operation = implTableDropping.MutableDrop(); - operation->SetName(items.first); + operation->SetName(child.LeafName()); result.push_back(CreateDropTable(NextPartId(nextId, result), implTableDropping)); + if (auto reject = CascadeDropTableChildren(result, nextId, child)) { + return {reject}; + } } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp index 5f32acec8c03..c0ef94bbda58 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp @@ -443,102 +443,8 @@ TVector CreateDropIndexedTable(TOperationId nextId, const T TVector result; result.push_back(CreateDropTable(NextPartId(nextId, result), tx)); - - for (const auto& [childName, childPathId] : table.Base()->GetChildren()) { - TPath child = table.Child(childName); - { - TPath::TChecker checks = child.Check(); - checks - .NotEmpty() - .IsResolved(); - - if (checks) { - if (child.IsDeleted()) { - continue; - } - } - - if (child.IsTableIndex()) { - checks.IsTableIndex(); - } else if (child.IsCdcStream()) { - checks.IsCdcStream(); - } else if (child.IsSequence()) { - checks.IsSequence(); - } - - checks.NotDeleted() - .NotUnderDeleting() - .NotUnderOperation(); - - if (!checks) { - return {CreateReject(nextId, checks.GetStatus(), checks.GetError())}; - } - } - Y_ABORT_UNLESS(child.Base()->PathId == childPathId); - - if (child.IsSequence()) { - auto dropSequence = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropSequence); - dropSequence.MutableDrop()->SetName(ToString(child->Name)); - - 
result.push_back(CreateDropSequence(NextPartId(nextId, result), dropSequence)); - continue; - } else if (child.IsTableIndex()) { - auto dropIndex = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTableIndex); - dropIndex.MutableDrop()->SetName(ToString(child.Base()->Name)); - - result.push_back(CreateDropTableIndex(NextPartId(nextId, result), dropIndex)); - } else if (child.IsCdcStream()) { - auto dropStream = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); - dropStream.MutableDrop()->SetName(ToString(child.Base()->Name)); - - result.push_back(CreateDropCdcStreamImpl(NextPartId(nextId, result), dropStream)); - } - - Y_ABORT_UNLESS(child.Base()->GetChildren().size() == 1); - for (auto& [implName, implPathId] : child.Base()->GetChildren()) { - Y_ABORT_UNLESS(implName == "indexImplTable" || implName == "streamImpl", - "unexpected name %s", implName.c_str()); - - TPath implPath = child.Child(implName); - { - TPath::TChecker checks = implPath.Check(); - checks - .NotEmpty() - .IsResolved() - .NotDeleted() - .NotUnderDeleting() - .NotUnderOperation(); - - if (checks) { - if (implPath.Base()->IsTable()) { - checks - .IsTable() - .IsInsideTableIndexPath(); - } else if (implPath.Base()->IsPQGroup()) { - checks - .IsPQGroup() - .IsInsideCdcStreamPath(); - } - } - - if (!checks) { - return {CreateReject(nextId, checks.GetStatus(), checks.GetError())}; - } - } - Y_ABORT_UNLESS(implPath.Base()->PathId == implPathId); - - if (implPath.Base()->IsTable()) { - auto dropIndexTable = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); - dropIndexTable.MutableDrop()->SetName(ToString(implPath.Base()->Name)); - - result.push_back(CreateDropTable(NextPartId(nextId, result), dropIndexTable)); - } else if (implPath.Base()->IsPQGroup()) { - auto dropPQGroup = TransactionTemplate(child.PathString(), 
NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); - dropPQGroup.MutableDrop()->SetName(ToString(implPath.Base()->Name)); - - result.push_back(CreateDropPQ(NextPartId(nextId, result), dropPQGroup)); - } - } + if (auto reject = CascadeDropTableChildren(result, nextId, table)) { + return {reject}; } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp index c5026592a0ec..8dc9cf01b553 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp @@ -119,9 +119,12 @@ class TDropLock: public TSubOperation { .IsResolved() .NotDeleted() .NotUnderDeleting() - .IsCommonSensePath() .IsLikeDirectory(); + if (checks && !parentPath.IsTableIndex()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -134,10 +137,12 @@ class TDropLock: public TSubOperation { checks .IsAtLocalSchemeShard() .IsResolved() - .NotUnderDeleting() - .IsCommonSensePath(); + .NotUnderDeleting(); if (checks) { + if (!parentPath.IsTableIndex()) { + checks.IsCommonSensePath(); + } if (dstPath.IsUnderOperation()) { // may be part of a consistent operation checks.IsUnderTheSameOperation(OperationId.GetTxId()); } else { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp index 70d663c713eb..5e4f9ca67177 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp @@ -183,7 +183,7 @@ class TPropose: public TSubOperationState { bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); Y_ABORT_UNLESS(parseOk); - const PQGroupReserve reserve(config, pqGroup->TotalPartitionCount); + const PQGroupReserve reserve(config, pqGroup->ActivePartitionCount); auto domainInfo = 
context.SS->ResolveDomainInfo(pathId); domainInfo->DecPathsInside(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp index 7fb9a651574f..a347d5841b90 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp @@ -88,7 +88,9 @@ class TConfigureParts: public TSubOperationState { found = true; Y_ABORT_UNLESS(index->AlterData); - context.SS->DescribeTableIndex(childPathId, childName, index->AlterData, *initiate->MutableIndexDescription()); + context.SS->DescribeTableIndex(childPathId, childName, index->AlterData, false, false, + *initiate->MutableIndexDescription() + ); } txState->ClearShardsInProgress(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp index 32dadccbd563..9a7da1e828fb 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp @@ -65,6 +65,10 @@ TVector CreateConsistentMoveTable(TOperationId nextId, cons return {CreateReject(nextId, NKikimrScheme::StatusPreconditionFailed, "Cannot move table with cdc streams")}; } + if (srcChildPath.IsSequence()) { + return {CreateReject(nextId, NKikimrScheme::StatusPreconditionFailed, "Cannot move table with sequences")}; + } + TPath dstIndexPath = dstPath.Child(name); Y_ABORT_UNLESS(srcChildPath.Base()->PathId == child.second); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp index e06c7c1cca8a..a854295c8da1 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp @@ -1,5 +1,6 @@ #include "schemeshard__operation_part.h" #include "schemeshard_impl.h" +#include "schemeshard_path.h" 
namespace NKikimr::NSchemeShard { @@ -99,4 +100,108 @@ void TSubOperationState::IgnoreMessages(TString debugHint, TSet mgsIds) { MsgToIgnore.swap(mgsIds); } +ISubOperation::TPtr CascadeDropTableChildren(TVector& result, const TOperationId& id, const TPath& table) { + for (const auto& [childName, childPathId] : table.Base()->GetChildren()) { + TPath child = table.Child(childName); + { + TPath::TChecker checks = child.Check(); + checks + .NotEmpty() + .IsResolved(); + + if (checks) { + if (child.IsDeleted()) { + continue; + } + } + + if (child.IsTableIndex()) { + checks.IsTableIndex(); + } else if (child.IsCdcStream()) { + checks.IsCdcStream(); + } else if (child.IsSequence()) { + checks.IsSequence(); + } + + checks.NotDeleted() + .NotUnderDeleting() + .NotUnderOperation(); + + if (!checks) { + return CreateReject(id, checks.GetStatus(), checks.GetError()); + } + } + Y_ABORT_UNLESS(child.Base()->PathId == childPathId); + + if (child.IsSequence()) { + auto dropSequence = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropSequence); + dropSequence.MutableDrop()->SetName(ToString(child->Name)); + + result.push_back(CreateDropSequence(NextPartId(id, result), dropSequence)); + continue; + } else if (child.IsTableIndex()) { + auto dropIndex = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTableIndex); + dropIndex.MutableDrop()->SetName(ToString(child.Base()->Name)); + + result.push_back(CreateDropTableIndex(NextPartId(id, result), dropIndex)); + } else if (child.IsCdcStream()) { + auto dropStream = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); + dropStream.MutableDrop()->SetName(ToString(child.Base()->Name)); + + result.push_back(CreateDropCdcStreamImpl(NextPartId(id, result), dropStream)); + } + + Y_ABORT_UNLESS(child.Base()->GetChildren().size() == 1); + for (auto& [implName, implPathId] : child.Base()->GetChildren()) { + 
Y_ABORT_UNLESS(implName == "indexImplTable" || implName == "streamImpl", + "unexpected name %s", implName.c_str()); + + TPath implPath = child.Child(implName); + { + TPath::TChecker checks = implPath.Check(); + checks + .NotEmpty() + .IsResolved() + .NotDeleted() + .NotUnderDeleting() + .NotUnderOperation(); + + if (checks) { + if (implPath.Base()->IsTable()) { + checks + .IsTable() + .IsInsideTableIndexPath(); + } else if (implPath.Base()->IsPQGroup()) { + checks + .IsPQGroup() + .IsInsideCdcStreamPath(); + } + } + + if (!checks) { + return CreateReject(id, checks.GetStatus(), checks.GetError()); + } + } + Y_ABORT_UNLESS(implPath.Base()->PathId == implPathId); + + if (implPath.Base()->IsTable()) { + auto dropIndexTable = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); + dropIndexTable.MutableDrop()->SetName(ToString(implPath.Base()->Name)); + + result.push_back(CreateDropTable(NextPartId(id, result), dropIndexTable)); + if (auto reject = CascadeDropTableChildren(result, id, implPath)) { + return reject; + } + } else if (implPath.Base()->IsPQGroup()) { + auto dropPQGroup = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); + dropPQGroup.MutableDrop()->SetName(ToString(implPath.Base()->Name)); + + result.push_back(CreateDropPQ(NextPartId(id, result), dropPQGroup)); + } + } + } + + return nullptr; +} + } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_part.h b/ydb/core/tx/schemeshard/schemeshard__operation_part.h index 386dcb20768a..b0e9a3589503 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_part.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_part.h @@ -84,6 +84,7 @@ namespace NKikimr { namespace NSchemeShard { class TSchemeShard; +class TPath; struct TOperationContext { public: @@ -492,7 +493,7 @@ ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, const TTxTransaction& ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, 
TTxState::ETxState state); ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx); -ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx); +TVector CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context); ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, TTxState::ETxState state); @@ -513,10 +514,10 @@ ISubOperation::TPtr CreateExtSubDomain(TOperationId id, TTxState::ETxState state // Alter TVector CreateCompatibleAlterExtSubDomain(TOperationId nextId, const TTxTransaction& tx, TOperationContext& context); -ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState state); -ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, TTxState::ETxState state); +//NOTE: no variants to construct individual suboperations directly from TTxTransaction -- +// -- it should be possible only through CreateCompatibleAlterExtSubDomain // Drop ISubOperation::TPtr CreateForceDropExtSubDomain(TOperationId id, const TTxTransaction& tx); @@ -620,5 +621,8 @@ ISubOperation::TPtr CreateAlterResourcePool(TOperationId id, TTxState::ETxState ISubOperation::TPtr CreateDropResourcePool(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateDropResourcePool(TOperationId id, TTxState::ETxState state); +// returns Reject in case of error, nullptr otherwise +ISubOperation::TPtr CascadeDropTableChildren(TVector& result, const TOperationId& id, const TPath& table); + } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp 
b/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp index 6f6714d4ba1d..fcf9bbb4ef40 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp @@ -1530,13 +1530,13 @@ ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId return CreateForceDropSubDomain(id, tx); } -ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx) { +TVector CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context) { const auto& info = tx.GetSubDomain(); const TString& parentPathStr = tx.GetWorkingDir(); const TString& name = info.GetName(); - TPath path = TPath::Resolve(parentPathStr, ss).Dive(name); + TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name); { TPath::TChecker checks = path.Check(); @@ -1546,15 +1546,16 @@ ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationI .NotDeleted(); if (!checks) { - return CreateAlterSubDomain(id, tx); + return {CreateAlterSubDomain(id, tx)}; } } if (path.Base()->IsExternalSubDomainRoot()) { - return CreateAlterExtSubDomain(id, tx); + // plain subdomains don't have subdomain/tenant hives so only single operation should be returned here + return CreateCompatibleAlterExtSubDomain(id, tx, context); } - return CreateAlterSubDomain(id, tx); + return {CreateAlterSubDomain(id, tx)}; } } diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 72e7b4a80379..c3a76f9035f4 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -259,18 +259,19 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, subDomainInfo->EffectiveStoragePools(), shardInfo->BindedChannels); + const auto pathElement = Self->PathsById[pathId]; LOG_DEBUG_S(ctx, 
NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxStoreTableStats.PersistSingleStats: main stats from" << " datashardId(TabletID)=" << datashardId << " maps to shardIdx: " << shardIdx - << ", pathId: " << pathId << ", pathId map=" << Self->PathsById[pathId]->Name + << ", pathId: " << pathId << ", pathId map=" << pathElement->Name << ", is column=" << isColumnTable << ", is olap=" << isOlapStore); const TPartitionStats newStats = PrepareStats(ctx, rec, channelsMapping); LOG_INFO_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, - "Add stats from shard with datashardId(TabletID)=" << datashardId + "Add stats from shard with datashardId(TabletID)=" << datashardId << ", pathId " << pathId.LocalPathId - << ": RowCount " << newStats.RowCount + << ": RowCount " << newStats.RowCount << ", DataSize " << newStats.DataSize << (newStats.HasBorrowedData ? ", with borrowed parts" : "")); @@ -321,11 +322,10 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, const TPathId tablePathId = TPathId(TOwnerId(pathId.OwnerId), TLocalPathId(table.GetTableLocalId())); if (Self->ColumnTables.contains(tablePathId)) { - LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + LOG_TRACE_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "add stats for exists table with pathId=" << tablePathId); - auto columnTable = Self->ColumnTables.TakeVerified(tablePathId); - columnTable->UpdateTableStats(tablePathId, newTableStats); + Self->ColumnTables.GetVerifiedPtr(tablePathId)->UpdateTableStats(shardIdx, tablePathId, newTableStats); } else { LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "failed add stats for table with pathId=" << tablePathId); @@ -336,7 +336,7 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, LOG_INFO_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "PersistSingleStats: ColumnTable rec.GetColumnTables() size=" << rec.GetTables().size()); - auto columnTable = Self->ColumnTables.TakeVerified(pathId); + auto columnTable = Self->ColumnTables.GetVerifiedPtr(pathId); 
oldAggrStats = columnTable->GetStats().Aggregated; columnTable->UpdateShardStats(shardIdx, newStats); newAggrStats = columnTable->GetStats().Aggregated; @@ -400,11 +400,14 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); } + const TTableIndexInfo* index = Self->Indexes.Value(pathElement->ParentPathId, nullptr).Get(); const TTableInfo* mainTableForIndex = Self->GetMainTableForIndex(pathId); const auto forceShardSplitSettings = Self->SplitSettings.GetForceShardSplitSettings(); TVector shardsToMerge; - if (table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex)) { + if ((!index || index->State == NKikimrSchemeOp::EIndexStateReady) + && table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex) + ) { TTxId txId = Self->GetCachedTxId(ctx); if (!txId) { diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp index 091e60fdc26f..edd640d02ccd 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp @@ -34,11 +34,41 @@ static bool IsIntegerType(NScheme::TTypeInfo type) { } } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, const TConstArrayRef &keyColumnTypes) { - ui64 bucketsCount = histogram.BucketsSize(); - ui64 idxLo = bucketsCount * 0.33; - ui64 idxMed = bucketsCount * 0.5; - ui64 idxHi = bucketsCount * 0.66; +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 total, const TConstArrayRef &keyColumnTypes) { + if (histogram.GetBuckets().empty()) { + return {}; + } + + ui64 idxLo = Max(), idxMed = Max(), idxHi = Max(); + { // search for median and acceptable bounds range 
so that after the split smallest size is >= 25% + ui64 idxMedDiff = Max(), idx = 0; + for (const auto& point : histogram.GetBuckets()) { + ui64 leftSize = Min(point.GetValue(), total); + ui64 rightSize = total - leftSize; + + // search for a median point at which abs(leftSize - rightSize) is minimum + ui64 sizesDiff = Max(leftSize, rightSize) - Min(leftSize, rightSize); + if (idxMedDiff > sizesDiff) { + idxMed = idx; + idxMedDiff = sizesDiff; + } + + if (leftSize * 4 >= total && idxLo == Max()) { + idxLo = idx; // first point at which leftSize >= 25% + } + if (rightSize * 4 >= total) { + idxHi = idx; // last point at which rightSize >= 25% + } + + idx++; + } + + bool canSplit = idxLo != Max() && idxLo <= idxMed && idxMed <= idxHi && idxHi != Max(); + + if (!canSplit) { + return {}; + } + } TSerializedCellVec keyLo(histogram.GetBuckets(idxLo).GetKey()); TSerializedCellVec keyMed(histogram.GetBuckets(idxMed).GetKey()); @@ -302,7 +332,8 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex << " for pathId " << tableId << " state '" << DatashardStateName(rec.GetShardState()).data() << "'" << " dataSize " << dataSize - << " rowCount " << rowCount); + << " rowCount " << rowCount + << " dataSizeHistogram buckets " << rec.GetTableStats().GetDataSizeHistogram().BucketsSize()); if (!Self->Tables.contains(tableId)) return true; @@ -353,12 +384,15 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex } else { // Choose number of parts and split boundaries const auto& histogram = rec.GetTableStats().GetDataSizeHistogram(); - if (histogram.BucketsSize() < 2) { + + splitKey = ChooseSplitKeyByHistogram(histogram, dataSize, keyColumnTypes); + if (splitKey.GetBuffer().empty()) { + LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Failed to find proper split key (initially) for '%s' of datashard %" PRIu64, + ToString(splitReason), datashardId); return true; } - splitKey = ChooseSplitKeyByHistogram(histogram, 
keyColumnTypes); - // Split key must not be less than the first key TSerializedCellVec lowestKey(histogram.GetBuckets(0).GetKey()); if (0 < CompareTypedCellVectors(lowestKey.GetCells().data(), splitKey.GetCells().data(), @@ -366,7 +400,7 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex lowestKey.GetCells().size(), splitKey.GetCells().size())) { LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, - "Failed to find proper split key for '%s' of datashard %" PRIu64, + "Failed to find proper split key (less than first) for '%s' of datashard %" PRIu64, ToString(splitReason), datashardId); return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp index 8bfe30debba0..7e0cd3f54d87 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp @@ -1,14 +1,30 @@ -#include "schemeshard_audit_log.h" -#include "schemeshard_path.h" -#include "schemeshard_audit_log_fragment.h" +#include + +#include +#include -#include #include +#include +#include + #include -#include +#include + +#include "schemeshard_path.h" +#include "schemeshard_impl.h" +#include "schemeshard_xxport__helpers.h" +#include "schemeshard_audit_log_fragment.h" +#include "schemeshard_audit_log.h" namespace NKikimr::NSchemeShard { +namespace { + +const TString SchemeshardComponentName = "schemeshard"; + +//NOTE: EmptyValue couldn't be an empty string as AUDIT_PART() skips parts with an empty values +const TString EmptyValue = "{none}"; + TString GeneralStatus(NKikimrScheme::EStatus actualStatus) { switch(actualStatus) { case NKikimrScheme::EStatus::StatusAlreadyExists: @@ -61,12 +77,9 @@ TPath DatabasePathFromWorkingDir(TSchemeShard* SS, const TString &opWorkingDir) return databasePath; } -void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, 
TSchemeShard* SS, const TString& userSID) { - static const TString SchemeshardComponentName = "schemeshard"; - - //NOTE: EmptyValue couldn't be an empty string as AUDIT_PART() skips parts with an empty values - static const TString EmptyValue = "{none}"; +} // anonymous namespace +void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID) { // Each TEvModifySchemeTransaction.Transaction is a self sufficient operation and should be logged independently // (even if it was packed into a single TxProxy transaction with some other operations). for (const auto& operation : request.GetTransaction()) { @@ -79,7 +92,7 @@ void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransac AUDIT_LOG( AUDIT_PART("component", SchemeshardComponentName) AUDIT_PART("tx_id", std::to_string(request.GetTxId())) - AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue) ) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue)) AUDIT_PART("subject", (!userSID.empty() ? userSID : EmptyValue)) AUDIT_PART("database", (!databasePath.IsEmpty() ? 
databasePath.GetDomainPathString() : EmptyValue)) AUDIT_PART("operation", logEntry.Operation) @@ -165,4 +178,223 @@ void AuditLogModifySchemeTransactionDeprecated(const NKikimrScheme::TEvModifySch } } +namespace { + +struct TXxportRecord { + TString OperationName; + ui64 Id; + TString Uid; + TString RemoteAddress; + TString UserSID; + TString DatabasePath; + TString Status; + Ydb::StatusIds::StatusCode DetailedStatus; + TString Reason; + TVector> AdditionalParts; + TString StartTime; + TString EndTime; + TString CloudId; + TString FolderId; + TString ResourceId; +}; + +void AuditLogXxport(TXxportRecord&& record) { + AUDIT_LOG( + AUDIT_PART("component", SchemeshardComponentName) + + AUDIT_PART("id", std::to_string(record.Id)) + AUDIT_PART("uid", record.Uid); + AUDIT_PART("remote_address", (!record.RemoteAddress.empty() ? record.RemoteAddress : EmptyValue)) + AUDIT_PART("subject", (!record.UserSID.empty() ? record.UserSID : EmptyValue)) + AUDIT_PART("database", (!record.DatabasePath.empty() ? record.DatabasePath : EmptyValue)) + AUDIT_PART("operation", record.OperationName) + AUDIT_PART("status", record.Status) + AUDIT_PART("detailed_status", Ydb::StatusIds::StatusCode_Name(record.DetailedStatus)) + AUDIT_PART("reason", record.Reason) + + // all parts are considered required, so all empty values are replaced with a special stub + for (const auto& [name, value] : record.AdditionalParts) { + AUDIT_PART(name, (!value.empty() ? 
value : EmptyValue)) + } + + AUDIT_PART("start_time", record.StartTime) + AUDIT_PART("end_time", record.EndTime) + + AUDIT_PART("cloud_id", record.CloudId); + AUDIT_PART("folder_id", record.FolderId); + AUDIT_PART("resource_id", record.ResourceId); + ); +} + +using TParts = decltype(TXxportRecord::AdditionalParts); + +template +TParts ExportKindSpecificParts(const Proto& proto) { + //NOTE: intentional switch -- that will help to detect (by breaking the compilation) + // the moment when and if oneof Settings will be extended + switch (proto.GetSettingsCase()) { + case Proto::kExportToYtSettings: + return ExportKindSpecificParts(proto.GetExportToYtSettings()); + case Proto::kExportToS3Settings: + return ExportKindSpecificParts(proto.GetExportToS3Settings()); + case Proto::SETTINGS_NOT_SET: + return {}; + } +} +template <> TParts ExportKindSpecificParts(const Ydb::Export::ExportToYtSettings& proto) { + return { + {"export_type", "yt"}, + {"export_item_count", ToString(proto.items().size())}, + {"export_yt_prefix", ((proto.items().size() > 0) ? proto.items(0).destination_path() : "")}, + }; +} +template <> TParts ExportKindSpecificParts(const Ydb::Export::ExportToS3Settings& proto) { + return { + {"export_type", "s3"}, + {"export_item_count", ToString(proto.items().size())}, + {"export_s3_bucket", proto.bucket()}, + //NOTE: take first item's destination_prefix as a "good enough approximation" + // (each item has its own destination_prefix, but in practice they are all the same) + {"export_s3_prefix", ((proto.items().size() > 0) ? 
proto.items(0).destination_prefix() : "")}, + }; +} + +template +TParts ImportKindSpecificParts(const Proto& proto) { + //NOTE: intentional switch -- that will help to detect (by breaking the compilation) + // the moment when and if oneof Settings will be extended + switch (proto.GetSettingsCase()) { + case Proto::kImportFromS3Settings: + return ImportKindSpecificParts(proto.GetImportFromS3Settings()); + case Proto::SETTINGS_NOT_SET: + return {}; + } +} +template <> TParts ImportKindSpecificParts(const Ydb::Import::ImportFromS3Settings& proto) { + return { + {"import_type", "s3"}, + {"export_item_count", ToString(proto.items().size())}, + {"import_s3_bucket", proto.bucket()}, + //NOTE: take first item's source_prefix as a "good enough approximation" + // (each item has its own source_prefix, but in practice they are all the same) + {"import_s3_prefix", ((proto.items().size() > 0) ? proto.items(0).source_prefix() : "")}, + }; +} + +} // anonymous namespace + +template +void _AuditLogXxportStart(const Request& request, const Response& response, const TString& operationName, TParts&& additionalParts, TSchemeShard* SS) { + TPath databasePath = DatabasePathFromWorkingDir(SS, request.GetDatabaseName()); + auto [cloud_id, folder_id, database_id] = GetDatabaseCloudIds(databasePath); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(request.GetPeerName()); + const auto& entry = response.GetResponse().GetEntry(); + + AuditLogXxport({ + .OperationName = operationName, + //NOTE: original request's tx-id is used as an operation id + .Id = request.GetTxId(), + .Uid = GetUid(request.GetRequest().GetOperationParams()), + .RemoteAddress = peerName, + .UserSID = request.GetUserSID(), + .DatabasePath = databasePath.PathString(), + .Status = (entry.GetStatus() == Ydb::StatusIds::SUCCESS ? "SUCCESS" : "ERROR"), + .DetailedStatus = entry.GetStatus(), + //NOTE: use main issue (on {ex,im}port itself), ignore issues on individual items + .Reason = ((entry.IssuesSize() > 0) ? 
entry.GetIssues(0).message() : ""), + + .AdditionalParts = std::move(additionalParts), + + // no start or end times + + .CloudId = cloud_id, + .FolderId = folder_id, + .ResourceId = database_id, + }); +} + +void AuditLogExportStart(const NKikimrExport::TEvCreateExportRequest& request, const NKikimrExport::TEvCreateExportResponse& response, TSchemeShard* SS) { + _AuditLogXxportStart(request, response, "EXPORT START", ExportKindSpecificParts(request.GetRequest()), SS); +} + +void AuditLogImportStart(const NKikimrImport::TEvCreateImportRequest& request, const NKikimrImport::TEvCreateImportResponse& response, TSchemeShard* SS) { + _AuditLogXxportStart(request, response, "IMPORT START", ImportKindSpecificParts(request.GetRequest()), SS); +} + +template +void _AuditLogXxportEnd(const Info& info, const TString& operationName, TParts&& additionalParts, TSchemeShard* SS) { + const TPath databasePath = TPath::Init(info.DomainPathId, SS); + auto [cloud_id, folder_id, database_id] = GetDatabaseCloudIds(databasePath); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(info.PeerName); + TString userSID = *info.UserSID.OrElse(EmptyValue); + TString startTime = (info.StartTime != TInstant::Zero() ? info.StartTime.ToString() : TString()); + TString endTime = (info.EndTime != TInstant::Zero() ? info.EndTime.ToString() : TString()); + + // Info.State can't be anything but Done or Cancelled here + Y_ABORT_UNLESS(info.State == Info::EState::Done || info.State == Info::EState::Cancelled); + TString status = TString(info.State == Info::EState::Done ? "SUCCESS" : "ERROR"); + Ydb::StatusIds::StatusCode detailedStatus = (info.State == Info::EState::Done ? 
Ydb::StatusIds::SUCCESS : Ydb::StatusIds::CANCELLED); + + AuditLogXxport({ + .OperationName = operationName, + .Id = info.Id, + .Uid = info.Uid, + .RemoteAddress = peerName, + .UserSID = userSID, + .DatabasePath = databasePath.PathString(), + .Status = status, + .DetailedStatus = detailedStatus, + .Reason = info.Issue, + + .AdditionalParts = std::move(additionalParts), + + .StartTime = startTime, + .EndTime = endTime, + + .CloudId = cloud_id, + .FolderId = folder_id, + .ResourceId = database_id, + }); +} + +void AuditLogExportEnd(const TExportInfo& info, TSchemeShard* SS) { + NKikimrExport::TCreateExportRequest proto; + // TSchemeShard::FromXxportInfo() can not be used here + switch (info.Kind) { + case TExportInfo::EKind::YT: + Y_ABORT_UNLESS(proto.MutableExportToYtSettings()->ParseFromString(info.Settings)); + proto.MutableExportToYtSettings()->clear_token(); + break; + case TExportInfo::EKind::S3: + Y_ABORT_UNLESS(proto.MutableExportToS3Settings()->ParseFromString(info.Settings)); + proto.MutableExportToS3Settings()->clear_access_key(); + proto.MutableExportToS3Settings()->clear_secret_key(); + break; + } + _AuditLogXxportEnd(info, "EXPORT END", ExportKindSpecificParts(proto), SS); +} +void AuditLogImportEnd(const TImportInfo& info, TSchemeShard* SS) { + _AuditLogXxportEnd(info, "IMPORT END", ImportKindSpecificParts(info.Settings), SS); +} + +void AuditLogLogin(const NKikimrScheme::TEvLogin& request, const NKikimrScheme::TEvLoginResult& response, TSchemeShard* SS) { + static const TString LoginOperationName = "LOGIN"; + + TPath databasePath = TPath::Root(SS); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(request.GetPeerName()); + + AUDIT_LOG( + AUDIT_PART("component", SchemeshardComponentName) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue)) + AUDIT_PART("database", (!databasePath.PathString().empty() ? 
databasePath.PathString() : EmptyValue)) + AUDIT_PART("operation", LoginOperationName) + AUDIT_PART("status", TString(response.GetError().empty() ? "SUCCESS" : "ERROR")) + AUDIT_PART("reason", response.GetError(), response.HasError()) + + // Login + AUDIT_PART("login_user", (request.HasUser() ? request.GetUser() : EmptyValue)) + AUDIT_PART("login_auth_domain", (!request.GetExternalAuth().empty() ? request.GetExternalAuth() : EmptyValue)) + ); +} + } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.h b/ydb/core/tx/schemeshard/schemeshard_audit_log.h index 559099131b86..1053823b4d00 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.h +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.h @@ -5,13 +5,35 @@ namespace NKikimrScheme { class TEvModifySchemeTransaction; class TEvModifySchemeTransactionResult; + +class TEvLogin; +class TEvLoginResult; +} + +namespace NKikimrExport { +class TEvCreateExportRequest; +class TEvCreateExportResponse; +} + +namespace NKikimrImport { +class TEvCreateImportRequest; +class TEvCreateImportResponse; } namespace NKikimr::NSchemeShard { class TSchemeShard; +struct TExportInfo; +struct TImportInfo; void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID); void AuditLogModifySchemeTransactionDeprecated(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID); +void AuditLogExportStart(const NKikimrExport::TEvCreateExportRequest& request, const NKikimrExport::TEvCreateExportResponse& response, TSchemeShard* SS); +void AuditLogExportEnd(const TExportInfo& exportInfo, TSchemeShard* SS); + +void AuditLogImportStart(const NKikimrImport::TEvCreateImportRequest& request, const NKikimrImport::TEvCreateImportResponse& response, TSchemeShard* SS); +void AuditLogImportEnd(const 
TImportInfo& importInfo, TSchemeShard* SS); + +void AuditLogLogin(const NKikimrScheme::TEvLogin& request, const NKikimrScheme::TEvLoginResult& response, TSchemeShard* SS); } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp index 08f72ca6c04f..32697ebaeee1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp @@ -34,7 +34,8 @@ void TSchemeShard::Handle(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev, const void TSchemeShard::PersistCreateBuildIndex(NIceDb::TNiceDb& db, const TIndexBuildInfo::TPtr info) { Y_ABORT_UNLESS(info->BuildKind != TIndexBuildInfo::EBuildKind::BuildKindUnspecified); - db.Table().Key(info->Id).Update( + auto persistedBuildIndex = db.Table().Key(info->Id); + persistedBuildIndex.Update( NIceDb::TUpdate(info->Uid), NIceDb::TUpdate(info->DomainPathId.OwnerId), NIceDb::TUpdate(info->DomainPathId.LocalPathId), @@ -48,6 +49,17 @@ void TSchemeShard::PersistCreateBuildIndex(NIceDb::TNiceDb& db, const TIndexBuil NIceDb::TUpdate(info->Limits.MaxRetries), NIceDb::TUpdate(ui32(info->BuildKind)) ); + // Persist details of the index build operation: ImplTableDescription. + // We have chosen TIndexCreationConfig's string representation as the serialization format. 
+ { + NKikimrSchemeOp::TIndexCreationConfig serializableRepresentation; + + *serializableRepresentation.MutableIndexImplTableDescription() = info->ImplTableDescription; + + persistedBuildIndex.Update( + NIceDb::TUpdate(serializableRepresentation.SerializeAsString()) + ); + } ui32 columnNo = 0; for (ui32 i = 0; i < info->IndexColumns.size(); ++i, ++columnNo) { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 6021cbe67056..450db67f8e46 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -1,4 +1,5 @@ #include "schemeshard_build_index.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_build_index_helpers.h" #include "schemeshard_build_index_tx_base.h" #include "schemeshard_impl.h" @@ -29,7 +30,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder << "Index build with id '" << id << "' already exists"); } - const TString& uid = GetUid(request.GetOperationParams().labels()); + const TString& uid = GetUid(request.GetOperationParams()); if (uid && Self->IndexBuildsByUid.contains(uid)) { return Reply(Ydb::StatusIds::ALREADY_EXISTS, TStringBuilder() << "Index build with uid '" << uid << "' already exists"); @@ -212,7 +213,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder if (settings.has_index() && settings.has_column_build_operation()) { explain = "unable to build index and column in the single operation"; - return false; + return false; } if (settings.has_index()) { @@ -240,19 +241,10 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder Ydb::StatusIds::StatusCode status; if (!FillIndexTablePartitioning(buildInfo->ImplTableDescription, index, status, explain)) { return false; - } + } } return true; } - - static TString GetUid(const google::protobuf::Map& labels) { - auto it = 
labels.find("uid"); - if (it == labels.end()) { - return TString(); - } - - return it->second; - } }; ITransaction* TSchemeShard::CreateTxCreate(TEvIndexBuilder::TEvCreateRequest::TPtr& ev) { diff --git a/ydb/core/tx/schemeshard/schemeshard_export.cpp b/ydb/core/tx/schemeshard/schemeshard_export.cpp index 1a201de7edf4..a8ae8e67d9a6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export.cpp @@ -83,7 +83,7 @@ namespace { void TSchemeShard::FromXxportInfo(NKikimrExport::TExport& exprt, const TExportInfo::TPtr exportInfo) { exprt.SetId(exportInfo->Id); exprt.SetStatus(Ydb::StatusIds::SUCCESS); - + if (exportInfo->StartTime != TInstant::Zero()) { *exprt.MutableStartTime() = SecondsToProtoTimeStamp(exportInfo->StartTime.Seconds()); } @@ -91,6 +91,10 @@ void TSchemeShard::FromXxportInfo(NKikimrExport::TExport& exprt, const TExportIn *exprt.MutableEndTime() = SecondsToProtoTimeStamp(exportInfo->EndTime.Seconds()); } + if (exportInfo->UserSID) { + exprt.SetUserSID(*exportInfo->UserSID); + } + switch (exportInfo->State) { case TExportInfo::EState::CreateExportDir: case TExportInfo::EState::CopyTables: diff --git a/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp b/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp index 531168da9275..ccc6146e269d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp @@ -1,6 +1,7 @@ #include "schemeshard_xxport__tx_base.h" #include "schemeshard_export_flow_proposals.h" #include "schemeshard_export.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -88,6 +89,11 @@ struct TSchemeShard::TExport::TTxCancel: public TSchemeShard::TXxport::TTxBase { Send(Request->Sender, std::move(response), 0, Request->Cookie); SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } + return true; } @@ -167,6 +173,11 @@ struct 
TSchemeShard::TExport::TTxCancelAck: public TSchemeShard::TXxport::TTxBas } SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } + return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp index dc822ca86d5b..f65eabb43f89 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp @@ -1,7 +1,9 @@ #include "schemeshard_xxport__tx_base.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_export_flow_proposals.h" #include "schemeshard_export_helpers.h" #include "schemeshard_export.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -50,7 +52,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { ); } - const TString& uid = GetUid(request.GetRequest().GetOperationParams().labels()); + const TString& uid = GetUid(request.GetRequest().GetOperationParams()); if (uid) { if (auto it = Self->ExportsByUid.find(uid); it != Self->ExportsByUid.end()) { if (IsSameDomain(it->second, request.GetDatabaseName())) { @@ -95,7 +97,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { case NKikimrExport::TCreateExportRequest::kExportToYtSettings: { const auto& settings = request.GetRequest().GetExportToYtSettings(); - exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::YT, settings, domainPath.Base()->PathId); + exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::YT, settings, domainPath.Base()->PathId, request.GetPeerName()); TString explain; if (!FillItems(exportInfo, settings, explain)) { @@ -115,7 +117,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { settings.set_scheme(Ydb::Export::ExportToS3Settings::HTTPS); } - exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::S3, settings, 
domainPath.Base()->PathId); + exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::S3, settings, domainPath.Base()->PathId, request.GetPeerName()); TString explain; if (!FillItems(exportInfo, settings, explain)) { @@ -166,15 +168,6 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } private: - static TString GetUid(const google::protobuf::Map& labels) { - auto it = labels.find("uid"); - if (it == labels.end()) { - return TString(); - } - - return it->second; - } - bool Reply( THolder response, const Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS, @@ -191,6 +184,8 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { AddIssue(exprt, errorMessage); } + AuditLogExportStart(Request->Get()->Record, response->Record, Self); + Send(Request->Sender, std::move(response), 0, Request->Cookie); return true; @@ -895,7 +890,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase OnNotifyResult(txId, id, itemIdx, txc); Self->TxIdToExport.erase(txId); } - + if (Self->TxIdToDependentExport.contains(txId)) { for (const auto id : Self->TxIdToDependentExport.at(txId)) { OnNotifyResult(txId, id, Max(), txc); @@ -997,6 +992,10 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase Self->PersistExportState(db, exportInfo); SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } } }; // TTxProgress diff --git a/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp b/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp index 4af5a86d53ea..d6fca52aecaf 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp @@ -76,6 +76,7 @@ static NKikimrSchemeOp::TPathDescription GetTableDescription(TSchemeShard* ss, c opts.SetReturnPartitioningInfo(false); opts.SetReturnPartitionConfig(true); 
opts.SetReturnBoundaries(true); + opts.SetReturnIndexTableBoundaries(true); auto desc = DescribePath(ss, TlsActivationContext->AsActorContext(), pathId, opts); auto record = desc->GetRecord(); diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index 05098b99d693..b3159f81b4dd 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -598,8 +598,9 @@ void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool f } else if (node->PathType == NKikimrSchemeOp::EPathType::EPathTypeTable) { Y_ABORT_UNLESS(Tables.contains(node->PathId)); TTableInfo::TPtr tabletInfo = Tables.at(node->PathId); - tabletInfo->PreSerializedPathDescription.clear(); - tabletInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + tabletInfo->PreserializedTablePartitions.clear(); + tabletInfo->PreserializedTablePartitionsNoKeys.clear(); + tabletInfo->PreserializedTableSplitBoundaries.clear(); } } @@ -2954,7 +2955,9 @@ void TSchemeShard::PersistView(NIceDb::TNiceDb &db, TPathId pathId) { db.Table().Key(pathId.LocalPathId).Update( NIceDb::TUpdate{viewInfo->AlterVersion}, - NIceDb::TUpdate{viewInfo->QueryText}); + NIceDb::TUpdate{viewInfo->QueryText}, + NIceDb::TUpdate{viewInfo->CapturedContext.SerializeAsString()} + ); } void TSchemeShard::PersistRemoveView(NIceDb::TNiceDb& db, TPathId pathId) { @@ -4266,6 +4269,10 @@ ui64 TSchemeShard::GetAliveChildren(TPathElement::TPtr pathEl, const std::option Y_ABORT_UNLESS(PathsById.contains(pathId)); auto childPath = PathsById.at(pathId); + if (childPath->Dropped()) { + continue; + } + count += ui64(childPath->PathType == *type); } @@ -4462,6 +4469,8 @@ void TSchemeShard::OnActivateExecutor(const TActorContext &ctx) { appData->Icb->RegisterSharedControl(DisablePublicationsOfDropping, "SchemeShard_DisablePublicationsOfDropping"); 
appData->Icb->RegisterSharedControl(FillAllocatePQ, "SchemeShard_FillAllocatePQ"); + appData->Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + AllowDataColumnForIndexTable = appData->FeatureFlags.GetEnableDataColumnForIndexTable(); appData->Icb->RegisterSharedControl(AllowDataColumnForIndexTable, "SchemeShard_AllowDataColumnForIndexTable"); @@ -6538,6 +6547,12 @@ TString TSchemeShard::FillAlterTableTxBody(TPathId pathId, TShardIdx shardIdx, T *patch); } + if (alterData->TableDescriptionFull.Defined() && alterData->TableDescriptionFull->HasReplicationConfig()) { + proto->MutableReplicationConfig()->CopyFrom(alterData->TableDescriptionFull->GetReplicationConfig()); + } else if (tableInfo->HasReplicationConfig()) { + proto->MutableReplicationConfig()->CopyFrom(tableInfo->ReplicationConfig()); + } + TString txBody; Y_PROTOBUF_SUPPRESS_NODISCARD tx.SerializeToString(&txBody); return txBody; @@ -6675,7 +6690,9 @@ void TSchemeShard::FillTableDescriptionForShardIdx( case NKikimrSchemeOp::EPathTypeTableIndex: { Y_ABORT_UNLESS(Indexes.contains(childPathId)); auto info = Indexes.at(childPathId); - DescribeTableIndex(childPathId, childName, newTable ? info->AlterData : info, *tableDescr->MutableTableIndexes()->Add()); + DescribeTableIndex(childPathId, childName, newTable ? 
info->AlterData : info, false, false, + *tableDescr->MutableTableIndexes()->Add() + ); break; } @@ -6966,7 +6983,10 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TAppConfig& appConfi ExternalSourceFactory = NExternalSource::CreateExternalSourceFactory( std::vector(hostnamePatterns.begin(), hostnamePatterns.end()), nullptr, - appConfig.GetQueryServiceConfig().GetS3().GetGeneratorPathsLimit() + appConfig.GetQueryServiceConfig().GetS3().GetGeneratorPathsLimit(), + nullptr, + appConfig.GetFeatureFlags().GetEnableExternalSourceSchemaInference(), + appConfig.GetQueryServiceConfig().GetS3().GetAllowLocalFiles() ); } @@ -7001,6 +7021,8 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TFeatureFlags& featu EnableTempTables = featureFlags.GetEnableTempTables(); EnableReplaceIfExistsForExternalEntities = featureFlags.GetEnableReplaceIfExistsForExternalEntities(); EnableTableDatetime64 = featureFlags.GetEnableTableDatetime64(); + EnableResourcePoolsOnServerless = featureFlags.GetEnableResourcePoolsOnServerless(); + EnableExternalDataSourcesOnServerless = featureFlags.GetEnableExternalDataSourcesOnServerless(); } void TSchemeShard::ConfigureStatsBatching(const NKikimrConfig::TSchemeShardConfig& config, const TActorContext& ctx) { @@ -7269,6 +7291,10 @@ void TSchemeShard::Handle(TEvSchemeShard::TEvLogin::TPtr &ev, const TActorContex } void TSchemeShard::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext&) { + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "Handle TEvTxProxySchemeCache::TEvNavigateKeySetResult" + << ", at schemeshard: " << TabletID()); + using TNavigate = NSchemeCache::TSchemeCacheNavigate; std::unique_ptr request(ev->Get()->Request.Release()); if (request->ResultSet.size() != 1) { @@ -7281,15 +7307,19 @@ void TSchemeShard::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& if (entry.DomainInfo->Params.HasStatisticsAggregator()) { StatisticsAggregatorId = 
TTabletId(entry.DomainInfo->Params.GetStatisticsAggregator()); + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "Handle TEvTxProxySchemeCache::TEvNavigateKeySetResult, StatisticsAggregatorId=" << StatisticsAggregatorId + << ", at schemeshard: " << TabletID()); ConnectToSA(); } } void TSchemeShard::Handle(TEvPrivate::TEvSendBaseStatsToSA::TPtr&, const TActorContext& ctx) { - SendBaseStatsToSA(); - auto seconds = SendStatsIntervalMaxSeconds - SendStatsIntervalMinSeconds; - ctx.Schedule(TDuration::Seconds(SendStatsIntervalMinSeconds + RandomNumber(seconds)), - new TEvPrivate::TEvSendBaseStatsToSA()); + TDuration delta = SendBaseStatsToSA(); + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "Schedule next SendBaseStatsToSA in " << delta + << ", at schemeshard: " << TabletID()); + ctx.Schedule(delta, new TEvPrivate::TEvSendBaseStatsToSA()); } void TSchemeShard::InitializeStatistics(const TActorContext& ctx) { @@ -7313,12 +7343,21 @@ void TSchemeShard::ResolveSA() { Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate.release())); } else { StatisticsAggregatorId = subDomainInfo->GetTenantStatisticsAggregatorID(); + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "ResolveSA(), StatisticsAggregatorId=" << StatisticsAggregatorId + << ", at schemeshard: " << TabletID()); ConnectToSA(); } } void TSchemeShard::ConnectToSA() { - if (!EnableStatistics || !StatisticsAggregatorId) { + if (!EnableStatistics) + return; + + if (!StatisticsAggregatorId) { + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "ConnectToSA(), no StatisticsAggregatorId" + << ", at schemeshard: " << TabletID()); return; } auto policy = NTabletPipe::TClientRetryPolicy::WithRetries(); @@ -7333,18 +7372,28 @@ void TSchemeShard::ConnectToSA() { LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, "ConnectToSA()" << ", 
pipe client id: " << SAPipeClientId - << ", at schemeshard: " << TabletID()); + << ", at schemeshard: " << TabletID() + << ", StatisticsAggregatorId: " << StatisticsAggregatorId + << ", at schemeshard: " << TabletID() + ); } -void TSchemeShard::SendBaseStatsToSA() { +TDuration TSchemeShard::SendBaseStatsToSA() { if (!EnableStatistics) { - return; + return TDuration::Seconds(30); } if (!SAPipeClientId) { ResolveSA(); if (!StatisticsAggregatorId) { - return; + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "SendBaseStatsToSA(), no StatisticsAggregatorId" + << ", at schemeshard: " << TabletID()); + return TDuration::Seconds(30); + } else { + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "SendBaseStatsToSA(), StatisticsAggregatorId=" << StatisticsAggregatorId + << ", at schemeshard: " << TabletID()); } } @@ -7359,6 +7408,7 @@ void TSchemeShard::SendBaseStatsToSA() { entryPathId->SetLocalId(pathId.LocalPathId); entry->SetRowCount(aggregated.RowCount); entry->SetBytesSize(aggregated.DataSize); + entry->SetIsColumnTable(false); ++count; } auto columnTablesPathIds = ColumnTables.GetAllPathIds(); @@ -7371,9 +7421,17 @@ void TSchemeShard::SendBaseStatsToSA() { entryPathId->SetLocalId(pathId.LocalPathId); entry->SetRowCount(aggregated.RowCount); entry->SetBytesSize(aggregated.DataSize); + entry->SetIsColumnTable(true); ++count; } + if (!count) { + LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, + "SendBaseStatsToSA() No tables to send" + << ", at schemeshard: " << TabletID()); + return TDuration::Seconds(30); + } + TString stats; stats.clear(); Y_PROTOBUF_SUPPRESS_NODISCARD record.SerializeToString(&stats); @@ -7388,6 +7446,9 @@ void TSchemeShard::SendBaseStatsToSA() { "SendBaseStatsToSA()" << ", path count: " << count << ", at schemeshard: " << TabletID()); + + return TDuration::Seconds(SendStatsIntervalMinSeconds + + RandomNumber(SendStatsIntervalMaxSeconds - 
SendStatsIntervalMinSeconds)); } } // namespace NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index da3c182c8a62..3615c8932ddf 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -187,6 +187,9 @@ class TSchemeShard TControlWrapper DisablePublicationsOfDropping; TControlWrapper FillAllocatePQ; + // Shared with NTabletFlatExecutor::TExecutor + TControlWrapper MaxCommitRedoMB; + TSplitSettings SplitSettings; struct TTenantInitState { @@ -326,6 +329,8 @@ class TSchemeShard bool EnableReplaceIfExistsForExternalEntities = false; bool EnableTempTables = false; bool EnableTableDatetime64 = false; + bool EnableResourcePoolsOnServerless = false; + bool EnableExternalDataSourcesOnServerless = false; TShardDeleter ShardDeleter; @@ -370,6 +375,8 @@ class TSchemeShard NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; THolder IgniteOperation(TProposeRequest& request, TOperationContext& context); + void AbortOperationPropose(const TTxId txId, TOperationContext& context); + THolder MakeDataShardProposal(const TPathId& pathId, const TOperationId& opId, const TString& body, const TActorContext& ctx) const; @@ -419,7 +426,7 @@ class TSchemeShard return MakeLocalId(NextLocalPathId); } - TPathId AllocatePathId () { + TPathId AllocatePathId() { TPathId next = PeekNextPathId(); ++NextLocalPathId; return next; @@ -1015,9 +1022,13 @@ class TSchemeShard void FillAsyncIndexInfo(const TPathId& tableId, NKikimrTxDataShard::TFlatSchemeTransaction& tx); void DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const; - void DescribeTableIndex(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TIndexDescription& entry); - void DescribeTableIndex(const TPathId& pathId, const 
TString& name, TTableIndexInfo::TPtr indexInfo, NKikimrSchemeOp::TIndexDescription& entry); + bool fillConfig, NKikimrSchemeOp::TTableDescription* entry) const; + void DescribeTableIndex(const TPathId& pathId, const TString& name, + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry + ) const; + void DescribeTableIndex(const TPathId& pathId, const TString& name, TTableIndexInfo::TPtr indexInfo, + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry + ) const; void DescribeCdcStream(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TCdcStreamDescription& desc); void DescribeCdcStream(const TPathId& pathId, const TString& name, TCdcStreamInfo::TPtr info, NKikimrSchemeOp::TCdcStreamDescription& desc); void DescribeSequence(const TPathId& pathId, const TString& name, @@ -1027,7 +1038,6 @@ class TSchemeShard void DescribeReplication(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeReplication(const TPathId& pathId, const TString& name, TReplicationInfo::TPtr info, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeBlobDepot(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TBlobDepotDescription& desc); - static void FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries); void Handle(NKikimr::NOlap::NBackground::TEvExecuteGeneralLocalTransaction::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NOlap::NBackground::TEvRemoveSession::TPtr& ev, const TActorContext& ctx); @@ -1386,7 +1396,7 @@ class TSchemeShard void InitializeStatistics(const TActorContext& ctx); void ResolveSA(); void ConnectToSA(); - void SendBaseStatsToSA(); + TDuration SendBaseStatsToSA(); diff --git a/ydb/core/tx/schemeshard/schemeshard_import.cpp b/ydb/core/tx/schemeshard/schemeshard_import.cpp index c43e8198cddf..222f785228ba 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard_import.cpp @@ -55,6 +55,10 @@ void TSchemeShard::FromXxportInfo(NKikimrImport::TImport& import, const TImportI *import.MutableEndTime() = SecondsToProtoTimeStamp(importInfo->EndTime.Seconds()); } + if (importInfo->UserSID) { + import.SetUserSID(*importInfo->UserSID); + } + switch (importInfo->State) { case TImportInfo::EState::Waiting: switch (GetMinState(importInfo)) { diff --git a/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp b/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp index 0b7bcf62ffb0..d494c514c3df 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp @@ -1,6 +1,7 @@ #include "schemeshard_xxport__tx_base.h" #include "schemeshard_import_flow_proposals.h" #include "schemeshard_import.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -91,6 +92,11 @@ struct TSchemeShard::TImport::TTxCancel: public TSchemeShard::TXxport::TTxBase { Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } + return respond(Ydb::StatusIds::SUCCESS); default: @@ -191,6 +197,11 @@ struct TSchemeShard::TImport::TTxCancelAck: public TSchemeShard::TXxport::TTxBas Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } + return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 25a66878d895..2ddf4c3051b6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -1,8 +1,10 @@ #include "schemeshard_xxport__tx_base.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_import_flow_proposals.h" #include "schemeshard_import_scheme_getter.h" 
#include "schemeshard_import_helpers.h" #include "schemeshard_import.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -52,7 +54,7 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { ); } - const TString& uid = GetUid(request.GetRequest().GetOperationParams().labels()); + const TString& uid = GetUid(request.GetRequest().GetOperationParams()); if (uid) { if (auto it = Self->ImportsByUid.find(uid); it != Self->ImportsByUid.end()) { if (IsSameDomain(it->second, request.GetDatabaseName())) { @@ -101,7 +103,7 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { settings.set_scheme(Ydb::Import::ImportFromS3Settings::HTTPS); } - importInfo = new TImportInfo(id, uid, TImportInfo::EKind::S3, settings, domainPath.Base()->PathId); + importInfo = new TImportInfo(id, uid, TImportInfo::EKind::S3, settings, domainPath.Base()->PathId, request.GetPeerName()); if (request.HasUserSID()) { importInfo->UserSID = request.GetUserSID(); @@ -148,15 +150,6 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } private: - static TString GetUid(const google::protobuf::Map& labels) { - auto it = labels.find("uid"); - if (it == labels.end()) { - return TString(); - } - - return it->second; - } - bool Reply( THolder response, const Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS, @@ -173,6 +166,8 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { AddIssue(entry, errorMessage); } + AuditLogImportStart(Request->Get()->Record, response->Record, Self); + Send(Request->Sender, std::move(response), 0, Request->Cookie); return true; @@ -1017,6 +1012,10 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } } }; // TTxProgress diff --git 
a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 54bc391e5c6a..8c8256d3f218 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -356,10 +356,10 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( const TTableInfo::TColumn& sourceColumn = source->Columns[colId]; if (col.HasDefaultFromSequence()) { - if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 + if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 && NPg::PgTypeIdFromTypeDesc(sourceColumn.PType.GetTypeDesc()) != INT8OID) { - TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg - ? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) + TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg + ? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) : NScheme::TypeName(NScheme::NTypeIds::Int64); errStr = Sprintf( "Sequence value type '%s' must be equal to the column type '%s'", sequenceType.c_str(), @@ -411,7 +411,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( return nullptr; default: break; - } + } } } else { auto* typeDesc = NPg::TypeDescFromPgTypeName(typeName); @@ -1614,8 +1614,9 @@ void TTableInfo::SetPartitioning(TVector&& newPartitioning) { Stats.PartitionStats.swap(newPartitionStats); Stats.Aggregated = newAggregatedStats; Partitions.swap(newPartitioning); - PreSerializedPathDescription.clear(); - PreSerializedPathDescriptionWithoutRangeKey.clear(); + PreserializedTablePartitions.clear(); + PreserializedTablePartitionsNoKeys.clear(); + PreserializedTableSplitBoundaries.clear(); CondEraseSchedule.clear(); InFlightCondErase.clear(); @@ -1630,7 +1631,7 @@ void TTableInfo::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& Stats.UpdateShardStats(datashardIdx, newStats); } -void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const 
TPartitionStats& newStats) { +void TTableAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats) { // Ignore stats from unknown datashard (it could have been split) if (!PartitionStats.contains(datashardIdx)) return; @@ -1719,33 +1720,10 @@ void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartition } } -void TAggregatedStats::UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - if (!TableStats.contains(pathId)) { - TableStats[pathId] = newStats; - return; - } - - TPartitionStats& oldStats = TableStats[pathId]; - - if (newStats.SeqNo <= oldStats.SeqNo) { - // Ignore outdated message - return; - } - - if (newStats.SeqNo.Generation > oldStats.SeqNo.Generation) { - // Reset incremental counter baselines if tablet has restarted - oldStats.ImmediateTxCompleted = 0; - oldStats.PlannedTxCompleted = 0; - oldStats.TxRejectedByOverload = 0; - oldStats.TxRejectedBySpace = 0; - oldStats.RowUpdates = 0; - oldStats.RowDeletes = 0; - oldStats.RowReads = 0; - oldStats.RangeReads = 0; - oldStats.RangeReadRows = 0; - } - TableStats[pathId].RowCount += (newStats.RowCount - oldStats.RowCount); - TableStats[pathId].DataSize += (newStats.DataSize - oldStats.DataSize); +void TAggregatedStats::UpdateTableStats(TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + auto& tableStats = TableStats[pathId]; + tableStats.PartitionStats[shardIdx]; // insert if none + tableStats.UpdateShardStats(shardIdx, newStats); } void TTableInfo::RegisterSplitMergeOp(TOperationId opId, const TTxState& txState) { @@ -2017,6 +1995,7 @@ TString TExportInfo::ToString() const { << " DomainPathId: " << DomainPathId << " ExportPathId: " << ExportPathId << " UserSID: '" << UserSID << "'" + << " PeerName: '" << PeerName << "'" << " State: " << State << " WaitTxId: " << WaitTxId << " Issue: '" << Issue << "'" diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h 
b/ydb/core/tx/schemeshard/schemeshard_info_types.h index b5297bf20037..7e2449596802 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -320,14 +321,18 @@ struct TPartitionStats { ui64 CPU = 0; }; -struct TAggregatedStats { +struct TTableAggregatedStats { TPartitionStats Aggregated; THashMap PartitionStats; - THashMap TableStats; size_t PartitionStatsUpdated = 0; void UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats); - void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats); +}; + +struct TAggregatedStats : public TTableAggregatedStats { + THashMap TableStats; + + void UpdateTableStats(TShardIdx datashardIdx, const TPathId& pathId, const TPartitionStats& newStats); }; struct TSubDomainInfo; @@ -436,8 +441,11 @@ struct TTableInfo : public TSimpleRefCount { TMap BackupHistory; TMap RestoreHistory; - TString PreSerializedPathDescription; - TString PreSerializedPathDescriptionWithoutRangeKey; + // Preserialized TDescribeSchemeResult with PathDescription.TablePartitions field filled + TString PreserializedTablePartitions; + TString PreserializedTablePartitionsNoKeys; + // Preserialized TDescribeSchemeResult with PathDescription.Table.SplitBoundary field filled + TString PreserializedTableSplitBoundaries; THashMap PerShardPartitionConfig; @@ -937,6 +945,8 @@ struct TTopicTabletInfo : TSimpleRefCount { THashSet ParentPartitionIds; THashSet ChildPartitionIds; + TShardIdx ShardIdx; + void SetStatus(const TActorContext& ctx, ui32 value) { if (value >= NKikimrPQ::ETopicPartitionStatus::Active && value <= NKikimrPQ::ETopicPartitionStatus::Deleted) { @@ -1122,6 +1132,7 @@ struct TTopicInfo : TSimpleRefCount { TTabletId BalancerTabletID = InvalidTabletId; TShardIdx BalancerShardIdx = InvalidShardIdx; THashMap Partitions; + size_t ActivePartitionCount = 0; TString 
PreSerializedPathDescription; // Cached path description TString PreSerializedPartitionsDescription; // Cached partition description @@ -1129,6 +1140,8 @@ struct TTopicInfo : TSimpleRefCount { TTopicStats Stats; void AddPartition(TShardIdx shardIdx, TTopicTabletInfo::TTopicPartitionInfo* partition) { + partition->ShardIdx = shardIdx; + TTopicTabletInfo::TPtr& pqShard = Shards[shardIdx]; if (!pqShard) { pqShard.Reset(new TTopicTabletInfo()); @@ -1218,6 +1231,7 @@ struct TTopicInfo : TSimpleRefCount { alterData->AlterVersion = AlterVersion + 1; Y_ABORT_UNLESS(alterData->TotalGroupCount); Y_ABORT_UNLESS(alterData->TotalPartitionCount); + Y_ABORT_UNLESS(0 < alterData->ActivePartitionCount && alterData->ActivePartitionCount <= alterData->TotalPartitionCount); Y_ABORT_UNLESS(alterData->NextPartitionId); Y_ABORT_UNLESS(alterData->MaxPartsPerTablet); alterData->KeySchema = KeySchema; @@ -1231,6 +1245,7 @@ struct TTopicInfo : TSimpleRefCount { TotalGroupCount = AlterData->TotalGroupCount; NextPartitionId = AlterData->NextPartitionId; TotalPartitionCount = AlterData->TotalPartitionCount; + ActivePartitionCount = AlterData->ActivePartitionCount; MaxPartsPerTablet = AlterData->MaxPartsPerTablet; if (!AlterData->TabletConfig.empty()) TabletConfig = std::move(AlterData->TabletConfig); @@ -2431,6 +2446,20 @@ struct TCdcStreamInfo : public TSimpleRefCount { return result; } + void FinishAlter() { + Y_ABORT_UNLESS(AlterData); + + AlterVersion = AlterData->AlterVersion; + Mode = AlterData->Mode; + Format = AlterData->Format; + VirtualTimestamps = AlterData->VirtualTimestamps; + ResolvedTimestamps = AlterData->ResolvedTimestamps; + AwsRegion = AlterData->AwsRegion; + State = AlterData->State; + + AlterData.Reset(); + } + ui64 AlterVersion = 1; EMode Mode; EFormat Format; @@ -2602,12 +2631,13 @@ struct TExportInfo: public TSimpleRefCount { static bool IsDropped(const TItem& item); }; - ui64 Id; + ui64 Id; // TxId from the original TEvCreateExportRequest TString Uid; EKind Kind; 
TString Settings; TPathId DomainPathId; TMaybe UserSID; + TString PeerName; // required for making audit log records TVector Items; TPathId ExportPathId = InvalidPathId; @@ -2632,12 +2662,14 @@ struct TExportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const TString& settings, - const TPathId domainPathId) + const TPathId domainPathId, + const TString& peerName) : Id(id) , Uid(uid) , Kind(kind) , Settings(settings) , DomainPathId(domainPathId) + , PeerName(peerName) { } @@ -2647,8 +2679,9 @@ struct TExportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const TSettingsPB& settingsPb, - const TPathId domainPathId) - : TExportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId) + const TPathId domainPathId, + const TString& peerName) + : TExportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId, peerName) { } @@ -2759,12 +2792,13 @@ struct TImportInfo: public TSimpleRefCount { static bool IsDone(const TItem& item); }; - ui64 Id; + ui64 Id; // TxId from the original TEvCreateImportRequest TString Uid; EKind Kind; Ydb::Import::ImportFromS3Settings Settings; TPathId DomainPathId; TMaybe UserSID; + TString PeerName; // required for making audit log records EState State = EState::Invalid; TString Issue; @@ -2780,12 +2814,14 @@ struct TImportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const Ydb::Import::ImportFromS3Settings& settings, - const TPathId domainPathId) + const TPathId domainPathId, + const TString& peerName) : Id(id) , Uid(uid) , Kind(kind) , Settings(settings) , DomainPathId(domainPathId) + , PeerName(peerName) { } @@ -3056,6 +3092,14 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->IndexName = row.template GetValue(); indexInfo->IndexType = row.template GetValue(); + // Restore the operation details: ImplTableDescription. 
+ if (row.template HaveValue()) { + NKikimrSchemeOp::TIndexCreationConfig creationConfig; + Y_ABORT_UNLESS(creationConfig.ParseFromString(row.template GetValue())); + + indexInfo->ImplTableDescription = std::move(*creationConfig.MutableIndexImplTableDescription()); + } + indexInfo->State = TIndexBuildInfo::EState( row.template GetValue()); indexInfo->Issue = @@ -3245,6 +3289,7 @@ struct TViewInfo : TSimpleRefCount { ui64 AlterVersion = 0; TString QueryText; + NYql::NProto::TTranslationSettings CapturedContext; }; struct TResourcePoolInfo : TSimpleRefCount { diff --git a/ydb/core/tx/schemeshard/schemeshard_path.cpp b/ydb/core/tx/schemeshard/schemeshard_path.cpp index f45e6190eb5f..0704fbcb5cee 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path.cpp @@ -1596,20 +1596,22 @@ bool TPath::IsInsideCdcStreamPath() const { return false; } - ++item; - for (; item != Elements.rend(); ++item) { - if (!(*item)->IsDirectory() && !(*item)->IsSubDomainRoot()) { - return false; - } - } - return true; } -bool TPath::IsTableIndex() const { +bool TPath::IsTableIndex(const TMaybe& type) const { Y_ABORT_UNLESS(IsResolved()); - return Base()->IsTableIndex(); + if (!Base()->IsTableIndex()) { + return false; + } + + if (!type.Defined()) { + return true; + } + + Y_ABORT_UNLESS(SS->Indexes.contains(Base()->PathId)); + return SS->Indexes.at(Base()->PathId)->Type == *type; } bool TPath::IsBackupTable() const { diff --git a/ydb/core/tx/schemeshard/schemeshard_path.h b/ydb/core/tx/schemeshard/schemeshard_path.h index a1474fef7176..4e6aec639db3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.h +++ b/ydb/core/tx/schemeshard/schemeshard_path.h @@ -5,6 +5,8 @@ #include +#include + namespace NKikimr::NSchemeShard { class TSchemeShard; @@ -159,7 +161,7 @@ class TPath { bool AtLocalSchemeShardPath() const; bool IsInsideTableIndexPath() const; bool IsInsideCdcStreamPath() const; - bool IsTableIndex() const; + bool IsTableIndex(const TMaybe& type 
= {}) const; bool IsBackupTable() const; bool IsAsyncReplicaTable() const; bool IsCdcStream() const; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index bf10858b1ff0..12e6a238570e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -8,6 +8,16 @@ #include +namespace { + +void FillPartitionConfig(const NKikimrSchemeOp::TPartitionConfig& in, NKikimrSchemeOp::TPartitionConfig& out) { + out.CopyFrom(in); + NKikimr::NSchemeShard::TPartitionConfigMerger::DeduplicateColumnFamiliesById(out); + out.MutableStorageRooms()->Clear(); +} + +} + namespace NKikimr { namespace NSchemeShard { @@ -62,6 +72,78 @@ static void FillTableStats(NKikimrSchemeOp::TPathDescription& pathDescription, c FillTableMetrics(pathDescription.MutableTabletMetrics(), stats); } +static void FillColumns( + const TTableInfo& tableInfo, + google::protobuf::RepeatedPtrField& out +) { + bool familyNamesBuilt = false; + THashMap familyNames; + + out.Reserve(tableInfo.Columns.size()); + for (const auto& col : tableInfo.Columns) { + const auto& cinfo = col.second; + if (cinfo.IsDropped()) + continue; + + auto* colDescr = out.Add(); + colDescr->SetName(cinfo.Name); + colDescr->SetType(NScheme::TypeName(cinfo.PType, cinfo.PTypeMod)); + auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(cinfo.PType, cinfo.PTypeMod); + colDescr->SetTypeId(columnType.TypeId); + if (columnType.TypeInfo) { + *colDescr->MutableTypeInfo() = *columnType.TypeInfo; + } + colDescr->SetId(cinfo.Id); + colDescr->SetNotNull(cinfo.NotNull); + + if (cinfo.Family != 0) { + colDescr->SetFamily(cinfo.Family); + + if (!familyNamesBuilt) { + for (const auto& family : tableInfo.PartitionConfig().GetColumnFamilies()) { + if (family.HasName() && family.HasId()) { + familyNames[family.GetId()] = family.GetName(); + } + } + familyNamesBuilt = true; + } + + auto it = 
familyNames.find(cinfo.Family); + if (it != familyNames.end() && !it->second.empty()) { + colDescr->SetFamilyName(it->second); + } + } + + colDescr->SetIsBuildInProgress(cinfo.IsBuildInProgress); + + switch (cinfo.DefaultKind) { + case ETableColumnDefaultKind::None: + break; + case ETableColumnDefaultKind::FromSequence: + colDescr->SetDefaultFromSequence(cinfo.DefaultValue); + break; + case ETableColumnDefaultKind::FromLiteral: + Y_ABORT_UNLESS(colDescr->MutableDefaultFromLiteral()->ParseFromString( + cinfo.DefaultValue)); + break; + } + } +} + +static void FillKeyColumns( + const TTableInfo& tableInfo, + google::protobuf::RepeatedPtrField& names, + google::protobuf::RepeatedField& ids +) { + Y_ABORT_UNLESS(!tableInfo.KeyColumnIds.empty()); + names.Reserve(tableInfo.KeyColumnIds.size()); + ids.Reserve(tableInfo.KeyColumnIds.size()); + for (ui32 keyColId : tableInfo.KeyColumnIds) { + *names.Add() = tableInfo.Columns.at(keyColId).Name; + *ids.Add() = keyColId; + } +} + void TPathDescriber::FillPathDescr(NKikimrSchemeOp::TDirEntry* descr, TPathElement::TPtr pathEl, TPathElement::EPathSubType subType) { FillChildDescr(descr, pathEl); @@ -207,6 +289,68 @@ void TPathDescriber::DescribeDir(const TPath& path) { DescribeChildren(path); } +void FillTableBoundaries( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo +) { + TString errStr; + // Number of split boundaries equals to number of partitions - 1 + result->Reserve(tableInfo->GetPartitions().size() - 1); + for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { + const auto& p = tableInfo->GetPartitions()[pi]; + TSerializedCellVec endKey(p.EndOfRange); + auto boundary = result->Add()->MutableKeyPrefix(); + for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ + const auto& c = endKey.GetCells()[ki]; + auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; + bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); + Y_ABORT_UNLESS(ok, "Failed to 
build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); + } + } +} + +void FillTablePartitions( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool includeKeys +) { + result->Reserve(tableInfo->GetPartitions().size()); + for (auto& p : tableInfo->GetPartitions()) { + const auto& tabletId = ui64(shardInfos.at(p.ShardIdx).TabletID); + const auto& key = p.EndOfRange; + + auto part = result->Add(); + part->SetDatashardId(tabletId); + if (includeKeys) { + // Currently we only support uniform partitioning where each range is [start, end) + // +inf as the end of the last range is represented by empty TCell vector + part->SetIsPoint(false); + part->SetIsInclusive(false); + part->SetEndOfRangeKeyPrefix(key); + } + } +} + +const TString& GetSerializedTablePartitions( + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool returnRangeKey +) { + TString& cache = (returnRangeKey + ? tableInfo->PreserializedTablePartitions + : tableInfo->PreserializedTablePartitionsNoKeys + ); + + if (cache.empty()) { + NKikimrScheme::TEvDescribeSchemeResult result; + FillTablePartitions(result.MutablePathDescription()->MutableTablePartitions(), tableInfo, shardInfos, returnRangeKey); + Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&cache); + } + + return cache; +} + void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPathElement::TPtr pathEl) { const NScheme::TTypeRegistry* typeRegistry = AppData(ctx)->TypeRegistry; const TTableInfo::TPtr tableInfo = *Self->Tables.FindPtr(pathId); @@ -220,6 +364,7 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa bool returnBoundaries = false; bool returnRangeKey = true; bool returnSetVal = Params.GetOptions().GetReturnSetVal(); + bool returnIndexTableBoundaries = Params.GetOptions().GetReturnIndexTableBoundaries(); if (Params.HasOptions()) { returnConfig = 
Params.GetOptions().GetReturnPartitionConfig(); returnPartitioning = Params.GetOptions().GetReturnPartitioningInfo(); @@ -228,50 +373,30 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa returnRangeKey = Params.GetOptions().GetReturnRangeKey(); } - Self->DescribeTable(tableInfo, typeRegistry, returnConfig, returnBoundaries, entry); + Self->DescribeTable(tableInfo, typeRegistry, returnConfig, entry); entry->SetName(pathEl->Name); - if (returnPartitioning) { - // partitions - if (tableInfo->PreSerializedPathDescription.empty()) { + if (returnBoundaries) { + // split boundaries (split keys without shard's tablet-ids) + if (tableInfo->PreserializedTableSplitBoundaries.empty()) { NKikimrScheme::TEvDescribeSchemeResult preSerializedResult; - NKikimrScheme::TEvDescribeSchemeResult preSerializedResultWithoutRangeKey; - - NKikimrSchemeOp::TPathDescription& pathDescription = *preSerializedResult.MutablePathDescription(); - NKikimrSchemeOp::TPathDescription& pathDescriptionWithoutRangeKey = *preSerializedResultWithoutRangeKey.MutablePathDescription(); - - pathDescription.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - pathDescriptionWithoutRangeKey.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - for (auto& p : tableInfo->GetPartitions()) { - auto part = pathDescription.AddTablePartitions(); - auto partWithoutRangeKey = pathDescriptionWithoutRangeKey.AddTablePartitions(); - auto datashardIdx = p.ShardIdx; - auto datashardTabletId = Self->ShardInfos[datashardIdx].TabletID; - // Currently we only support uniform partitioning where each range is [start, end) - // +inf as the end of the last range is represented by empty TCell vector - part->SetDatashardId(ui64(datashardTabletId)); - partWithoutRangeKey->SetDatashardId(ui64(datashardTabletId)); - - part->SetIsPoint(false); - partWithoutRangeKey->SetIsPoint(false); - - part->SetIsInclusive(false); - partWithoutRangeKey->SetIsInclusive(false); - - 
part->SetEndOfRangeKeyPrefix(p.EndOfRange); - } - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreSerializedPathDescription); - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResultWithoutRangeKey.SerializeToString(&tableInfo->PreSerializedPathDescriptionWithoutRangeKey); - } - if (returnRangeKey) { - Result->PreSerializedData += tableInfo->PreSerializedPathDescription; - } else { - Result->PreSerializedData += tableInfo->PreSerializedPathDescriptionWithoutRangeKey; - } - if (!pathEl->IsCreateFinished()) { - tableInfo->PreSerializedPathDescription.clear(); // KIKIMR-4337 - tableInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + auto& tableDesc = *preSerializedResult.MutablePathDescription()->MutableTable(); + FillTableBoundaries(tableDesc.MutableSplitBoundary(), tableInfo); + Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreserializedTableSplitBoundaries); } + Result->PreSerializedData += tableInfo->PreserializedTableSplitBoundaries; + } + + if (returnPartitioning) { + // partitions (shard tablet-ids with range keys) + Result->PreSerializedData += GetSerializedTablePartitions(tableInfo, Self->ShardInfos, returnRangeKey); + } + + // KIKIMR-4337: table info is in flux until table is finally created + if (!pathEl->IsCreateFinished()) { + tableInfo->PreserializedTablePartitions.clear(); + tableInfo->PreserializedTablePartitionsNoKeys.clear(); + tableInfo->PreserializedTableSplitBoundaries.clear(); } FillAggregatedStats(*Result->Record.MutablePathDescription(), tableInfo->GetStats()); @@ -364,7 +489,9 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa switch (childPath->PathType) { case NKikimrSchemeOp::EPathTypeTableIndex: - Self->DescribeTableIndex(childPathId, childName, *entry->AddTableIndexes()); + Self->DescribeTableIndex( + childPathId, childName, returnConfig, returnIndexTableBoundaries, *entry->AddTableIndexes() + ); break; case 
NKikimrSchemeOp::EPathTypeCdcStream: Self->DescribeCdcStream(childPathId, childName, *entry->AddCdcStreams()); @@ -427,7 +554,9 @@ void TPathDescriber::DescribeColumnTable(TPathId pathId, TPathElement::TPtr path description->MutableSchema()->SetVersion(description->GetSchema().GetVersion() + description->GetSchemaPresetVersionAdj()); } if (tableInfo->GetStats().TableStats.contains(pathId)) { - FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId)); + FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId).Aggregated); + } else { + FillTableStats(*pathDescription, TPartitionStats()); } } } @@ -586,8 +715,12 @@ void TPathDescriber::DescribeRtmrVolume(TPathId pathId, TPathElement::TPtr pathE } void TPathDescriber::DescribeTableIndex(const TPath& path) { - Self->DescribeTableIndex(path.Base()->PathId, path.Base()->Name, - *Result->Record.MutablePathDescription()->MutableTableIndex()); + bool returnConfig = Params.GetReturnPartitionConfig(); + bool returnBoundaries = Params.HasOptions() && Params.GetOptions().GetReturnBoundaries(); + + Self->DescribeTableIndex(path.Base()->PathId, path.Base()->Name, returnConfig, returnBoundaries, + *Result->Record.MutablePathDescription()->MutableTableIndex() + ); DescribeChildren(path); } @@ -903,6 +1036,7 @@ void TPathDescriber::DescribeView(const TActorContext&, TPathId pathId, TPathEle PathIdFromPathId(pathId, entry->MutablePathId()); entry->SetVersion(viewInfo->AlterVersion); entry->SetQueryText(viewInfo->QueryText); + *entry->MutableCapturedContext() = viewInfo->CapturedContext; } void TPathDescriber::DescribeResourcePool(TPathId pathId, TPathElement::TPtr pathEl) { @@ -1108,80 +1242,21 @@ THolder DescribePath( return DescribePath(self, ctx, pathId, options); } -void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const +void 
TSchemeShard::DescribeTable( + const TTableInfo::TPtr tableInfo, + const NScheme::TTypeRegistry* typeRegistry, + bool fillConfig, + NKikimrSchemeOp::TTableDescription* entry + ) const { Y_UNUSED(typeRegistry); - THashMap familyNames; - bool familyNamesBuilt = false; entry->SetTableSchemaVersion(tableInfo->AlterVersion); - entry->MutableColumns()->Reserve(tableInfo->Columns.size()); - for (auto col : tableInfo->Columns) { - const auto& cinfo = col.second; - if (cinfo.IsDropped()) - continue; - - auto colDescr = entry->AddColumns(); - colDescr->SetName(cinfo.Name); - colDescr->SetType(NScheme::TypeName(cinfo.PType, cinfo.PTypeMod)); - auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(cinfo.PType, cinfo.PTypeMod); - colDescr->SetTypeId(columnType.TypeId); - if (columnType.TypeInfo) { - *colDescr->MutableTypeInfo() = *columnType.TypeInfo; - } - colDescr->SetId(cinfo.Id); - colDescr->SetNotNull(cinfo.NotNull); - - if (cinfo.Family != 0) { - colDescr->SetFamily(cinfo.Family); - - if (!familyNamesBuilt) { - for (const auto& family : tableInfo->PartitionConfig().GetColumnFamilies()) { - if (family.HasName() && family.HasId()) { - familyNames[family.GetId()] = family.GetName(); - } - } - familyNamesBuilt = true; - } - - auto it = familyNames.find(cinfo.Family); - if (it != familyNames.end() && !it->second.empty()) { - colDescr->SetFamilyName(it->second); - } - } - - colDescr->SetIsBuildInProgress(cinfo.IsBuildInProgress); - - switch (cinfo.DefaultKind) { - case ETableColumnDefaultKind::None: - break; - case ETableColumnDefaultKind::FromSequence: - colDescr->SetDefaultFromSequence(cinfo.DefaultValue); - break; - case ETableColumnDefaultKind::FromLiteral: - Y_ABORT_UNLESS(colDescr->MutableDefaultFromLiteral()->ParseFromString( - cinfo.DefaultValue)); - break; - } - } - Y_ABORT_UNLESS(!tableInfo->KeyColumnIds.empty()); - - entry->MutableKeyColumnNames()->Reserve(tableInfo->KeyColumnIds.size()); - entry->MutableKeyColumnIds()->Reserve(tableInfo->KeyColumnIds.size()); - 
for (ui32 keyColId : tableInfo->KeyColumnIds) { - entry->AddKeyColumnNames(tableInfo->Columns[keyColId].Name); - entry->AddKeyColumnIds(keyColId); - } + FillColumns(*tableInfo, *entry->MutableColumns()); + FillKeyColumns(*tableInfo, *entry->MutableKeyColumnNames(), *entry->MutableKeyColumnIds()); if (fillConfig) { - entry->MutablePartitionConfig()->CopyFrom(tableInfo->PartitionConfig()); - TPartitionConfigMerger::DeduplicateColumnFamiliesById(*entry->MutablePartitionConfig()); - entry->MutablePartitionConfig()->MutableStorageRooms()->Clear(); - } - - if (fillBoundaries) { - FillTableBoundaries(tableInfo, *entry->MutableSplitBoundary()); + FillPartitionConfig(tableInfo->PartitionConfig(), *entry->MutablePartitionConfig()); } if (tableInfo->HasTTLSettings()) { @@ -1196,17 +1271,17 @@ void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme } void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name, - NKikimrSchemeOp::TIndexDescription& entry) + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry) const { auto it = Indexes.FindPtr(pathId); Y_ABORT_UNLESS(it, "TableIndex is not found"); TTableIndexInfo::TPtr indexInfo = *it; - DescribeTableIndex(pathId, name, indexInfo, entry); + DescribeTableIndex(pathId, name, indexInfo, fillConfig, fillBoundaries, entry); } void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name, TTableIndexInfo::TPtr indexInfo, - NKikimrSchemeOp::TIndexDescription& entry) + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry) const { Y_ABORT_UNLESS(indexInfo, "Empty index info"); @@ -1226,23 +1301,26 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name *entry.MutableDataColumnNames()->Add() = dataColumns; } - Y_ABORT_UNLESS(PathsById.contains(pathId)); - auto indexPath = PathsById.at(pathId); - + auto indexPath = *PathsById.FindPtr(pathId); + Y_ABORT_UNLESS(indexPath); 
Y_ABORT_UNLESS(indexPath->GetChildren().size() == 1); - const auto& indexImplPathId = indexPath->GetChildren().begin()->second; + const auto& indexImplTablePathId = indexPath->GetChildren().begin()->second; - Y_ABORT_UNLESS(Tables.contains(indexImplPathId)); - auto indexImplTable = Tables.at(indexImplPathId); + auto tableInfo = *Tables.FindPtr(indexImplTablePathId); + Y_ABORT_UNLESS(tableInfo); - const auto& tableStats = indexImplTable->GetStats().Aggregated; + const auto& tableStats = tableInfo->GetStats().Aggregated; entry.SetDataSize(tableStats.DataSize + tableStats.IndexSize); - *entry.MutablePartitioningPolicy() = indexImplTable->PartitionConfig().GetPartitioningPolicy(); - if (const auto& explicitPartitions = indexImplTable->TableDescription.GetSplitBoundary(); - !explicitPartitions.empty() - ) { - *entry.MutableExplicitPartitions()->MutableSplitBoundary() = explicitPartitions; + auto* tableDescription = entry.AddIndexImplTableDescriptions(); + if (fillConfig) { + FillPartitionConfig(tableInfo->PartitionConfig(), *tableDescription->MutablePartitionConfig()); + } + if (fillBoundaries) { + // column info is necessary for split boundary type conversion + FillColumns(*tableInfo, *tableDescription->MutableColumns()); + FillKeyColumns(*tableInfo, *tableDescription->MutableKeyColumnNames(), *tableDescription->MutableKeyColumnIds()); + FillTableBoundaries(tableDescription->MutableSplitBoundary(), tableInfo); } } @@ -1389,22 +1467,5 @@ void TSchemeShard::DescribeBlobDepot(const TPathId& pathId, const TString& name, desc.SetTabletId(static_cast(it->second->BlobDepotTabletId)); } -void TSchemeShard::FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries) { - TString errStr; - // Number of split boundaries equals to number of partitions - 1 - boundaries.Reserve(tableInfo->GetPartitions().size() - 1); - for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { - const auto& p = tableInfo->GetPartitions()[pi]; - 
TSerializedCellVec endKey(p.EndOfRange); - auto boundary = boundaries.Add()->MutableKeyPrefix(); - for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ - const auto& c = endKey.GetCells()[ki]; - auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; - bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); - Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); - } - } -} - } // NSchemeShard } // NKikimr diff --git a/ydb/core/tx/schemeshard/schemeshard_path_element.cpp b/ydb/core/tx/schemeshard/schemeshard_path_element.cpp index 05a0a9068ebc..c8901de18568 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_element.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_element.cpp @@ -411,8 +411,8 @@ bool TPathElement::CheckFileStoreSpaceChange(TFileStoreSpace newSpace, TFileStor CheckSpaceChanged(FileStoreSpaceHDD, newSpace.HDD, oldSpace.HDD, errStr, "filestore", " (hdd)")); } -void TPathElement::SetAsyncReplica() { - IsAsyncReplica = true; +void TPathElement::SetAsyncReplica(bool value) { + IsAsyncReplica = value; } bool TPathElement::HasRuntimeAttrs() const { diff --git a/ydb/core/tx/schemeshard/schemeshard_path_element.h b/ydb/core/tx/schemeshard/schemeshard_path_element.h index e9caae4888cd..1cdce90e1a4d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_element.h +++ b/ydb/core/tx/schemeshard/schemeshard_path_element.h @@ -160,7 +160,7 @@ struct TPathElement : TSimpleRefCount { void ChangeFileStoreSpaceBegin(TFileStoreSpace newSpace, TFileStoreSpace oldSpace); void ChangeFileStoreSpaceCommit(TFileStoreSpace newSpace, TFileStoreSpace oldSpace); bool CheckFileStoreSpaceChange(TFileStoreSpace newSpace, TFileStoreSpace oldSpace, TString& errStr); - void SetAsyncReplica(); + void SetAsyncReplica(bool value); bool HasRuntimeAttrs() const; void SerializeRuntimeAttrs(google::protobuf::RepeatedPtrField* userAttrs) const; }; diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h 
b/ydb/core/tx/schemeshard/schemeshard_schema.h index ac7cce9805c8..2d4511d15a2d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -761,6 +761,7 @@ struct Schema : NIceDb::Schema { struct ImportsLimit : Column<29, NScheme::NTypeIds::Uint64> {}; struct AuditSettings : Column<30, NScheme::NTypeIds::String> {}; struct ServerlessComputeResourcesMode : Column<31, NScheme::NTypeIds::Uint32> { using Type = EServerlessComputeResourcesMode; }; + struct ColumnTableColumnsLimit : Column<32, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -794,7 +795,8 @@ struct Schema : NIceDb::Schema { ExportsLimit, ImportsLimit, AuditSettings, - ServerlessComputeResourcesMode + ServerlessComputeResourcesMode, + ColumnTableColumnsLimit >; }; @@ -1160,6 +1162,7 @@ struct Schema : NIceDb::Schema { struct StartTime : Column<14, NScheme::NTypeIds::Uint64> {}; struct EndTime : Column<15, NScheme::NTypeIds::Uint64> {}; + struct PeerName : Column<16, NScheme::NTypeIds::Utf8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -1177,7 +1180,8 @@ struct Schema : NIceDb::Schema { Kind, UserSID, StartTime, - EndTime + EndTime, + PeerName >; }; @@ -1314,6 +1318,9 @@ struct Schema : NIceDb::Schema { struct AlterMainTableTxStatus : Column<32, NScheme::NTypeIds::Uint32> { using Type = NKikimrScheme::EStatus; }; struct AlterMainTableTxDone : Column<33, NScheme::NTypeIds::Bool> {}; + // Serialized as string NKikimrSchemeOp::TIndexCreationConfig protobuf. 
+ struct CreationConfig : Column<34, NScheme::NTypeIds::String> { using Type = TString; }; + using TKey = TableKey; using TColumns = TableColumns< Id, @@ -1348,7 +1355,8 @@ struct Schema : NIceDb::Schema { BuildKind, AlterMainTableTxId, AlterMainTableTxStatus, - AlterMainTableTxDone + AlterMainTableTxDone, + CreationConfig >; }; @@ -1482,6 +1490,7 @@ struct Schema : NIceDb::Schema { struct StartTime : Column<11, NScheme::NTypeIds::Uint64> {}; struct EndTime : Column<12, NScheme::NTypeIds::Uint64> {}; + struct PeerName : Column<13, NScheme::NTypeIds::Utf8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -1496,7 +1505,8 @@ struct Schema : NIceDb::Schema { Issue, UserSID, StartTime, - EndTime + EndTime, + PeerName >; }; @@ -1779,9 +1789,11 @@ struct Schema : NIceDb::Schema { struct PathId: Column<1, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; }; struct AlterVersion: Column<2, NScheme::NTypeIds::Uint64> {}; struct QueryText: Column<3, NScheme::NTypeIds::String> {}; + // CapturedContext is a serialized NYql::NProto::TTranslationSettings. 
+ struct CapturedContext: Column<4, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct BackgroundSessions: Table<109> { diff --git a/ydb/core/tx/schemeshard/schemeshard_types.cpp b/ydb/core/tx/schemeshard/schemeshard_types.cpp index 443cafd3e7ff..eaad291862a8 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_types.cpp @@ -20,6 +20,9 @@ TSchemeLimits TSchemeLimits::FromProto(const NKikimrScheme::TSchemeLimits& proto if (proto.HasMaxTableColumns()) { result.MaxTableColumns = proto.GetMaxTableColumns(); } + if (proto.HasMaxColumnTableColumns()) { + result.MaxColumnTableColumns = proto.GetMaxColumnTableColumns(); + } if (proto.HasMaxTableColumnNameLength()) { result.MaxTableColumnNameLength = proto.GetMaxTableColumnNameLength(); } @@ -69,6 +72,7 @@ NKikimrScheme::TSchemeLimits TSchemeLimits::AsProto() const { result.SetMaxAclBytesSize(MaxAclBytesSize); result.SetMaxTableColumns(MaxTableColumns); + result.SetMaxColumnTableColumns(MaxColumnTableColumns); result.SetMaxTableColumnNameLength(MaxTableColumnNameLength); result.SetMaxTableKeyColumns(MaxTableKeyColumns); result.SetMaxTableIndices(MaxTableIndices); diff --git a/ydb/core/tx/schemeshard/schemeshard_types.h b/ydb/core/tx/schemeshard/schemeshard_types.h index 5e843c58a293..6afcdeedb1e2 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_types.h @@ -39,6 +39,7 @@ struct TSchemeLimits { // table ui64 MaxTableColumns = 200; + ui64 MaxColumnTableColumns = 10000; ui64 MaxTableColumnNameLength = 255; ui64 MaxTableKeyColumns = 20; ui64 MaxTableIndices = 20; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 13642fc1054e..74ce3f4b30bc 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -317,6 +317,10 @@ 
NKikimrSchemeOp::TTableDescription CalcImplTableDesc( result.AddKeyColumnNames(keyName); } + if (indexTableDesc.HasReplicationConfig()) { + result.MutableReplicationConfig()->CopyFrom(indexTableDesc.GetReplicationConfig()); + } + return result; } @@ -384,6 +388,10 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( result.AddKeyColumnNames(keyName); } + if (indexTableDesc.HasReplicationConfig()) { + result.MutableReplicationConfig()->CopyFrom(indexTableDesc.GetReplicationConfig()); + } + return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index c4a4d1dcf244..b74161e931ef 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -54,7 +54,7 @@ inline NKikimrSchemeOp::TModifyScheme TransactionTemplate(const TString& working return tx; } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 total, const TConstArrayRef& keyColumnTypes); class TShardDeleter { diff --git a/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp new file mode 100644 index 000000000000..8990964b78d3 --- /dev/null +++ b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp @@ -0,0 +1,14 @@ +#include + +namespace NKikimr::NSchemeShard { + +TString GetUid(const Ydb::Operations::OperationParams& operationParams) { + const auto& labels = operationParams.labels(); + auto it = labels.find("uid"); + if (it != labels.end()) { + return it->second; + } + return {}; +} + +} // NKikimr::NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h new file mode 100644 index 000000000000..e6fafda10fb0 --- /dev/null +++ b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h @@ -0,0 +1,13 @@ +#pragma once + 
+#include + +namespace Ydb::Operations { + class OperationParams; +} + +namespace NKikimr::NSchemeShard { + +TString GetUid(const Ydb::Operations::OperationParams& operationParams); + +} // NKikimr::NSchemeShard diff --git a/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp b/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp index 1338f2d8f42d..771b929051c0 100644 --- a/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp +++ b/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp @@ -7,9 +7,27 @@ #include #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NKikimr::NWrappers::NTestHelpers; +namespace { + +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); +} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + +} + Y_UNIT_TEST_SUITE(TBackupTests) { using TFillFn = std::function; diff --git a/ydb/core/tx/schemeshard/ut_backup/ya.make b/ydb/core/tx/schemeshard/ut_backup/ya.make index d9ee6dd81405..aac9bc5f9334 100644 --- a/ydb/core/tx/schemeshard/ut_backup/ya.make +++ b/ydb/core/tx/schemeshard/ut_backup/ya.make @@ -20,6 +20,7 @@ IF (NOT OS_WINDOWS) library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp index 3f1ab7be33fc..c401758fb733 100644 --- a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp +++ b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp @@ -3603,6 +3603,35 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { NLs::IsBackupTable(true), }); + // cannot alter backup table + TestAlterTable(runtime, ++txId, "/MyRoot", R"( + Name: "CopyTable" + DropColumns { Name: "value" } + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add cdc stream to backup table + TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "CopyTable" + StreamDescription { + Name: 
"Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add sequence to backup table + TestCreateSequence(runtime, ++txId, "/MyRoot/CopyTable", R"( + Name: "Sequence" + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add index to backup table + TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/CopyTable", "Index", {"value"}); + env.TestWaitNotification(runtime, txId); + { + auto desc = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", txId); + UNIT_ASSERT_EQUAL(desc.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_REJECTED); + } + // consistent copy table TestConsistentCopyTables(runtime, ++txId, "/", R"( CopyTableDescriptions { @@ -3741,16 +3770,18 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { )", {NKikimrScheme::StatusInvalidParameter}); // cannot remove 'IsBackup' property from existent table - TestAlterTable(runtime, ++txId, "/MyRoot", R"( + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(AlterTableRequest(++txId, "/MyRoot", R"( Name: "CopyTable" IsBackup: false - )", {NKikimrScheme::StatusInvalidParameter}); + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusInvalidParameter}); - TestAlterTable(runtime, ++txId, "/MyRoot", R"( + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(AlterTableRequest(++txId, "/MyRoot", R"( Name: "CopyTable" IsBackup: false DropColumns { Name: "value" } - )", {NKikimrScheme::StatusInvalidParameter}); + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusInvalidParameter}); // sanity check @@ -9912,9 +9943,16 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { : KeyColumnTypes(keyColumnTypes.begin(), keyColumnTypes.end()) {} - TString FindSplitKey(const TVector>& histogramKeys) const { - NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys); - TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, 
KeyColumnTypes); + TString FindSplitKey(const TVector>& histogramKeys, TVector histogramValues = {}, ui64 total = 0) const { + if (histogramValues.empty() && !histogramKeys.empty()) { + for (size_t i = 0; i < histogramKeys.size(); i++) { + histogramValues.push_back(i + 1); + } + total = histogramKeys.size() + 1; + } + + NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys, histogramValues); + TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, total, KeyColumnTypes); return PrintKey(splitKey); } @@ -9964,11 +10002,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { return NKikimr::TSerializedCellVec(cells); } - NKikimrTableStats::THistogram FillHistogram(const TVector>& keys) const { + NKikimrTableStats::THistogram FillHistogram(const TVector>& keys, const TVector& values) const { NKikimrTableStats::THistogram histogram; - for (const auto& k : keys) { - TSerializedCellVec sk(MakeCells(k)); - histogram.AddBuckets()->SetKey(sk.GetBuffer()); + for (auto i : xrange(keys.size())) { + TSerializedCellVec sk(MakeCells(keys[i])); + auto bucket = histogram.AddBuckets(); + bucket->SetKey(sk.GetBuffer()); + bucket->SetValue(values[i]); } return histogram; } @@ -10085,7 +10125,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { { "2", "f", "42" }, { "3", "cccccccccccccccccccccccc", "42" } }); - UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : d, Uint32 : NULL)"); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : c, Uint32 : NULL)"); } { @@ -10102,6 +10142,140 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { }); UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : bbb, Uint32 : NULL)"); } + + { + TString splitKey = + schemaHelper.FindSplitKey({}); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 53, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, 
{ + 25, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 75, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 24, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 76, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + }, 10); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 4, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 30, + 40, + 70 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 7, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 30, + 40, + 70, + 90, + 91, + 92, + 93, + 94, + 95 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 1, Utf8 : NULL, Uint32 : NULL)"); + } } Y_UNIT_TEST(ListNotCreatedDirCase) { @@ -10941,6 +11115,74 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { AssertReserve("/MyRoot/Topic2", 3 * 17); } + Y_UNIT_TEST(TopicWithAutopartitioningReserveSize) { + TTestEnvOptions opts; 
+ opts.EnableTopicSplitMerge(true); + opts.EnablePQConfigTransactionsAtSchemeShard(true); + + TTestBasicRuntime runtime; + + TTestEnv env(runtime, opts); + ui64 txId = 100; + + const auto AssertReserve = [&] (TString path, ui64 expectedReservedStorage) { + TestDescribeResult(DescribePath(runtime, path), + {NLs::Finished, + NLs::TopicReservedStorage(expectedReservedStorage)}); + }; + + // create with WriteSpeedInBytesPerSecond + TestCreatePQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + TotalGroupCount: 1 + PartitionPerTablet: 1 + PQTabletConfig { + PartitionConfig { + LifetimeSeconds: 13 + WriteSpeedInBytesPerSecond : 19 + } + MeteringMode: METERING_MODE_RESERVED_CAPACITY + PartitionStrategy { + MinPartitionCount: 1 + MaxPartitionCount: 7 + PartitionStrategyType: CAN_SPLIT_AND_MERGE + } + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 1 * 13 * 19); + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Split { + Partition: 0 + SplitBoundary: 'A' + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 2 * 13 * 19); // There are only 2 active partitions + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Split { + Partition: 1 + SplitBoundary: '0' + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 3 * 13 * 19); // There are only 3 active partitions + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Merge { + Partition: 2 + AdjacentPartition: 4 + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 2 * 13 * 19); // There are only 2 active partitions + } + Y_UNIT_TEST(FindSubDomainPathId) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp b/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp new file mode 100644 index 000000000000..260b4765d771 --- /dev/null +++ 
b/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp @@ -0,0 +1,113 @@ +#include + +using namespace NKikimr; +using namespace NSchemeShard; +using namespace NSchemeShardUT_Private; + +Y_UNIT_TEST_SUITE(TSchemeShardCheckProposeSize) { + + //TODO: can't check all operations as many of them do not implement + // TSubOperation::AbortPropose() properly and will abort. + + Y_UNIT_TEST(CopyTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + // Take control over MaxCommitRedoMB ICB setting. + // Drop down its min-value limit to be able to set it as low as test needs. + TControlWrapper MaxCommitRedoMB; + { + runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + MaxCommitRedoMB.Reset(200, 1, 4096); + } + + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "table" + Columns { Name: "key" Type: "Uint64"} + Columns { Name: "value" Type: "Utf8"} + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // 1. Set MaxCommitRedoMB to 1 and try to create table. + // + // (Check at the operation's Propose tests commit redo size against (MaxCommitRedoMB - 1) + // to give 1MB leeway to executer/tablet inner stuff to may be do "something extra". + // So MaxCommitRedoMB = 1 means effective 0 for the size of operation's commit.) + { + MaxCommitRedoMB = 1; + AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table"); + TestModificationResults(runtime, txId, + {{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}} + ); + env.TestWaitNotification(runtime, txId); + } + + // 2. Set MaxCommitRedoMB back to high value and try again. 
+ { + MaxCommitRedoMB = 200; + AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table"); + env.TestWaitNotification(runtime, txId); + } + } + + Y_UNIT_TEST(CopyTables) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + // Take control over MaxCommitRedoMB ICB setting. + // Drop down its min-value limit to be able to set it as low as test needs. + TControlWrapper MaxCommitRedoMB; + { + runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + MaxCommitRedoMB.Reset(200, 1, 4096); + } + + const ui64 tables = 100; + const ui64 shardsPerTable = 1; + + ui64 txId = 100; + + for (ui64 i : xrange(tables)) { + TestCreateTable(runtime, ++txId, "/MyRoot", Sprintf( + R"( + Name: "table-%lu" + Columns { Name: "key" Type: "Uint64"} + Columns { Name: "value" Type: "Utf8"} + KeyColumnNames: ["key"] + UniformPartitionsCount: %lu + )", + i, + shardsPerTable + )); + env.TestWaitNotification(runtime, txId); + } + + auto testCopyTables = [](auto& runtime, ui64 txId, ui64 tables) { + TVector schemeTxs; + for (ui64 i : xrange(tables)) { + schemeTxs.push_back(CopyTableRequest(txId, "/MyRoot", Sprintf("table-%lu-copy", i), Sprintf("/MyRoot/table-%lu", i))); + } + AsyncSend(runtime, TTestTxConfig::SchemeShard, CombineSchemeTransactions(schemeTxs)); + }; + + // 1. Set MaxCommitRedoMB to 1 and try to copy tables. + { + MaxCommitRedoMB = 1; + testCopyTables(runtime, ++txId, tables); + TestModificationResults(runtime, txId, + {{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}} + ); + } + + // 2. Set MaxCommitRedoMB back to high value and try again. 
+ { + MaxCommitRedoMB = 200; + testCopyTables(runtime, ++txId, tables); + TestModificationResults(runtime, txId, {{NKikimrScheme::StatusAccepted}}); + } + } + +} diff --git a/ydb/core/tx/schemeshard/ut_base/ya.make b/ydb/core/tx/schemeshard/ut_base/ya.make index 4699c198de39..dd0047f98d6b 100644 --- a/ydb/core/tx/schemeshard/ut_base/ya.make +++ b/ydb/core/tx/schemeshard/ut_base/ya.make @@ -27,6 +27,7 @@ SRCS( ut_base.cpp ut_info_types.cpp ut_table_pg_types.cpp + ut_commit_redo_limit.cpp ) END() diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp index a4e9f6a1b2c2..f81110939025 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp @@ -1,7 +1,9 @@ #include +#include #include #include #include +#include #include #include @@ -591,15 +593,6 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { } )", {NKikimrScheme::StatusNameConflict}); - TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( - TableName: "indexImplTable" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )", {NKikimrScheme::StatusNameConflict}); - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( TableName: "Table" StreamDescription { @@ -617,29 +610,6 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { )"); env.TestWaitNotification(runtime, txId); - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "StreamWithIndex" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - IndexName: "NotExistedIndex" - )", {NKikimrScheme::StatusSchemeError}); - - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "StreamWithIndex" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - IndexName: "Index" - )"); - env.TestWaitNotification(runtime, txId); - - 
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/StreamWithIndex/streamImpl"), {NLs::PathExist}); - TestDropTable(runtime, ++txId, "/MyRoot", "Table"); env.TestWaitNotification(runtime, txId); @@ -1232,6 +1202,266 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { } } + Y_UNIT_TEST(StreamOnIndexTableNegative) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(false)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPreconditionFailed}); + } + + Y_UNIT_TEST(StreamOnIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "SyncIndex" + KeyColumnNames: ["indexed"] + } + IndexDescription { + Name: "AsyncIndex" + KeyColumnNames: ["indexed"] + Type: EIndexTypeGlobalAsync + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(2)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(3)}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/UnknownIndex", R"( + TableName: 
"indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/AsyncIndex", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPreconditionFailed}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(3)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(4)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), {NLs::PathExist}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream/streamImpl"), {NLs::PathExist}); + + TestAlterCdcStream(runtime, ++txId, "/MyRoot/Table/UnknownIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + Disable {} + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestAlterCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + Disable {} + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(4)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(5)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), { + NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateDisabled), + }); + + TestDropCdcStream(runtime, ++txId, 
"/MyRoot/Table/UnknownIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestDropCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(5)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(6)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), {NLs::PathNotExist}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream/streamImpl"), {NLs::PathNotExist}); + } + + Y_UNIT_TEST(StreamOnBuildingIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + THolder blockedBuildIndexRequest; + auto blockBuildIndexRequest = runtime.AddObserver([&](auto& ev) { + blockedBuildIndexRequest.Reset(ev.Release()); + }); + + AsyncBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index", {"indexed"}); + const auto buildIndexId = txId; + { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&blockedBuildIndexRequest](IEventHandle&) { + return bool(blockedBuildIndexRequest); + }); + runtime.DispatchEvents(opts); + } + blockBuildIndexRequest.Remove(); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusMultipleModifications}); + + 
runtime.Send(blockedBuildIndexRequest.Release(), 0, true); + env.TestWaitNotification(runtime, buildIndexId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + } + + Y_UNIT_TEST(DropIndexWithStream) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDropTableIndex(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + IndexName: "Index" + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream"), { + NLs::PathNotExist, + }); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream/streamImpl"), { + NLs::PathNotExist, + }); + } + + Y_UNIT_TEST(DropTableWithIndexWithStream) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + 
)"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDropTable(runtime, ++txId, "/MyRoot", "Table"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream"), { + NLs::PathNotExist, + }); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream/streamImpl"), { + NLs::PathNotExist, + }); + } + } // TCdcStreamTests Y_UNIT_TEST_SUITE(TCdcStreamWithInitialScanTests) { @@ -1526,6 +1756,80 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithInitialScanTests) { env.TestWaitNotification(runtime, txId); } + Y_UNIT_TEST(RacyAlterStreamAndRestart) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions() + .EnableChangefeedInitialScan(true)); + ui64 txId = 100; + + TActorId schemeShardActorId; + auto findActorId = runtime.AddObserver([&](auto& ev) { + if (!schemeShardActorId) { + schemeShardActorId = ev->Sender; + } + }); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TBlockEvents blockedAlterStream(runtime, [&](auto& ev) { + const auto& record = ev->Get()->Record; + if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterCdcStream) { + txId = record.GetTxId(); + return true; + } + return false; + }); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + State: ECdcStreamStateScan + } + )"); + env.TestWaitNotification(runtime, txId); + + runtime.WaitFor("AlterCdcStream", [&]{ 
return blockedAlterStream.size(); }); + blockedAlterStream.Stop(); + + UNIT_ASSERT(schemeShardActorId); + + TBlockEvents blockedProgress(runtime, [&](auto& ev) { + return schemeShardActorId == ev->Sender; + }); + + blockedAlterStream.Unblock(); + runtime.WaitFor("Progress", [&]{ return blockedProgress.size(); }); + blockedProgress.Stop(); + + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathExist, + NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateReady), + }); + + TestDropCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + StreamName: "Stream" + )"); + env.TestWaitNotification(runtime, txId); + + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathNotExist, + }); + } + void Metering(bool serverless) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions() diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp index 42ecc9f6397d..47209679f222 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp @@ -6,40 +6,38 @@ using namespace NSchemeShardUT_Private; -static const TString createTableProto = R"( - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] -)"; - -static const TString createTableWithIndexProto = R"( - TableDescription { - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] - } - IndexDescription { - Name: "SyncIndex" - KeyColumnNames: ["value"] - } -)"; - Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { 
template - void CreateStream(const TMaybe& state = Nothing(), bool vt = false, bool tableWithIndex = false) { + void CreateStream(const TMaybe& state = Nothing(), bool vt = false, bool onIndex = false) { T t; - t.GetTestEnvOptions().EnableChangefeedInitialScan(true); + t.GetTestEnvOptions() + .EnableChangefeedInitialScan(true) + .EnableChangefeedsOnIndexTables(true); t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { { TInactiveZone inactive(activeZone); runtime.GetAppData().DisableCdcAutoSwitchingToReadyStateForTests = true; - if (tableWithIndex) { - TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", createTableWithIndexProto); + if (!onIndex) { + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); } else { - TestCreateTable(runtime, ++t.TxId, "/MyRoot", createTableProto); + TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); } t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -58,24 +56,19 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { const bool ok = google::protobuf::TextFormat::PrintToString(streamDesc, &strDesc); UNIT_ASSERT_C(ok, "protobuf serialization failed"); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", Sprintf(R"( - TableName: "Table" + const TString path = !onIndex ? "/MyRoot" : "/MyRoot/Table/Index"; + const TString tableName = !onIndex ? 
"Table": "indexImplTable"; + + TestCreateCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamDescription { %s } - AllIndexes {} - )", strDesc.c_str())); + )", tableName.c_str(), strDesc.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + TestDescribeResult(DescribePrivatePath(runtime, path + "/" + tableName + "/Stream"), { NLs::PathExist, NLs::StreamVirtualTimestamps(vt), }); - - if (tableWithIndex) { - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), { - NLs::PathExist, - NLs::StreamVirtualTimestamps(vt), - }); - } }); } @@ -83,15 +76,15 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream(); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamTableWithIndex) { - CreateStream(Nothing(), false, true); + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTable) { + CreateStream({}, false, true); } Y_UNIT_TEST_WITH_REBOOTS(CreateStreamExplicitReady) { CreateStream(NKikimrSchemeOp::ECdcStreamStateReady); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamExplicitReadyTableWithIndex) { + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableExplicitReady) { CreateStream(NKikimrSchemeOp::ECdcStreamStateReady, false, true); } @@ -99,7 +92,7 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream(NKikimrSchemeOp::ECdcStreamStateScan); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamWithInitialScanTableWithIndex) { + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableWithInitialScan) { CreateStream(NKikimrSchemeOp::ECdcStreamStateScan, false, true); } @@ -107,6 +100,10 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream({}, true); } + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableWithVirtualTimestamps) { + CreateStream({}, true, true); + } + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamWithAwsRegion) { T t; t.GetTestEnvOptions().EnableChangefeedDynamoDBStreamsFormat(true); @@ -293,21 +290,41 @@ 
Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { } template - void DropStream(const TMaybe& state = Nothing()) { + void DropStream(const TMaybe& state = Nothing(), bool onIndex = false) { T t; - t.GetTestEnvOptions().EnableChangefeedInitialScan(true); + t.GetTestEnvOptions() + .EnableChangefeedInitialScan(true) + .EnableChangefeedsOnIndexTables(true); t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + const TString path = !onIndex ? "/MyRoot" : "/MyRoot/Table/Index"; + const TString tableName = !onIndex ? "Table": "indexImplTable"; + { TInactiveZone inactive(activeZone); runtime.GetAppData().DisableCdcAutoSwitchingToReadyStateForTests = true; - TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] - )"); + if (!onIndex) { + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + } else { + TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + } t.TestEnv->TestWaitNotification(runtime, t.TxId); NKikimrSchemeOp::TCdcStreamDescription streamDesc; @@ -323,20 +340,20 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { const bool ok = google::protobuf::TextFormat::PrintToString(streamDesc, &strDesc); UNIT_ASSERT_C(ok, "protobuf serialization failed"); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", Sprintf(R"( - TableName: "Table" + TestCreateCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamDescription { %s } - )", strDesc.c_str())); + )", tableName.c_str(), strDesc.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropCdcStream(runtime, 
++t.TxId, "/MyRoot", R"( - TableName: "Table" + TestDropCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamName: "Stream" - )"); + )", tableName.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), {NLs::PathNotExist}); + TestDescribeResult(DescribePrivatePath(runtime, path + "/" + tableName + "/Stream"), {NLs::PathNotExist}); }); } @@ -344,14 +361,26 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { DropStream(); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTable) { + DropStream({}, true); + } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamExplicitReady) { DropStream(NKikimrSchemeOp::ECdcStreamStateReady); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTableExplicitReady) { + DropStream(NKikimrSchemeOp::ECdcStreamStateReady, true); + } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamCreatedWithInitialScan) { DropStream(NKikimrSchemeOp::ECdcStreamStateScan); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTableCreatedWithInitialScan) { + DropStream(NKikimrSchemeOp::ECdcStreamStateScan, true); + } + Y_UNIT_TEST_WITH_REBOOTS(CreateDropRecreate) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { @@ -556,68 +585,77 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { }); } + bool CheckRegistrations(TTestActorRuntime& runtime, NKikimrPQ::TMessageGroupInfo::EState expectedState, + const google::protobuf::RepeatedPtrField& tablePartitions, + const google::protobuf::RepeatedPtrField& topicPartitions) + { + for (const auto& topicPartition : topicPartitions) { + auto request = MakeHolder(); + { + auto& record = *request->Record.MutablePartitionRequest(); + record.SetPartition(topicPartition.GetPartitionId()); + auto& cmd = *record.MutableCmdGetMaxSeqNo(); + for (const auto& tablePartition : tablePartitions) { + cmd.AddSourceId(NPQ::NSourceIdEncoding::EncodeSimple(ToString(tablePartition.GetDatashardId()))); + } + } + + const auto& sender = runtime.AllocateEdgeActor(); + 
ForwardToTablet(runtime, topicPartition.GetTabletId(), sender, request.Release()); + + auto response = runtime.GrabEdgeEvent(sender); + { + const auto& record = response->Get()->Record.GetPartitionResponse(); + const auto& result = record.GetCmdGetMaxSeqNoResult().GetSourceIdInfo(); + + UNIT_ASSERT_VALUES_EQUAL(result.size(), tablePartitions.size()); + for (const auto& item: result) { + if (item.GetState() != expectedState) { + return false; + } + } + } + } + + return true; + } + struct TItem { TString Path; - ui32 nPartitions; + ui32 ExpectedPartitionCount; }; - void CheckRegistrations(TTestActorRuntime& runtime, const TItem& table, const TItem& topic) { + void CheckRegistrations(TTestActorRuntime& runtime, const TItem& table, const TItem& topic, + const google::protobuf::RepeatedPtrField* initialTablePartitions = nullptr) + { auto tableDesc = DescribePath(runtime, table.Path, true, true); const auto& tablePartitions = tableDesc.GetPathDescription().GetTablePartitions(); - UNIT_ASSERT_VALUES_EQUAL(tablePartitions.size(), table.nPartitions); + UNIT_ASSERT_VALUES_EQUAL(tablePartitions.size(), table.ExpectedPartitionCount); auto topicDesc = DescribePrivatePath(runtime, topic.Path); const auto& topicPartitions = topicDesc.GetPathDescription().GetPersQueueGroup().GetPartitions(); - UNIT_ASSERT_VALUES_EQUAL(topicPartitions.size(), topic.nPartitions); + UNIT_ASSERT_VALUES_EQUAL(topicPartitions.size(), topic.ExpectedPartitionCount); while (true) { runtime.SimulateSleep(TDuration::Seconds(1)); - bool done = true; - - for (ui32 i = 0; i < topic.nPartitions; ++i) { - auto request = MakeHolder(); - { - auto& record = *request->Record.MutablePartitionRequest(); - record.SetPartition(topicPartitions[i].GetPartitionId()); - auto& cmd = *record.MutableCmdGetMaxSeqNo(); - for (const auto& tablePartition : tablePartitions) { - cmd.AddSourceId(NPQ::NSourceIdEncoding::EncodeSimple(ToString(tablePartition.GetDatashardId()))); - } - } - - const auto& sender = 
runtime.AllocateEdgeActor(); - ForwardToTablet(runtime, topicPartitions[i].GetTabletId(), sender, request.Release()); - - auto response = runtime.GrabEdgeEvent(sender); - { - const auto& record = response->Get()->Record.GetPartitionResponse(); - const auto& result = record.GetCmdGetMaxSeqNoResult().GetSourceIdInfo(); - - UNIT_ASSERT_VALUES_EQUAL(result.size(), table.nPartitions); - for (const auto& item: result) { - done &= item.GetState() == NKikimrPQ::TMessageGroupInfo::STATE_REGISTERED; - if (!done) { - break; - } - } - } - - if (!done) { - break; - } - } - - if (done) { + if (CheckRegistrations(runtime, NKikimrPQ::TMessageGroupInfo::STATE_REGISTERED, tablePartitions, topicPartitions)) { break; } } + + if (initialTablePartitions) { + UNIT_ASSERT(CheckRegistrations(runtime, NKikimrPQ::TMessageGroupInfo::STATE_UNKNOWN, *initialTablePartitions, topicPartitions)); + } } - Y_UNIT_TEST_WITH_REBOOTS(SplitTable) { + template + void SplitTable(const TString& cdcStreamDesc) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + NKikimrScheme::TEvDescribeSchemeResult initialTableDesc; { TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( Name: "Table" Columns { Name: "key" Type: "Uint32" } @@ -625,15 +663,9 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { KeyColumnNames: ["key"] )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); + initialTableDesc = DescribePath(runtime, "/MyRoot/Table", true, true); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )"); + TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", cdcStreamDesc); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -651,16 +683,43 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { TInactiveZone inactive(activeZone); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(1u)}, {TCell::Make(1u)}); UploadRow(runtime, 
"/MyRoot/Table", 1, {1}, {2}, {TCell::Make(Max())}, {TCell::Make(Max())}); - CheckRegistrations(runtime, {"/MyRoot/Table", 2}, {"/MyRoot/Table/Stream/streamImpl", 1}); + CheckRegistrations(runtime, {"/MyRoot/Table", 2}, {"/MyRoot/Table/Stream/streamImpl", 1}, + &initialTableDesc.GetPathDescription().GetTablePartitions()); } }); } - Y_UNIT_TEST_WITH_REBOOTS(MergeTable) { + Y_UNIT_TEST_WITH_REBOOTS(SplitTable) { + SplitTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + } + + Y_UNIT_TEST_WITH_REBOOTS(SplitTableResolvedTimestamps) { + SplitTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + ResolvedTimestampsIntervalMs: 1000 + } + )"); + } + + template + void MergeTable(const TString& cdcStreamDesc) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + NKikimrScheme::TEvDescribeSchemeResult initialTableDesc; { TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( Name: "Table" Columns { Name: "key" Type: "Uint32" } @@ -674,15 +733,9 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { } )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); + initialTableDesc = DescribePath(runtime, "/MyRoot/Table", true, true); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )"); + TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", cdcStreamDesc); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -696,11 +749,35 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { TInactiveZone inactive(activeZone); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(1u)}, {TCell::Make(1u)}); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(Max())}, {TCell::Make(Max())}); - CheckRegistrations(runtime, {"/MyRoot/Table", 1}, 
{"/MyRoot/Table/Stream/streamImpl", 2}); + CheckRegistrations(runtime, {"/MyRoot/Table", 1}, {"/MyRoot/Table/Stream/streamImpl", 2}, + &initialTableDesc.GetPathDescription().GetTablePartitions()); } }); } + Y_UNIT_TEST_WITH_REBOOTS(MergeTable) { + MergeTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + } + + Y_UNIT_TEST_WITH_REBOOTS(MergeTableResolvedTimestamps) { + MergeTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + ResolvedTimestampsIntervalMs: 1000 + } + )"); + } + Y_UNIT_TEST_WITH_REBOOTS(RacySplitTableAndCreateStream) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { diff --git a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp index 4671e1d1d39c..f5884ec8433b 100644 --- a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp +++ b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp @@ -845,7 +845,7 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { } })"); env.TestWaitNotification(runtime, txId); - env.SimulateSleep(runtime, TDuration::Seconds(30)); + env.SimulateSleep(runtime, TDuration::Seconds(60)); simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export.cpp index aebe7014e51d..8d41d7bab562 100644 --- a/ydb/core/tx/schemeshard/ut_export/ut_export.cpp +++ b/ydb/core/tx/schemeshard/ut_export/ut_export.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,14 +13,28 @@ #include #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NKikimr::NWrappers::NTestHelpers; namespace { + Aws::SDKOptions Options; + + Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); + 
} + + Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); + } + void Run(TTestBasicRuntime& runtime, TTestEnv& env, const TVector& tables, const TString& request, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS, - const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "") { + const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "", const TString& peerName = "") { ui64 txId = 100; @@ -104,7 +119,7 @@ namespace { const auto initialStatus = expectedStatus == Ydb::StatusIds::PRECONDITION_FAILED ? expectedStatus : Ydb::StatusIds::SUCCESS; - TestExport(runtime, schemeshardId, ++txId, dbName, request, userSID, initialStatus); + TestExport(runtime, schemeshardId, ++txId, dbName, request, userSID, peerName, initialStatus); env.TestWaitNotification(runtime, txId, schemeshardId); if (initialStatus != Ydb::StatusIds::SUCCESS) { @@ -124,6 +139,9 @@ namespace { void Cancel(const TVector& tables, const TString& request, TDelayFunc delayFunc) { TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + TTestEnv env(runtime); ui64 txId = 100; @@ -147,6 +165,22 @@ namespace { TestExport(runtime, ++txId, "/MyRoot", request); const ui64 exportId = txId; + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + 
UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + if (!delayed) { TDispatchOptions opts; opts.FinalEvents.emplace_back([&delayed](IEventHandle&) -> bool { @@ -161,6 +195,23 @@ namespace { runtime.Send(delayed.Release(), 0, true); env.TestWaitNotification(runtime, exportId); + // Check audit record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::CANCELLED); TestForgetExport(runtime, ++txId, "/MyRoot", exportId); @@ -1650,7 +1701,7 @@ partitioning_settings { return ev->Get()->Record .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpBackup; }; - + THolder delayed; auto prevObserver = runtime.SetObserverFunc([&](TAutoPtr& ev) { if (delayFunc(ev)) { @@ -1708,6 +1759,235 @@ partitioning_settings { UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); } + // Based on CompletedExportEndTime + Y_UNIT_TEST(AuditCompletedExport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + // Prepare table to export + // + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + 
Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // Start export + // + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + TestExport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + // Do export + // + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); + + env.TestWaitNotification(runtime, txId); + + const auto desc = TestGetExport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_DONE); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit 
record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + + Y_UNIT_TEST(AuditCancelledExport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + // Prepare table to export + // + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + auto delayFunc = [](TAutoPtr& ev) { + if (ev->GetTypeRewrite() != TEvSchemeShard::EvModifySchemeTransaction) { + return false; + } + + return ev->Get()->Record + .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpBackup; + }; + + THolder delayed; + auto prevObserver = runtime.SetObserverFunc([&](TAutoPtr& ev) { + if (delayFunc(ev)) { + delayed.Reset(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + } + return TTestActorRuntime::EEventAction::PROCESS; + }); + + // Start export + // + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels 
{ + key: "uid" + value: "foo" + } + } + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + TestExport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + const ui64 exportId = txId; + + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + // Do export (unsuccessfully) + // + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); + + if (!delayed) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&delayed](IEventHandle&) -> bool { + return bool(delayed); + }); + runtime.DispatchEvents(opts); + } + runtime.SetObserverFunc(prevObserver); + + // Cancel export mid-air + // + TestCancelExport(runtime, ++txId, "/MyRoot", exportId); + + auto desc = TestGetExport(runtime, exportId, "/MyRoot"); + auto entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_CANCELLATION); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(!entry.HasEndTime()); + + runtime.Send(delayed.Release(), 0, true); + env.TestWaitNotification(runtime, exportId); + + desc = TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::CANCELLED); + entry = 
desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_CANCELLED); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + Y_UNIT_TEST(ExportPartitioningSettings) { TPortManager portManager; const ui16 port = portManager.GetPort(); @@ -1818,4 +2098,41 @@ partitioning_settings { )")); } + Y_UNIT_TEST(UserSID) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const TString request = Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + const TString userSID = 
"user@builtin"; + TestExport(runtime, ++txId, "/MyRoot", request, userSID); + + const auto desc = TestGetExport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_PREPARING); + UNIT_ASSERT_VALUES_EQUAL(entry.GetUserSID(), userSID); + } } diff --git a/ydb/core/tx/schemeshard/ut_export/ya.make b/ydb/core/tx/schemeshard/ut_export/ya.make index 4d5bf91e2698..c62dc9ea8ebc 100644 --- a/ydb/core/tx/schemeshard/ut_export/ya.make +++ b/ydb/core/tx/schemeshard/ut_export/ya.make @@ -20,6 +20,7 @@ IF (NOT OS_WINDOWS) library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp index 5e1e42d17ce9..97e34a6d3fa4 100644 --- a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp +++ b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp @@ -4,10 +4,28 @@ #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NSchemeShardUT_Private::NExportReboots; using namespace NKikimr::NWrappers::NTestHelpers; +namespace { + +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); +} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + +} + Y_UNIT_TEST_SUITE(TExportToS3WithRebootsTests) { using TUnderlying = std::function&, const TString&, TTestWithReboots&)>; diff --git a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make index bc7ca966e0dc..caf4fb7de362 100644 --- a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make +++ b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make @@ -19,6 +19,7 @@ PEERDIR( library/cpp/getopt 
library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_external_data_source/ut_external_data_source.cpp b/ydb/core/tx/schemeshard/ut_external_data_source/ut_external_data_source.cpp index 9d89bbfdf199..92faf2827c6c 100644 --- a/ydb/core/tx/schemeshard/ut_external_data_source/ut_external_data_source.cpp +++ b/ydb/core/tx/schemeshard/ut_external_data_source/ut_external_data_source.cpp @@ -46,6 +46,10 @@ Y_UNIT_TEST_SUITE(TExternalDataSourceTest) { key: "mdb_cluster_id", value: "id" } + Properties { + key: "database_name", + value: "postgres" + } } )", {NKikimrScheme::StatusAccepted}); diff --git a/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp b/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp index 4737ccdddc6f..8bbd846af415 100644 --- a/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp @@ -119,7 +119,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { {NLs::InExternalSubdomain}); } - Y_UNIT_TEST_FLAG(CreateAndAlterWithoutEnablingTx, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndAlterWithoutEnablingTx, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -137,15 +137,19 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { env.TestWaitNotification(runtime, {txId, txId - 1}); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot/dir", - R"( - Name: "USER_0" - ExternalSchemeShard: true - )", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), {{NKikimrScheme::StatusInvalidParameter, "ExtSubDomain without coordinators/mediators"}} ); } - Y_UNIT_TEST_FLAG(CreateAndAlter, AlterDatabaseCreateHiveFirst) { 
+ Y_UNIT_TEST_FLAGS(CreateAndAlter, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -156,41 +160,49 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - PlanResolution: 50 - Coordinators: 3 - Mediators: 3 - TimeCastBucketsPerMediator: 2 - )", + Sprintf(R"( + Name: "USER_0" + PlanResolution: 50 + Coordinators: 3 + Mediators: 3 + TimeCastBucketsPerMediator: 2 + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), {{NKikimrScheme::StatusInvalidParameter, "ExtSubDomain without ExternalSchemeShard"}} ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - StoragePools { - Name: "pool-1" - Kind: "pool-kind-1" - } - StoragePools { - Name: "pool-2" - Kind: "pool-kind-2" - } - StoragePools { - Name: "/dc-1/users/tenant-1:hdd" - Kind: "hdd" - } - StoragePools { - Name: "/dc-1/users/tenant-1:hdd-1" - Kind: "hdd-1" - } - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - ExternalSchemeShard: true - Name: "USER_0" - )" + Sprintf(R"( + StoragePools { + Name: "pool-1" + Kind: "pool-kind-1" + } + StoragePools { + Name: "pool-2" + Kind: "pool-kind-2" + } + StoragePools { + Name: "/dc-1/users/tenant-1:hdd" + Kind: "hdd" + } + StoragePools { + Name: "/dc-1/users/tenant-1:hdd-1" + Kind: "hdd-1" + } + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + ExternalSchemeShard: true + Name: "USER_0" + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1, txId - 2}); @@ -247,7 +259,64 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::Finished}); } - Y_UNIT_TEST_FLAG(CreateAndAlterTwice, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndSameAlterTwice, AlterDatabaseCreateHiveFirst, ExternalHive) { + 
TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + + const TString alterText = Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", alterText); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", alterText); + env.TestWaitNotification(runtime, txId); + + ui64 tenantSchemeShard = 0; + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::ExtractTenantSchemeshard(&tenantSchemeShard), + }); + + UNIT_ASSERT(tenantSchemeShard != 0 + && tenantSchemeShard != (ui64)-1 + && tenantSchemeShard != TTestTxConfig::SchemeShard + ); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::StoragePoolsEqual({"pool-1"}), + }); + + TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::StoragePoolsEqual({"pool-1"}) + }); + } + + Y_UNIT_TEST_FLAGS(CreateAndAlterAlterAddStoragePool, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -258,34 +327,42 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + 
Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - StoragePools { - Name: "pool-2" - Kind: "hdd-1" - } - )" + Sprintf(R"( + Name: "USER_0" + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + StoragePools { + Name: "pool-2" + Kind: "hdd-1" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, txId); @@ -322,6 +399,214 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::UserAttrsEqual({{"user__attr_1", "value"}})}); } + Y_UNIT_TEST_FLAGS(CreateAndAlterAlterSameStoragePools, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + + env.TestWaitNotification(runtime, {txId, txId - 1}); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + DatabaseQuotas { + data_size_hard_quota: 1288490188800 + data_size_soft_quota: 1224065679360 + } + + ExternalHive: %s + )", + 
ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, txId); + + + ui64 tenantSchemeShard = 0; + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::ExtractTenantSchemeshard(&tenantSchemeShard) + }); + + UNIT_ASSERT(tenantSchemeShard != 0 + && tenantSchemeShard != (ui64)-1 + && tenantSchemeShard != TTestTxConfig::SchemeShard + ); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::StoragePoolsEqual({"pool-1"}) + }); + + TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::StoragePoolsEqual({"pool-1"}) + }); + } + + Y_UNIT_TEST_FLAGS(AlterWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + // Create extsubdomain + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + // Altering extsubdomain but with plain altersubdomain should succeed + // (post tenant migration compatibility) + + //NOTE: SubDomain and not ExtSubdomain + TestAlterSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, txId); + 
} + + Y_UNIT_TEST_FLAGS(AlterTwiceAndWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + AsyncAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + // TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + const auto firstAlterTxId = txId; + + //NOTE: SubDomain vs ExtSubDomain + TestAlterSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), + {{NKikimrScheme::StatusMultipleModifications}} + ); + + env.TestWaitNotification(runtime, firstAlterTxId); + } + Y_UNIT_TEST(CreateWithOnlyDotsNotAllowed) { TTestBasicRuntime runtime; TTestEnv env(runtime); @@ -1082,7 +1367,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { testCreations(); } - Y_UNIT_TEST_FLAG(Drop, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(Drop, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, 
TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -1093,18 +1378,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); @@ -1139,16 +1428,18 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::PathsInsideDomain(1), NLs::ShardsInsideDomain(0)}); + const ui64 AdditionalHiveTablet = (ExternalHive ? 1 : 0); + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0/dir/table_1"), {NLs::PathRedirected, NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(3)}); + NLs::ShardsInsideDomain(3 + AdditionalHiveTablet)}); TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0/dir/table_1"), {NLs::PathExist, NLs::Finished, NLs::PathsInsideDomain(2), - NLs::ShardsInsideDomain(5)}); + NLs::ShardsInsideDomain(5 + AdditionalHiveTablet)}); TestForceDropExtSubDomain(runtime, ++txId, "/MyRoot", "USER_0"); env.TestWaitNotification(runtime, txId); @@ -1164,7 +1455,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(0)}); - env.TestWaitTabletDeletion(runtime, xrange(TTestTxConfig::FakeHiveTablets, TTestTxConfig::FakeHiveTablets + 5)); + // env.TestWaitTabletDeletion(runtime, xrange(TTestTxConfig::FakeHiveTablets, TTestTxConfig::FakeHiveTablets + 5)); UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "SubDomains", "PathId", 2)); UNIT_ASSERT(!CheckLocalRowExists(runtime, 
TTestTxConfig::SchemeShard, "Paths", "Id", 2)); } @@ -1209,7 +1500,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { } } - Y_UNIT_TEST_FLAG(CreateAndAlterThenDropChangesParent, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndAlterThenDropChangesParent, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -1218,18 +1509,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { R"(Name: "USER_0")" ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); diff --git a/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp new file mode 100644 index 000000000000..9826eafc523a --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp @@ -0,0 +1,53 @@ +#include +#include +#include + +#include +#include +#include + +#include + +#include "auditlog_helpers.h" + +namespace NSchemeShardUT_Private { + +namespace { + +class TMemoryLogBackend: public TLogBackend { +public: + std::vector& Buffer; + + TMemoryLogBackend(std::vector& buffer) + : Buffer(buffer) + {} + + virtual void WriteData(const TLogRecord& rec) override { + Buffer.emplace_back(rec.Data, rec.Len); + } + + virtual void ReopenLog() override { + } +}; + +} // anonymous namespace + +NAudit::TAuditLogBackends CreateTestAuditLogBackends(std::vector& lineBuffer) { + 
NAudit::TAuditLogBackends logBackends; + logBackends[NKikimrConfig::TAuditConfig::TXT].emplace_back(new TMemoryLogBackend(lineBuffer)); + return logBackends; +} + +std::string FindAuditLine(const std::vector& auditLines, const std::string& substr) { + Cerr << "AUDIT LOG buffer(" << auditLines.size() << "):" << Endl; + for (auto i : auditLines) { + Cerr << " " << i << Endl; + } + auto found = std::find_if(auditLines.begin(), auditLines.end(), [&](auto i) { return i.contains(substr); }); + UNIT_ASSERT_C(found != auditLines.end(), "No audit record with substring: '" + substr + "'"); + auto line = *found; + Cerr << "AUDIT LOG checked line:" << Endl << " " << line << Endl; + return line; +} + +} // namespace NSchemeShardUT_Private diff --git a/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h new file mode 100644 index 000000000000..b7d57ba3cb70 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +#include + +namespace NSchemeShardUT_Private { + +using namespace NKikimr; + +NAudit::TAuditLogBackends CreateTestAuditLogBackends(std::vector& lineBuffer); + +std::string FindAuditLine(const std::vector& auditLines, const std::string& substr); + +} // namespace NSchemeShardUT_Private diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 0fe031eb3cf8..dc3cc8833366 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -1026,7 +1026,7 @@ namespace NSchemeShardUT_Private { return result; } - void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { + void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { 
NKikimrExport::TCreateExportRequest request; UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(requestStr, &request)); @@ -1044,26 +1044,29 @@ namespace NSchemeShardUT_Private { if (userSID) { ev->Record.SetUserSID(userSID); } + if (peerName) { + ev->Record.SetPeerName(peerName); + } AsyncSend(runtime, schemeshardId, ev.Release()); } - void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { - AsyncExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID); + void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { + AsyncExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName); } - void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - AsyncExport(runtime, schemeshardId, id, dbName, requestStr, userSID); + AsyncExport(runtime, schemeshardId, id, dbName, requestStr, userSID, peerName); TAutoPtr handle; auto ev = runtime.GrabEdgeEvent(handle); UNIT_ASSERT_EQUAL(ev->Record.GetResponse().GetEntry().GetStatus(), expectedStatus); } - void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - TestExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, expectedStatus); + TestExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, 
peerName, expectedStatus); } NKikimrExport::TEvGetExportResponse TestGetExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, @@ -1152,7 +1155,7 @@ namespace NSchemeShardUT_Private { return TestForgetExport(runtime, TTestTxConfig::SchemeShard, txId, dbName, exportId, expectedStatus); } - void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { + void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { NKikimrImport::TCreateImportRequest request; UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(requestStr, &request)); @@ -1160,26 +1163,29 @@ namespace NSchemeShardUT_Private { if (userSID) { ev->Record.SetUserSID(userSID); } + if (peerName) { + ev->Record.SetPeerName(peerName); + } AsyncSend(runtime, schemeshardId, ev.Release()); } - void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { - AsyncImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID); + void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { + AsyncImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName); } - void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - AsyncImport(runtime, schemeshardId, id, dbName, requestStr, userSID); + AsyncImport(runtime, schemeshardId, id, dbName, requestStr, userSID, peerName); TAutoPtr 
handle; auto ev = runtime.GrabEdgeEvent(handle); UNIT_ASSERT_EQUAL(ev->Record.GetResponse().GetEntry().GetStatus(), expectedStatus); } - void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - TestImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, expectedStatus); + TestImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName, expectedStatus); } NKikimrImport::TEvGetImportResponse TestGetImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, @@ -1500,6 +1506,7 @@ namespace NSchemeShardUT_Private { (let child '('ChildrenLimit (Uint64 '%lu))) (let acl '('AclByteSizeLimit (Uint64 '%lu))) (let columns '('TableColumnsLimit (Uint64 '%lu))) + (let columnColumns '('ColumnTableColumnsLimit (Uint64 '%lu))) (let colName '('TableColumnNameLengthLimit (Uint64 '%lu))) (let keyCols '('TableKeyColumnsLimit (Uint64 '%lu))) (let indices '('TableIndicesLimit (Uint64 '%lu))) @@ -1512,11 +1519,11 @@ namespace NSchemeShardUT_Private { (let pqPartitions '('PQPartitionsLimit (Uint64 '%lu))) (let exports '('ExportsLimit (Uint64 '%lu))) (let imports '('ImportsLimit (Uint64 '%lu))) - (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) + (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns columnColumns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) (return ret) ) )", domainId, limits.MaxDepth, limits.MaxPaths, limits.MaxChildrenInDir, limits.MaxAclBytesSize, - limits.MaxTableColumns, 
limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, + limits.MaxTableColumns, limits.MaxColumnTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, limits.MaxTableIndices, limits.MaxTableCdcStreams, limits.MaxShards, limits.MaxShardsInPath, limits.MaxConsistentCopyTargets, limits.MaxPathElementLength, escapedStr.c_str(), limits.MaxPQPartitions, @@ -1672,12 +1679,18 @@ namespace NSchemeShardUT_Private { *index.mutable_data_columns() = {cfg.DataColumns.begin(), cfg.DataColumns.end()}; switch (cfg.IndexType) { - case NKikimrSchemeOp::EIndexTypeGlobal: - *index.mutable_global_index() = Ydb::Table::GlobalIndex(); - break; - case NKikimrSchemeOp::EIndexTypeGlobalAsync: - *index.mutable_global_async_index() = Ydb::Table::GlobalAsyncIndex(); - break; + case NKikimrSchemeOp::EIndexTypeGlobal: { + auto& settings = *index.mutable_global_index()->mutable_settings(); + if (cfg.GlobalIndexSettings) { + cfg.GlobalIndexSettings[0].SerializeTo(settings); + } + } break; + case NKikimrSchemeOp::EIndexTypeGlobalAsync: { + auto& settings = *index.mutable_global_async_index()->mutable_settings(); + if (cfg.GlobalIndexSettings) { + cfg.GlobalIndexSettings[0].SerializeTo(settings); + } + } break; default: UNIT_ASSERT_C(false, "Unknown index type: " << static_cast(cfg.IndexType)); } @@ -1987,7 +2000,7 @@ namespace NSchemeShardUT_Private { Runtime.SendToPipe(shardData.ShardId, sender, proposal); TAutoPtr handle; auto event = Runtime.GrabEdgeEventIf(handle, - [=](const TEvDataShard::TEvProposeTransactionResult& event) { + [this, shardData](const TEvDataShard::TEvProposeTransactionResult& event) { return event.GetTxId() == TxId && event.GetOrigin() == shardData.ShardId; }); activeZone = true; diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.h b/ydb/core/tx/schemeshard/ut_helpers/helpers.h index e59f1d3fe07d..dc97f8b5cd8b 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.h +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.h @@ -20,6 +20,7 @@ #include 
#include +#include #include @@ -48,6 +49,19 @@ template \ void N(NUnitTest::TTestContext&) +#define Y_UNIT_TEST_FLAGS(N, OPT1, OPT2) \ + template void N(NUnitTest::TTestContext&); \ + struct TTestRegistration##N { \ + TTestRegistration##N() { \ + TCurrentTest::AddTest(#N, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT2, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT1, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT1 "-" #OPT2, static_cast(&N), false); \ + } \ + }; \ + static TTestRegistration##N testRegistration##N; \ + template \ + void N(NUnitTest::TTestContext&) namespace NSchemeShardUT_Private { using namespace NKikimr; @@ -348,6 +362,7 @@ namespace NSchemeShardUT_Private { NKikimrSchemeOp::EIndexType IndexType = NKikimrSchemeOp::EIndexTypeGlobal; TVector IndexColumns; TVector DataColumns; + TVector GlobalIndexSettings = {}; }; std::unique_ptr CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal); @@ -370,11 +385,11 @@ namespace NSchemeShardUT_Private { ////////// export TVector GetExportTargetPaths(const TString& requestStr); - void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void 
TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); - void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); NKikimrExport::TEvGetExportResponse TestGetExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TVector& expectedStatuses); @@ -398,11 +413,11 @@ namespace NSchemeShardUT_Private { Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); ////////// import - void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); - void 
TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); NKikimrImport::TEvGetImportResponse TestGetImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TVector& expectedStatuses); diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 8a415d0cad6c..fe7c69563602 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -1252,11 +1252,63 @@ TCheckFunc PartitionKeys(TVector lastShardKeys) { const auto& pathDescr = record.GetPathDescription(); UNIT_ASSERT_VALUES_EQUAL(lastShardKeys.size(), pathDescr.TablePartitionsSize()); for (size_t i = 0; i < lastShardKeys.size(); ++i) { - UNIT_ASSERT_STRING_CONTAINS(pathDescr.GetTablePartitions(i).GetEndOfRangeKeyPrefix(), lastShardKeys[i]); + const auto& partition = pathDescr.GetTablePartitions(i); + UNIT_ASSERT_STRING_CONTAINS_C( + partition.GetEndOfRangeKeyPrefix(), lastShardKeys[i], + "partition index: " << i << '\n' + << "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n' + << "expected key prefix: " << lastShardKeys[i].Quote() << '\n' + ); } }; } +namespace { + +// Serializes / deserializes a value of type T to a cell vector string representation. 
+template +struct TSplitBoundarySerializer { + static TString Serialize(T splitBoundary) { + const auto cell = TCell::Make(splitBoundary); + TSerializedCellVec cellVec(TArrayRef(&cell, 1)); + return cellVec.ReleaseBuffer(); + } + + static TVector Deserialize(const TString& serializedCells) { + TSerializedCellVec cells(serializedCells); + TVector values; + for (const auto& cell : cells.GetCells()) { + if (cell.IsNull()) { + // the last cell + break; + } + values.emplace_back(cell.AsValue()); + } + return values; + } +}; + +} + +template +TCheckFunc SplitBoundaries(TVector&& expectedBoundaries) { + return [expectedBoundaries = std::move(expectedBoundaries)] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + const auto& pathDescr = record.GetPathDescription(); + UNIT_ASSERT_VALUES_EQUAL(pathDescr.TablePartitionsSize(), expectedBoundaries.size() + 1); + for (size_t i = 0; i < expectedBoundaries.size(); ++i) { + const auto& partition = pathDescr.GetTablePartitions(i); + const auto actualBoundary = TSplitBoundarySerializer::Deserialize(partition.GetEndOfRangeKeyPrefix()).at(0); + UNIT_ASSERT_VALUES_EQUAL_C( + actualBoundary, expectedBoundaries[i], + "partition index: " << i << '\n' + << "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n' + ); + } + }; +} + +template TCheckFunc SplitBoundaries(TVector&&); + TCheckFunc ServerlessComputeResourcesMode(NKikimrSubDomains::EServerlessComputeResourcesMode serverlessComputeResourcesMode) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus()); diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index 3cb8f39c5754..0f39c65f8513 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -99,6 +99,10 @@ namespace NLs { void CheckBoundaries(const 
NKikimrScheme::TEvDescribeSchemeResult& record); TCheckFunc PartitionCount(ui32 count); TCheckFunc PartitionKeys(TVector lastShardKeys); + // Checks if the serialized representation of an expected boundary is a prefix of the actual one. + // Similar to PartitionKeys check, but does not require you to pass split boundaries in a serialized form. + template + TCheckFunc SplitBoundaries(TVector&& expectedBoundaries); TCheckFunc FollowerCount(ui32 count); TCheckFunc CrossDataCenterFollowerCount(ui32 count); TCheckFunc AllowFollowerPromotion(bool val); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index afef5e0ffab5..9e5b7c8543f6 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -541,6 +541,8 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.SetEnableServerlessExclusiveDynamicNodes(opts.EnableServerlessExclusiveDynamicNodes_); app.SetEnableAddColumsWithDefaults(opts.EnableAddColumsWithDefaults_); app.SetEnableReplaceIfExistsForExternalEntities(opts.EnableReplaceIfExistsForExternalEntities_); + app.SetEnableChangefeedsOnIndexTables(opts.EnableChangefeedsOnIndexTables_); + app.SetEnableTieringInColumnShard(opts.EnableTieringInColumnShard_); app.ColumnShardConfig.SetDisabledOnSchemeShard(false); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.h b/ydb/core/tx/schemeshard/ut_helpers/test_env.h index 36a9d36888cb..c433855c9c76 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.h +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.h @@ -65,6 +65,8 @@ namespace NSchemeShardUT_Private { OPTION(std::optional, EnableAddColumsWithDefaults, std::nullopt); OPTION(std::optional, EnableReplaceIfExistsForExternalEntities, std::nullopt); OPTION(std::optional, GraphBackendType, std::nullopt); + OPTION(std::optional, EnableChangefeedsOnIndexTables, std::nullopt); + OPTION(std::optional, 
EnableTieringInColumnShard, std::nullopt); #undef OPTION }; diff --git a/ydb/core/tx/schemeshard/ut_helpers/ya.make b/ydb/core/tx/schemeshard/ut_helpers/ya.make index 89f867d6c154..5eb4bf14ca88 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ya.make +++ b/ydb/core/tx/schemeshard/ut_helpers/ya.make @@ -22,9 +22,12 @@ PEERDIR( ydb/public/lib/scheme_types ydb/library/yql/public/issue ydb/public/sdk/cpp/client/ydb_driver + ydb/public/sdk/cpp/client/ydb_table ) SRCS( + auditlog_helpers.cpp + auditlog_helpers.h export_reboots_common.cpp failing_mtpq.cpp helpers.cpp diff --git a/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp index 9103a56a148d..15427cb64dd8 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp @@ -5,6 +5,7 @@ #include #include #include +#include using namespace NKikimr; using namespace NSchemeShard; diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp index 4ab86bcf9a7c..ef83b89235a3 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp @@ -1,10 +1,13 @@ #include #include +#include #include #include #include +#include + using namespace NKikimr; using namespace NSchemeShard; using namespace NSchemeShardUT_Private; @@ -667,6 +670,36 @@ Y_UNIT_TEST_SUITE(IndexBuildTest) { env.TestWaitNotification(runtime, {txId, txId - 1}); } + Y_UNIT_TEST(CheckLimitWithDroppedIndex) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TSchemeLimits lowLimits; + lowLimits.MaxTableIndices = 1; + SetSchemeshardSchemaLimits(runtime, lowLimits); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + 
TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index1", {"value"}, Ydb::StatusIds::SUCCESS); + env.TestWaitNotification(runtime, txId); + + TestDropTableIndex(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + IndexName: "Index1" + )"); + env.TestWaitNotification(runtime, txId); + + TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index2", {"value"}, Ydb::StatusIds::SUCCESS); + env.TestWaitNotification(runtime, txId); + } + Y_UNIT_TEST(Lock) { TTestBasicRuntime runtime; TTestEnv env(runtime); @@ -729,6 +762,226 @@ Y_UNIT_TEST_SUITE(IndexBuildTest) { } + Y_UNIT_TEST(MergeIndexTableShardsOnlyWhenReady) { + TTestBasicRuntime runtime; + + TTestEnvOptions opts; + opts.EnableBackgroundCompaction(false); + opts.DisableStatsBatching(true); + TTestEnv env(runtime, opts); + + NDataShard::gDbStatsReportInterval = TDuration::Seconds(0); + + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + Ydb::Table::GlobalIndexSettings settings; + UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( + partition_at_keys { + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 10 } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 20 } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 30 } } + } + } + )", &settings)); + + TBlockEvents indexApplicationBlocker(runtime, [](const auto& ev) { + const auto& modifyScheme = ev->Get()->Record.GetTransaction(0); + return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpApplyIndexBuild; + }); + + ui64 
indexInitializationTx = 0; + using TEvent = TEvSchemeShard::TEvModifySchemeTransaction; + auto indexInitializationObserver = runtime.AddObserver([&indexInitializationTx](const TEvent::TPtr& ev) { + const auto& record = ev->Get()->Record; + if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexBuild) { + indexInitializationTx = record.GetTxId(); + } + } + ); + + const ui64 buildIndexTx = ++txId; + TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", TBuildIndexConfig{ + "ByValue", NKikimrSchemeOp::EIndexTypeGlobal, { "value" }, {}, + { NYdb::NTable::TGlobalIndexSettings::FromProto(settings) } + }); + + runtime.WaitFor("index initialization", [&indexInitializationTx]{ + return indexInitializationTx != 0; + }); + env.TestWaitNotification(runtime, indexInitializationTx); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue"), { + NLs::PathExist, + NLs::IndexState(NKikimrSchemeOp::EIndexStateWriteOnly) + }); + + TVector indexShards; + auto shardCollector = [&indexShards](const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); + const auto& partitions = record.GetPathDescription().GetTablePartitions(); + indexShards.clear(); + indexShards.reserve(partitions.size()); + for (const auto& partition : partitions) { + indexShards.emplace_back(partition.GetDatashardId()); + } + }; + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + NLs::PathExist, + NLs::PartitionCount(4), + shardCollector + }); + UNIT_ASSERT_VALUES_EQUAL(indexShards.size(), 4); + + { + // make sure no shards are merged + TBlockEvents mergeBlocker(runtime, [](const auto& ev) { + const auto& modifyScheme = ev->Get()->Record.GetTransaction(0); + return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpSplitMergeTablePartitions; + }); + + { + // wait for all index shards to send 
statistics + THashSet shardsWithStats; + using TEvType = TEvDataShard::TEvPeriodicTableStats; + auto statsObserver = runtime.AddObserver([&shardsWithStats](const TEvType::TPtr& ev) { + shardsWithStats.emplace(ev->Get()->Record.GetDatashardId()); + }); + + runtime.WaitFor("all index shards to send statistics", [&]{ + return AllOf(indexShards, [&shardsWithStats](ui64 indexShard) { + return shardsWithStats.contains(indexShard); + }); + }); + } + + // we expect to not have observed any attempts to merge + UNIT_ASSERT(mergeBlocker.empty()); + + // wait for 1 minute to ensure that no merges have been started by SchemeShard + env.SimulateSleep(runtime, TDuration::Minutes(1)); + UNIT_ASSERT(mergeBlocker.empty()); + } + + // splits are allowed even if the index is not ready + TestSplitTable(runtime, ++txId, "/MyRoot/Table/ByValue/indexImplTable", Sprintf(R"( + SourceTabletId: %lu + SplitBoundary { KeyPrefix { Tuple { Optional { Uint64: 5 } } } } + )", + indexShards.front() + ) + ); + env.TestWaitNotification(runtime, txId); + + indexApplicationBlocker.Stop().Unblock(); + env.TestWaitNotification(runtime, buildIndexTx); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue"), { + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady) + }); + + // wait until all index impl table shards are merged into one + while (true) { + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + shardCollector + }); + if (indexShards.size() > 1) { + // If a merge happens, old shards are deleted and replaced with a new one. + // That is why we need to wait for * all * the shards to be deleted. 
+ env.TestWaitTabletDeletion(runtime, indexShards); + } else { + break; + } + } + } + + Y_UNIT_TEST(IndexPartitioningIsPersisted) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: [ "key" ] + )"); + env.TestWaitNotification(runtime, txId); + + Ydb::Table::GlobalIndexSettings settings; + UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( + partition_at_keys { + split_points { + type { tuple_type { elements { optional_type { item { type_id: UTF8 } } } } } + value { items { text_value: "alice" } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UTF8 } } } } } + value { items { text_value: "bob" } } + } + } + partitioning_settings { + min_partitions_count: 3 + max_partitions_count: 3 + } + )", &settings)); + + TBlockEvents indexCreationBlocker(runtime, [](const auto& ev) { + const auto& modifyScheme = ev->Get()->Record.GetTransaction(0); + return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexBuild; + }); + + const ui64 buildIndexTx = ++txId; + TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", TBuildIndexConfig{ + "Index", NKikimrSchemeOp::EIndexTypeGlobal, { "value" }, {}, + { NYdb::NTable::TGlobalIndexSettings::FromProto(settings) } + }); + + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + + indexCreationBlocker.Stop().Unblock(); + env.TestWaitNotification(runtime, buildIndexTx); + + auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx); + UNIT_ASSERT_VALUES_EQUAL_C( + (int)buildIndexOperation.GetIndexBuild().GetState(), (int)Ydb::Table::IndexBuildState::STATE_DONE, + buildIndexOperation.DebugString() + ); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/Table"), { + 
NLs::IsTable, + NLs::IndexesCount(1) + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index"), { + NLs::PathExist, + NLs::IndexState(NKikimrSchemeOp::EIndexState::EIndexStateReady) + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable", true, true), { + NLs::IsTable, + NLs::PartitionCount(3), + NLs::MinPartitionsCountEqual(3), + NLs::MaxPartitionsCountEqual(3), + NLs::PartitionKeys({"alice", "bob", ""}) + }); + } + Y_UNIT_TEST(DropIndex) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_index_build/ya.make b/ydb/core/tx/schemeshard/ut_index_build/ya.make index 10643c848a6a..f365a9c79f2c 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ya.make +++ b/ydb/core/tx/schemeshard/ut_index_build/ya.make @@ -14,6 +14,7 @@ PEERDIR( ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers + ydb/public/sdk/cpp/client/ydb_table ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/schemeshard/ut_index_build_reboots/ut_index_build_reboots.cpp b/ydb/core/tx/schemeshard/ut_index_build_reboots/ut_index_build_reboots.cpp index 8f6135105977..2f9599cbb644 100644 --- a/ydb/core/tx/schemeshard/ut_index_build_reboots/ut_index_build_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build_reboots/ut_index_build_reboots.cpp @@ -450,4 +450,75 @@ Y_UNIT_TEST_SUITE(IndexBuildTestReboots) { }); } + + Y_UNIT_TEST(IndexPartitioning) { + TTestWithReboots t(false); + t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + { + TInactiveZone inactive(activeZone); + + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint32" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: [ "key" ] + )"); + t.TestEnv->TestWaitNotification(runtime, t.TxId); + } + + Ydb::Table::GlobalIndexSettings settings; + UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( + partition_at_keys { + split_points { + type { tuple_type { elements 
{ optional_type { item { type_id: UTF8 } } } } } + value { items { text_value: "alice" } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UTF8 } } } } } + value { items { text_value: "bob" } } + } + } + partitioning_settings { + min_partitions_count: 3 + max_partitions_count: 3 + } + )", &settings)); + + const ui64 buildIndexId = ++t.TxId; + AsyncBuildIndex(runtime, buildIndexId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", TBuildIndexConfig{ + "Index", NKikimrSchemeOp::EIndexTypeGlobal, { "value" }, {}, + { NYdb::NTable::TGlobalIndexSettings::FromProto(settings) } + }); + + { + auto descr = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexId); + UNIT_ASSERT_VALUES_EQUAL((int)descr.GetIndexBuild().GetState(), (int)Ydb::Table::IndexBuildState::STATE_PREPARING); + } + + t.TestEnv->TestWaitNotification(runtime, buildIndexId); + + { + auto descr = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexId); + UNIT_ASSERT_VALUES_EQUAL((int)descr.GetIndexBuild().GetState(), (int)Ydb::Table::IndexBuildState::STATE_DONE); + } + + TestDescribeResult(DescribePath(runtime, "/MyRoot/Table"), { + NLs::IsTable, + NLs::IndexesCount(1) + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index"), { + NLs::PathExist, + NLs::IndexState(NKikimrSchemeOp::EIndexState::EIndexStateReady) + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable", true, true), { + NLs::IsTable, + NLs::PartitionCount(3), + NLs::MinPartitionsCountEqual(3), + NLs::MaxPartitionsCountEqual(3), + NLs::PartitionKeys({"alice", "bob", ""}) + }); + }); + } } diff --git a/ydb/core/tx/schemeshard/ut_index_build_reboots/ya.make b/ydb/core/tx/schemeshard/ut_index_build_reboots/ya.make index 594f2ceeec47..8631141f4e40 100644 --- a/ydb/core/tx/schemeshard/ut_index_build_reboots/ya.make +++ b/ydb/core/tx/schemeshard/ut_index_build_reboots/ya.make @@ -18,6 +18,7 @@ 
PEERDIR( ydb/core/tx ydb/core/tx/schemeshard/ut_helpers ydb/library/yql/public/udf/service/exception_policy + ydb/public/sdk/cpp/client/ydb_table ) SRCS( diff --git a/ydb/core/tx/schemeshard/ut_login/ut_login.cpp b/ydb/core/tx/schemeshard/ut_login/ut_login.cpp index 1a46fafeb1d8..1401936c99f7 100644 --- a/ydb/core/tx/schemeshard/ut_login/ut_login.cpp +++ b/ydb/core/tx/schemeshard/ut_login/ut_login.cpp @@ -1,19 +1,35 @@ +#include + #include +#include #include +#include using namespace NKikimr; using namespace NSchemeShard; using namespace NSchemeShardUT_Private; +namespace NSchemeShardUT_Private { + +// convert into generic test helper? +void TestCreateAlterLoginCreateUser(TTestActorRuntime& runtime, ui64 txId, const TString& database, const TString& user, const TString& password, const TVector& expectedResults) { + std::unique_ptr modifyTx(CreateAlterLoginCreateUser(txId, user, password)); + //TODO: move setting of TModifyScheme.WorkingDir into CreateAlterLoginCreateUser() + //NOTE: TModifyScheme.Name isn't set, intentionally + modifyTx->Record.MutableTransaction(0)->SetWorkingDir(database); + AsyncSend(runtime, TTestTxConfig::SchemeShard, modifyTx.release()); + // AlterLoginCreateUser is synchronous in nature, result is returned immediately + TestModificationResults(runtime, txId, expectedResults); +} + +} // namespace NSchemeShardUT_Private + Y_UNIT_TEST_SUITE(TSchemeShardLoginTest) { Y_UNIT_TEST(BasicLogin) { TTestBasicRuntime runtime; TTestEnv env(runtime); ui64 txId = 100; - TActorId sender = runtime.AllocateEdgeActor(); - std::unique_ptr transaction(CreateAlterLoginCreateUser(++txId, "user1", "password1")); - transaction->Record.MutableTransaction(0)->SetWorkingDir("/MyRoot"); - ForwardToTablet(runtime, TTestTxConfig::SchemeShard, sender, transaction.release()); + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); auto resultLogin = Login(runtime, "user1", "password1"); 
UNIT_ASSERT_VALUES_EQUAL(resultLogin.error(), ""); auto describe = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot"); @@ -28,4 +44,85 @@ Y_UNIT_TEST_SUITE(TSchemeShardLoginTest) { auto resultValidate = login.ValidateToken({.Token = resultLogin.token()}); UNIT_ASSERT_VALUES_EQUAL(resultValidate.User, "user1"); } + + Y_UNIT_TEST(DisableBuiltinAuthMechanism) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + runtime.GetAppData().AuthConfig.SetEnableLoginAuthentication(false); + ui64 txId = 100; + TActorId sender = runtime.AllocateEdgeActor(); + std::unique_ptr transaction(CreateAlterLoginCreateUser(++txId, "user1", "password1")); + transaction->Record.MutableTransaction(0)->SetWorkingDir("/MyRoot"); + ForwardToTablet(runtime, TTestTxConfig::SchemeShard, sender, transaction.release()); + auto resultLogin = Login(runtime, "user1", "password1"); + UNIT_ASSERT_VALUES_EQUAL(resultLogin.error(), "Login authentication is disabled"); + UNIT_ASSERT_VALUES_EQUAL(resultLogin.token(), ""); + auto describe = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot"); + UNIT_ASSERT(describe.HasPathDescription()); + UNIT_ASSERT(describe.GetPathDescription().HasDomainDescription()); + UNIT_ASSERT(describe.GetPathDescription().GetDomainDescription().HasSecurityState()); + UNIT_ASSERT(describe.GetPathDescription().GetDomainDescription().GetSecurityState().PublicKeysSize() > 0); + } + + Y_UNIT_TEST(AuditLogLoginSuccess) { + TTestBasicRuntime runtime; + std::vector lines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(lines)); + TTestEnv env(runtime); + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 1); // alter root subdomain + + ui64 txId = 100; + + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 2); // +user creation + + // test body + { + auto resultLogin = Login(runtime, "user1", "password1"); + UNIT_ASSERT_C(resultLogin.error().empty(), 
resultLogin); + } + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); // +user login + + auto last = FindAuditLine(lines, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(last, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(last, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(last, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "status=SUCCESS"); + UNIT_ASSERT(!last.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(last, "login_user=user1"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_auth_domain={none}"); + } + + Y_UNIT_TEST(AuditLogLoginFailure) { + TTestBasicRuntime runtime; + std::vector lines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(lines)); + TTestEnv env(runtime); + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 1); // alter root subdomain + + ui64 txId = 100; + + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 2); // +user creation + + // test body + { + auto resultLogin = Login(runtime, "user1", "bad_password"); + UNIT_ASSERT_C(!resultLogin.error().empty(), resultLogin); + } + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); // +user login + + auto last = FindAuditLine(lines, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(last, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(last, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(last, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(last, "reason=Invalid password"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_user=user1"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_auth_domain={none}"); + } } diff --git a/ydb/core/tx/schemeshard/ut_move/ut_move.cpp b/ydb/core/tx/schemeshard/ut_move/ut_move.cpp index 7690dd906a7e..1c9e7391798f 100644 --- 
a/ydb/core/tx/schemeshard/ut_move/ut_move.cpp +++ b/ydb/core/tx/schemeshard/ut_move/ut_move.cpp @@ -1191,4 +1191,25 @@ Y_UNIT_TEST_SUITE(TSchemeShardMoveTest) { TestMoveTable(runtime, ++txId, "/MyRoot/Table", "/MyRoot/TableMove"); env.TestWaitNotification(runtime, txId); } + + Y_UNIT_TEST(MoveTableWithSequence) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" DefaultFromSequence: "myseq" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + } + SequenceDescription { + Name: "myseq" + } + )"); + env.TestWaitNotification(runtime, txId); + + TestMoveTable(runtime, ++txId, "/MyRoot/Table", "/MyRoot/TableMove", {NKikimrScheme::StatusPreconditionFailed}); + } } diff --git a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp index c53fdeb58425..1a6a85e46e27 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp +++ b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -39,7 +40,7 @@ static const TString defaultTableSchema = R"( )"; static const TVector defaultYdbSchema = { - NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp)).SetNullable(false), NArrow::NTest::TTestColumn("data", TTypeInfo(NTypeIds::Utf8) ) }; @@ -556,11 +557,24 @@ Y_UNIT_TEST_SUITE(TOlap) { } } )", {NKikimrScheme::StatusAccepted}); + + env.TestWaitNotification(runtime, txId); + TestAlterOlapStore(runtime, ++txId, "/MyRoot", R"( + Name: "OlapStore" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AlterColumns { Name: "comment" DefaultValue: "10" } + } + } + )", {NKikimrScheme::StatusSchemeError}); } Y_UNIT_TEST(AlterTtl) { TTestBasicRuntime runtime; - TTestEnv env(runtime); + TTestEnvOptions options; + 
options.EnableTieringInColumnShard(true); + TTestEnv env(runtime, options); ui64 txId = 100; TString olapSchema = R"( @@ -634,17 +648,17 @@ Y_UNIT_TEST_SUITE(TOlap) { env.TestWaitNotification(runtime, txId); } - // TODO: AlterTiers - // negatives for store: disallow alters - // negatives for table: wrong tiers count, wrong tiers, wrong eviction column, wrong eviction values, - // different TTL columns in tiers -#if 0 Y_UNIT_TEST(StoreStats) { TTestBasicRuntime runtime; TTestEnv env(runtime); runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); runtime.UpdateCurrentTime(TInstant::Now() - TDuration::Seconds(600)); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + // disable stats batching auto& appData = runtime.GetAppData(); appData.SchemeShardConfig.SetStatsBatchTimeoutMs(0); @@ -690,6 +704,16 @@ Y_UNIT_TEST_SUITE(TOlap) { UNIT_ASSERT(shardId); UNIT_ASSERT(pathId); UNIT_ASSERT(planStep); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT(description.GetPathDescription().HasTableStats()); + UNIT_ASSERT_EQUAL(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_EQUAL(tabletStats.GetDataSize(), 0); + } + ui32 rowsInBatch = 100000; @@ -702,7 +726,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TSet txIds; for (ui32 i = 0; i < 10; ++i) { std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); 
NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); txIds.insert(txId); } @@ -714,16 +738,38 @@ Y_UNIT_TEST_SUITE(TOlap) { // trigger periodic stats at shard (after timeout) std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); NTxUT::PlanCommit(runtime, sender, shardId, ++planStep, {txId}); } + csController->WaitIndexation(TDuration::Seconds(5)); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetRowUpdates(), 0); + UNIT_ASSERT_GT(tabletStats.GetImmediateTxCompleted(), 0); + UNIT_ASSERT_GT(tabletStats.GetPlannedTxCompleted(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } + + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); - auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); - auto& tabletStats = description.GetPathDescription().GetTableStats(); + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } - 
UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); - UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); #if 0 TestDropColumnTable(runtime, ++txId, "/MyRoot/OlapStore", "ColumnTable"); env.TestWaitNotification(runtime, txId); @@ -738,5 +784,4 @@ Y_UNIT_TEST_SUITE(TOlap) { TestLsPathId(runtime, 2, NLs::PathStringEqual("")); #endif } -#endif } diff --git a/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp b/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp index 8817995f8881..4b596e677ca1 100644 --- a/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp +++ b/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp @@ -344,6 +344,53 @@ Y_UNIT_TEST_SUITE(TReplicationTests) { TestDescribeResult(DescribePath(runtime, "/MyRoot/Table"), { NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE), + NLs::UserAttrsEqual({}), + }); + } + + Y_UNIT_TEST(AlterReplicatedIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(CreateIndexedTableRequest(++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + ReplicationConfig { + Mode: REPLICATION_MODE_READ_ONLY + } + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + IndexImplTableDescription { + ReplicationConfig { + Mode: REPLICATION_MODE_READ_ONLY + } + } + } + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable"), { + NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_READ_ONLY), + }); + + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(AlterTableRequest(++txId, "/MyRoot/Table/Index", R"( + Name: "indexImplTable" + ReplicationConfig { + Mode: 
REPLICATION_MODE_NONE + } + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable"), { + NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE), }); } diff --git a/ydb/core/tx/schemeshard/ut_resource_pool/ut_resource_pool.cpp b/ydb/core/tx/schemeshard/ut_resource_pool/ut_resource_pool.cpp index f5a7f8754a5a..3d27ec3cd587 100644 --- a/ydb/core/tx/schemeshard/ut_resource_pool/ut_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/ut_resource_pool/ut_resource_pool.cpp @@ -9,16 +9,16 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" )", {NKikimrScheme::StatusAccepted}); env.TestWaitNotification(runtime, txId); - TestLs(runtime, "/MyRoot/.resource_pools/MyResourcePool", false, NLs::PathExist); + TestLs(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool", false, NLs::PathExist); } Y_UNIT_TEST(CreateResourcePoolWithProperties) { @@ -26,10 +26,10 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" Properties { Properties { @@ -49,7 +49,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { 
properties.MutableProperties()->insert({"concurrent_query_limit", "10"}); properties.MutableProperties()->insert({"query_cancel_after_seconds", "60"}); - auto describeResult = DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool"); + auto describeResult = DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"); TestDescribeResult(describeResult, {NLs::PathExist}); UNIT_ASSERT(describeResult.GetPathDescription().HasResourcePoolDescription()); const auto& resourcePoolDescription = describeResult.GetPathDescription().GetResourcePoolDescription(); @@ -63,21 +63,21 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" )", {NKikimrScheme::StatusAccepted}); env.TestWaitNotification(runtime, txId); - TestLs(runtime, "/MyRoot/.resource_pools/MyResourcePool", false, NLs::PathExist); + TestLs(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool", false, NLs::PathExist); - TestDropResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", "MyResourcePool"); + TestDropResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", "MyResourcePool"); env.TestWaitNotification(runtime, txId); - TestLs(runtime, "/MyRoot/.resource_pools/MyResourcePool", false, NLs::PathNotExist); + TestLs(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool", false, NLs::PathNotExist); } Y_UNIT_TEST(DropResourcePoolTwice) { @@ -85,16 +85,16 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", 
".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" )"); env.TestWaitNotification(runtime, txId); - AsyncDropResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", "MyResourcePool"); - AsyncDropResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", "MyResourcePool"); + AsyncDropResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", "MyResourcePool"); + AsyncDropResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", "MyResourcePool"); TestModificationResult(runtime, txId - 1); auto ev = runtime.GrabEdgeEvent(); @@ -106,7 +106,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { UNIT_ASSERT_VALUES_EQUAL(record.GetPathDropTxId(), txId - 1); env.TestWaitNotification(runtime, txId - 1); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool"), { + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"), { NLs::PathNotExist }); } @@ -116,11 +116,11 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 123; - AsyncMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + AsyncMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool1" )"); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool2" )"); TestModificationResult(runtime, txId-2, NKikimrScheme::StatusAccepted); @@ -129,14 +129,14 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { env.TestWaitNotification(runtime, {txId, txId-1, txId-2}); - TestDescribe(runtime, 
"/MyRoot/.resource_pools/MyResourcePool1"); - TestDescribe(runtime, "/MyRoot/.resource_pools/MyResourcePool2"); + TestDescribe(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool1"); + TestDescribe(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool2"); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools"), {NLs::PathVersionEqual(7)}); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool1"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool1"), {NLs::PathVersionEqual(2)}); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool2"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool2"), {NLs::PathVersionEqual(2)}); } @@ -151,12 +151,12 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { Name: "NilNoviSubLuna" )"; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", resourcePoolConfig); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", resourcePoolConfig); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", resourcePoolConfig); + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", resourcePoolConfig); + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", resourcePoolConfig); + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", resourcePoolConfig); ui64 sts[3]; sts[0] = TestModificationResults(runtime, txId-2, {ESts::StatusAccepted, ESts::StatusMultipleModifications, ESts::StatusAlreadyExists}); @@ -165,13 +165,13 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { for (ui32 
i=0; i<3; ++i) { if (sts[i] == ESts::StatusAlreadyExists) { - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/NilNoviSubLuna"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/NilNoviSubLuna"), {NLs::Finished, NLs::IsResourcePool}); } if (sts[i] == ESts::StatusMultipleModifications) { - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/NilNoviSubLuna"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/NilNoviSubLuna"), {NLs::Finished, NLs::IsResourcePool}); } @@ -179,12 +179,12 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { env.TestWaitNotification(runtime, {txId-2, txId-1, txId}); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/NilNoviSubLuna"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/NilNoviSubLuna"), {NLs::Finished, NLs::IsResourcePool, NLs::PathVersionEqual(2)}); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", resourcePoolConfig, {ESts::StatusAlreadyExists}); + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", resourcePoolConfig, {ESts::StatusAlreadyExists}); } Y_UNIT_TEST(ReadOnlyMode) { @@ -192,11 +192,11 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 123; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); AsyncMkDir(runtime, ++txId, "/MyRoot", "SubDirA"); - AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + AsyncCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" )"); @@ -211,13 +211,13 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { // Check that describe works TestDescribeResult(DescribePath(runtime, "/MyRoot/SubDirA"), {NLs::Finished}); - TestDescribeResult(DescribePath(runtime, 
"/MyRoot/.resource_pools/MyResourcePool"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"), {NLs::Finished, NLs::IsResourcePool}); // Check that new modifications fail TestMkDir(runtime, ++txId, "/MyRoot", "SubDirBBBB", {NKikimrScheme::StatusReadOnly}); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool2" )", {NKikimrScheme::StatusReadOnly}); @@ -235,18 +235,18 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 123; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); TestCreateResourcePool(runtime, ++txId, "/MyRoot", R"( Name: "AnotherDir/MyResourcePool" - )", {{NKikimrScheme::StatusSchemeError, "Resource pools shoud be placed in /MyRoot/.resource_pools"}}); + )", {{NKikimrScheme::StatusSchemeError, "Resource pools shoud be placed in /MyRoot/.metadata/workload_manager/pools"}}); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "AnotherDir/MyResourcePool" - )", {{NKikimrScheme::StatusSchemeError, "Resource pools shoud be placed in /MyRoot/.resource_pools"}}); + )", {{NKikimrScheme::StatusSchemeError, "Resource pools shoud be placed in /MyRoot/.metadata/workload_manager/pools"}}); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "" )", {{NKikimrScheme::StatusSchemeError, "error: path part shouldn't be empty"}}); } @@ -256,10 +256,10 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, 
++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestCreateResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" Properties { Properties { @@ -280,7 +280,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { properties.MutableProperties()->insert({"query_count_limit", "50"}); { - auto describeResult = DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool"); + auto describeResult = DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"); TestDescribeResult(describeResult, {NLs::PathExist}); UNIT_ASSERT(describeResult.GetPathDescription().HasResourcePoolDescription()); const auto& resourcePoolDescription = describeResult.GetPathDescription().GetResourcePoolDescription(); @@ -289,7 +289,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { UNIT_ASSERT_VALUES_EQUAL(resourcePoolDescription.GetProperties().DebugString(), properties.DebugString()); } - TestAlterResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestAlterResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" Properties { Properties { @@ -309,7 +309,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { properties.MutableProperties()->insert({"query_cancel_after_seconds", "60"}); { - auto describeResult = DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool"); + auto describeResult = DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"); TestDescribeResult(describeResult, {NLs::PathExist}); UNIT_ASSERT(describeResult.GetPathDescription().HasResourcePoolDescription()); const auto& resourcePoolDescription = describeResult.GetPathDescription().GetResourcePoolDescription(); @@ -324,10 +324,10 @@ Y_UNIT_TEST_SUITE(TResourcePoolTest) { TTestEnv env(runtime); ui64 txId = 100; - TestMkDir(runtime, ++txId, "/MyRoot", ".resource_pools"); + 
TestMkDir(runtime, ++txId, "/MyRoot", ".metadata/workload_manager/pools"); env.TestWaitNotification(runtime, txId); - TestAlterResourcePool(runtime, ++txId, "/MyRoot/.resource_pools", R"( + TestAlterResourcePool(runtime, ++txId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" Properties { Properties { diff --git a/ydb/core/tx/schemeshard/ut_resource_pool_reboots/ut_resource_pool_reboots.cpp b/ydb/core/tx/schemeshard/ut_resource_pool_reboots/ut_resource_pool_reboots.cpp index d8b772e24bdc..3bc4ae28cda5 100644 --- a/ydb/core/tx/schemeshard/ut_resource_pool_reboots/ut_resource_pool_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_resource_pool_reboots/ut_resource_pool_reboots.cpp @@ -7,9 +7,9 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { Y_UNIT_TEST(CreateResourcePoolWithReboots) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { - AsyncMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + AsyncMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); - AsyncCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + AsyncCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "MyResourcePool" Properties { Properties { @@ -31,7 +31,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - auto describeResult = DescribePath(runtime, "/MyRoot/.resource_pools/MyResourcePool"); + auto describeResult = DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/MyResourcePool"); TestDescribeResult(describeResult, {NLs::Finished}); UNIT_ASSERT(describeResult.GetPathDescription().HasResourcePoolDescription()); @@ -46,22 +46,22 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { Y_UNIT_TEST(ParallelCreateDrop) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); 
t.TestEnv->TestWaitNotification(runtime, t.TxId); - AsyncCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + AsyncCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "DropMe" )"); - AsyncDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "DropMe"); + AsyncDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "DropMe"); t.TestEnv->TestWaitNotification(runtime, t.TxId-1); - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "DropMe"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "DropMe"); t.TestEnv->TestWaitNotification(runtime, t.TxId); { TInactiveZone inactive(activeZone); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/DropMe"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/DropMe"), {NLs::PathNotExist}); } }); @@ -71,22 +71,22 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { { - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); t.TestEnv->TestWaitNotification(runtime, t.TxId); TInactiveZone inactive(activeZone); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); { TInactiveZone inactive(activeZone); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/ResourcePool"), + TestDescribeResult(DescribePath(runtime, 
"/MyRoot/.metadata/workload_manager/pools/ResourcePool"), {NLs::PathNotExist}); } }); @@ -98,21 +98,21 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); { TInactiveZone inactive(activeZone); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/ResourcePool"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/ResourcePool"), {NLs::PathNotExist}); } }); @@ -124,32 +124,32 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestCreateResourcePool(runtime, t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); { TInactiveZone inactive(activeZone); - TestDescribeResult(DescribePath(runtime, 
"/MyRoot/.resource_pools/ResourcePool"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/ResourcePool"), {NLs::PathNotExist}); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/ResourcePool"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/ResourcePool"), {NLs::PathNotExist}); } }); @@ -161,19 +161,19 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); @@ -181,7 +181,7 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - 
TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } }); @@ -193,29 +193,29 @@ Y_UNIT_TEST_SUITE(TResourcePoolTestReboots) { { TInactiveZone inactive(activeZone); - TestMkDir(runtime, ++t.TxId, "/MyRoot", ".resource_pools"); + TestMkDir(runtime, ++t.TxId, "/MyRoot", ".metadata/workload_manager/pools"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", R"( + TestCreateResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", R"( Name: "ResourcePool" )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.resource_pools", "ResourcePool"); + TestDropResourcePool(runtime, ++t.TxId, "/MyRoot/.metadata/workload_manager/pools", "ResourcePool"); t.TestEnv->TestWaitNotification(runtime, t.TxId); { TInactiveZone inactive(activeZone); - TestDescribeResult(DescribePath(runtime, "/MyRoot/.resource_pools/ResourcePool"), + TestDescribeResult(DescribePath(runtime, "/MyRoot/.metadata/workload_manager/pools/ResourcePool"), {NLs::PathNotExist}); } }); diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp index 13b365dc102e..6007cf9619d5 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp +++ 
b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -20,8 +21,10 @@ #include +#include #include #include +#include #include #include @@ -37,6 +40,16 @@ using namespace NKikimr::NWrappers::NTestHelpers; namespace { + Aws::SDKOptions Options; + + Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); + } + + Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); + } + const TString EmptyYsonStr = R"([[[[];%false]]])"; TString GenerateScheme(const NKikimrSchemeOp::TPathDescription& pathDesc) { @@ -48,7 +61,7 @@ namespace { scheme.mutable_primary_key()->CopyFrom(tableDesc.GetKeyColumnNames()); FillColumnDescription(scheme, mkqlKeyType, tableDesc); - FillIndexDescription(scheme, tableDesc, mkqlKeyType); + FillIndexDescription(scheme, tableDesc); FillStorageSettings(scheme, tableDesc); FillColumnFamilies(scheme, tableDesc); FillAttributes(scheme, pathDesc); @@ -316,7 +329,6 @@ namespace { runtime.SetObserverFunc(prevObserver); } - } // anonymous Y_UNIT_TEST_SUITE(TRestoreTests) { @@ -1206,7 +1218,7 @@ value { const TVector keyTags = {1}; TVector valueTags(values.size()); - std::iota(valueTags.begin(), valueTags.end(), 2); + std::iota(valueTags.begin(), valueTags.end(), 2); UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values); @@ -1298,7 +1310,7 @@ value { "jsondoc_value", "uuid_value", }; - + auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns); NKqp::CompareYson(expectedJson, contentOriginalTable); @@ -2166,7 +2178,7 @@ Y_UNIT_TEST_SUITE(TImportTests) { void Run(TTestBasicRuntime& runtime, TTestEnv& env, THashMap&& data, const TString& request, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS, - const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "") + const TString& dbName = "/MyRoot", bool serverless = false, const TString& 
userSID = "", const TString& peerName = "") { ui64 id = 100; @@ -2256,7 +2268,7 @@ Y_UNIT_TEST_SUITE(TImportTests) { break; } - TestImport(runtime, schemeshardId, ++id, dbName, Sprintf(request.data(), port), userSID, initialStatus); + TestImport(runtime, schemeshardId, ++id, dbName, Sprintf(request.data(), port), userSID, peerName, initialStatus); env.TestWaitNotification(runtime, id, schemeshardId); if (initialStatus != Ydb::StatusIds::SUCCESS) { @@ -3090,6 +3102,9 @@ Y_UNIT_TEST_SUITE(TImportTests) { void CancelShouldSucceed(TDelayFunc delayFunc) { TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + TTestEnv env(runtime, TTestEnvOptions()); ui64 txId = 100; @@ -3134,12 +3149,45 @@ Y_UNIT_TEST_SUITE(TImportTests) { )", port)); const ui64 importId = txId; + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + WaitForDelayed(runtime, delayed, prevObserver); TestCancelImport(runtime, ++txId, "/MyRoot", importId); runtime.Send(delayed.Release(), 0, true); env.TestWaitNotification(runtime, importId); + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT 
END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + TestGetImport(runtime, importId, "/MyRoot", Ydb::StatusIds::CANCELLED); } @@ -3429,6 +3477,260 @@ Y_UNIT_TEST_SUITE(TImportTests) { UNIT_ASSERT(entry.HasEndTime()); UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); } + + // Based on CompletedImportEndTime + Y_UNIT_TEST(AuditCompletedImport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + TestImport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + 
UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); // doing import + + env.TestWaitNotification(runtime, txId); + + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + + const auto desc = TestGetImport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + } + + // Based on CancelledImportEndTime + 
Y_UNIT_TEST(AuditCancelledImport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + auto delayFunc = [](TAutoPtr& ev) { + if (ev->GetTypeRewrite() != TEvSchemeShard::EvModifySchemeTransaction) { + return false; + } + + return ev->Get()->Record + .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpRestore; + }; + + THolder delayed; + auto prevObserver = SetDelayObserver(runtime, delayed, delayFunc); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + TestImport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + const ui64 importId = txId; + + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, 
"status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); // doing import + + WaitForDelayed(runtime, delayed, prevObserver); + + TestCancelImport(runtime, ++txId, "/MyRoot", importId); + + auto desc = TestGetImport(runtime, importId, "/MyRoot"); + auto entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_CANCELLATION); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(!entry.HasEndTime()); + + runtime.Send(delayed.Release(), 0, true); + env.TestWaitNotification(runtime, importId); + + desc = TestGetImport(runtime, importId, "/MyRoot", Ydb::StatusIds::CANCELLED); + entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_CANCELLED); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + + Y_UNIT_TEST(UserSID) { + 
TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const TString request = Sprintf(R"( + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + const TString userSID = "user@builtin"; + TestImport(runtime, ++txId, "/MyRoot", request, userSID); + + const auto desc = TestGetImport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_PREPARING); + UNIT_ASSERT_VALUES_EQUAL(entry.GetUserSID(), userSID); + } } Y_UNIT_TEST_SUITE(TImportWithRebootsTests) { diff --git a/ydb/core/tx/schemeshard/ut_restore/ya.make b/ydb/core/tx/schemeshard/ut_restore/ya.make index 7044d4283b5e..d514b36b49ee 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ya.make +++ b/ydb/core/tx/schemeshard/ut_restore/ya.make @@ -14,6 +14,7 @@ ELSE() ENDIF() PEERDIR( + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core contrib/libs/double-conversion library/cpp/string_utils/quote ydb/core/kqp/ut/common diff --git a/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp b/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp index 2ea65bb9caea..6c78df21a2ce 100644 --- a/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp +++ b/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -277,6 +278,69 @@ Y_UNIT_TEST_SUITE(TSchemeShardSplitBySizeTest) { // test requires more txids than cached at start } + 
Y_UNIT_TEST(MergeIndexTableShards) { + TTestBasicRuntime runtime; + + TTestEnvOptions opts; + opts.EnableBackgroundCompaction(false); + TTestEnv env(runtime, opts); + + ui64 txId = 100; + + TBlockEvents statsBlocker(runtime); + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "ByValue" + KeyColumnNames: ["value"] + IndexImplTableDescription { + SplitBoundary { KeyPrefix { Tuple { Optional { Text: "A" } } } } + SplitBoundary { KeyPrefix { Tuple { Optional { Text: "B" } } } } + SplitBoundary { KeyPrefix { Tuple { Optional { Text: "C" } } } } + } + } + )" + ); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), + { NLs::PartitionCount(4) } + ); + + statsBlocker.Stop().Unblock(); + + TVector indexShards; + auto shardCollector = [&indexShards](const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); + const auto& partitions = record.GetPathDescription().GetTablePartitions(); + indexShards.clear(); + indexShards.reserve(partitions.size()); + for (const auto& partition : partitions) { + indexShards.emplace_back(partition.GetDatashardId()); + } + }; + + // wait until all index impl table shards are merged into one + while (true) { + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + shardCollector + }); + if (indexShards.size() > 1) { + // If a merge happens, old shards are deleted and replaced with a new one. + // That is why we need to wait for * all * the shards to be deleted. 
+ env.TestWaitTabletDeletion(runtime, indexShards); + } else { + break; + } + } + } + Y_UNIT_TEST(AutoMergeInOne) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { diff --git a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp index 78db80f9f32c..8fc34d9edbe0 100644 --- a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp @@ -2568,6 +2568,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } + //clear subdomain { TestDescribeResult(DescribePath(runtime, "/MyRoot"), @@ -2585,6 +2586,155 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } } + Y_UNIT_TEST(ColumnSchemeLimitsRejects) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TSchemeLimits lowLimits; + lowLimits.MaxDepth = 4; + lowLimits.MaxPaths = 3; + lowLimits.MaxChildrenInDir = 3; + lowLimits.MaxAclBytesSize = 25; + lowLimits.MaxTableColumns = 3; + lowLimits.MaxColumnTableColumns = 3; + lowLimits.MaxTableColumnNameLength = 10; + lowLimits.MaxTableKeyColumns = 1; + lowLimits.MaxShards = 6; + lowLimits.MaxShardsInPath = 4; + lowLimits.MaxPQPartitions = 20; + + + //lowLimits.ExtraPathSymbolsAllowed = "!\"#$%&'()*+,-.:;<=>?@[\\]^_`{|}~"; + SetSchemeshardSchemaLimits(runtime, lowLimits); + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions)}); + + { + TestCreateSubDomain(runtime, txId++, "/MyRoot", + "PlanResolution: 50 " + "Coordinators: 1 " + "Mediators: 1 " + "TimeCastBucketsPerMediator: 2 " + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 2" + " data_stream_reserved_storage_quota: 200000" + "}"); + } + + //create column tables, column limits + { + TestMkDir(runtime, txId++, "/MyRoot/USER_0", "C"); + env.TestWaitNotification(runtime, txId - 1); + + // MaxColumnTableColumns + 
TestCreateColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value0"} + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value1"} + AddColumns { Name: "Value2" Type: "Utf8" } + AddColumns { Name: "Value3" Type: "Utf8" } + AddColumns { Name: "Value4" Type: "Utf8" } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestCreateColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C1" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + Columns { Name: "Value2" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusSchemeError}); + + TString olapSchema = R"( + Name: "OlapStore1" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "data" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchema, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TString olapSchemaBig = R"( + Name: "OlapStoreBig" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: 
"timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "data" Type: "Utf8" } + Columns { Name: "data2" Type: "Utf8" } + Columns { Name: "data3" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchemaBig, {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment2" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + } + } + Y_UNIT_TEST(SchemeLimitsRejectsWithIndexedTables) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp b/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp index 7b09ebefe501..f93498ee65b2 100644 --- a/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp +++ b/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp @@ -668,29 +668,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardTopicSplitMergeTest) { partitionStrategy->SetPartitionStrategyType(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED); } }, {{TEvSchemeShard::EStatus::StatusInvalidParameter}}); - - ModifyTopic(runtime, env, txId, [&](auto& scheme) { - { - auto* partitionStrategy = scheme.MutablePQTabletConfig()->MutablePartitionStrategy(); - partitionStrategy->SetPartitionStrategyType(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED); - 
partitionStrategy->SetMaxPartitionCount(0); - } - }); - - topic = DescribeTopic(runtime); - - UNIT_ASSERT_VALUES_EQUAL(static_cast(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED), - static_cast(topic.GetPQTabletConfig().GetPartitionStrategy().GetPartitionStrategyType())); - - UNIT_ASSERT_VALUES_EQUAL(3, topic.GetPartitions().size()); - for (const auto& p : topic.GetPartitions()) { - Cerr << ">>>>> Verify partition " << p.GetPartitionId() << Endl << Flush; - UNIT_ASSERT_VALUES_EQUAL(static_cast(::NKikimrPQ::ETopicPartitionStatus::Active), static_cast(p.GetStatus())); - UNIT_ASSERT(p.GetChildPartitionIds().empty()); - UNIT_ASSERT(p.GetParentPartitionIds().empty()); - UNIT_ASSERT(!p.HasKeyRange()); - } - } // Y_UNIT_TEST(DisableSplitMerge) Y_UNIT_TEST(EnableSplitMerge) { diff --git a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp index ce57f14992b3..1accb55c269b 100644 --- a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp +++ b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp @@ -1150,6 +1150,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { Columns { Name: "key" Type: "Uint64" NotNull: true } Columns { Name: "modified_at" Type: "Timestamp" } Columns { Name: "saved_at" Type: "Datetime" } + Columns { Name: "data" Type: "Utf8" } KeyColumnNames: ["key"] } )"); @@ -1206,6 +1207,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { } } ); + TestAlterColumnTable(runtime, ++txId, "/MyRoot", R"( + Name: "TTLEnabledTable" + AlterSchema { + AlterColumns {Name: "data" DefaultValue: "10"} + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId); } Y_UNIT_TEST(AlterColumnTable_Negative) { diff --git a/ydb/core/tx/schemeshard/ya.make b/ydb/core/tx/schemeshard/ya.make index 975514b27a12..e17c0a11b792 100644 --- a/ydb/core/tx/schemeshard/ya.make +++ b/ydb/core/tx/schemeshard/ya.make @@ -207,6 +207,7 @@ SRCS( schemeshard_utils.cpp schemeshard_utils.h 
schemeshard_bg_tasks__list.cpp + schemeshard_xxport__helpers.cpp schemeshard_export__cancel.cpp schemeshard_export__create.cpp schemeshard_export__forget.cpp @@ -261,6 +262,7 @@ PEERDIR( ydb/core/persqueue/events ydb/core/persqueue/writer ydb/core/protos + ydb/core/resource_pools ydb/core/scheme ydb/core/statistics ydb/core/sys_view/partition_stats diff --git a/ydb/core/tx/sharding/unboxed_reader.h b/ydb/core/tx/sharding/unboxed_reader.h index fed3e972e2b9..11a31d7e3597 100644 --- a/ydb/core/tx/sharding/unboxed_reader.h +++ b/ydb/core/tx/sharding/unboxed_reader.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace NKikimr::NMiniKQL { diff --git a/ydb/core/tx/sharding/ut/ut_sharding.cpp b/ydb/core/tx/sharding/ut/ut_sharding.cpp index 14dd61a48560..dce5bc6e7500 100644 --- a/ydb/core/tx/sharding/ut/ut_sharding.cpp +++ b/ydb/core/tx/sharding/ut/ut_sharding.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index 85dd6d60c10b..57462d745d3a 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -199,19 +199,18 @@ THashMap TTiersManager::GetTiering() const { Y_ABORT_UNLESS(snapshotPtr); auto& tierConfigs = snapshotPtr->GetTierConfigs(); for (auto&& i : PathIdTiering) { - auto* tiering = snapshotPtr->GetTieringById(i.second); - if (tiering) { + auto* tieringRule = snapshotPtr->GetTieringById(i.second); + if (tieringRule) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "activation"); - result.emplace(i.first, tiering->BuildOlapTiers()); - for (auto& [pathId, pathTiering] : result) { - for (auto& [name, tier] : pathTiering.GetTierByName()) { - AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); - auto it = tierConfigs.find(name); - if (it != tierConfigs.end()) { - tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); - } + 
NOlap::TTiering tiering = tieringRule->BuildOlapTiers(); + for (auto& [name, tier] : tiering.GetTierByName()) { + AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); + auto it = tierConfigs.find(name); + if (it != tierConfigs.end()) { + tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); } } + result.emplace(i.first, std::move(tiering)); } else { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "not_found"); } diff --git a/ydb/core/tx/tiering/rule/manager.cpp b/ydb/core/tx/tiering/rule/manager.cpp index c6ea9e9f6130..a97ba742467a 100644 --- a/ydb/core/tx/tiering/rule/manager.cpp +++ b/ydb/core/tx/tiering/rule/manager.cpp @@ -6,13 +6,17 @@ namespace NKikimr::NColumnShard::NTiers { void TTieringRulesManager::DoPrepareObjectsBeforeModification(std::vector&& objects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, - const TInternalModificationContext& context) const { + const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& /*alterContext*/) const { TActivationContext::Register(new TRulePreparationActor(std::move(objects), controller, context)); } NMetadata::NModifications::TOperationParsingResult TTieringRulesManager::DoBuildPatchFromSettings( const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& /*context*/) const { + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + return TConclusionStatus::Fail("Tiering functionality is disabled for OLAP tables."); + } + NMetadata::NInternal::TTableRecord result; result.SetColumn(TTieringRule::TDecoder::TieringRuleId, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); if (settings.GetObjectId().StartsWith("$") || settings.GetObjectId().StartsWith("_")) { @@ -21,6 +25,9 @@ NMetadata::NModifications::TOperationParsingResult TTieringRulesManager::DoBuild { auto fValue = 
settings.GetFeaturesExtractor().Extract(TTieringRule::TDecoder::DefaultColumn); if (fValue) { + if (fValue->Empty()) { + return TConclusionStatus::Fail("defaultColumn cannot be empty"); + } result.SetColumn(TTieringRule::TDecoder::DefaultColumn, NMetadata::NInternal::TYDBValue::Utf8(*fValue)); } } diff --git a/ydb/core/tx/tiering/rule/manager.h b/ydb/core/tx/tiering/rule/manager.h index 3268c90021c4..d5646dbf3002 100644 --- a/ydb/core/tx/tiering/rule/manager.h +++ b/ydb/core/tx/tiering/rule/manager.h @@ -9,7 +9,7 @@ class TTieringRulesManager: public NMetadata::NModifications::TGenericOperations protected: virtual void DoPrepareObjectsBeforeModification(std::vector&& objects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, - const TInternalModificationContext& context) const override; + const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) const override; virtual NMetadata::NModifications::TOperationParsingResult DoBuildPatchFromSettings(const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const override; diff --git a/ydb/core/tx/tiering/rule/object.cpp b/ydb/core/tx/tiering/rule/object.cpp index 59d42bdb4c8e..a596b56890ca 100644 --- a/ydb/core/tx/tiering/rule/object.cpp +++ b/ydb/core/tx/tiering/rule/object.cpp @@ -30,6 +30,10 @@ bool TTieringRule::DeserializeDescriptionFromJson(const NJson::TJsonValue& jsonI if (!jsonInfo["rules"].GetArrayPointer(&rules)) { return false; } + if (rules->empty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_rule_deserialization_failed")("reason", "empty_rules"); + return false; + } for (auto&& i : *rules) { TTieringInterval interval; if (!interval.DeserializeFromJson(i)) { @@ -61,6 +65,9 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val if (!decoder.Read(decoder.GetDefaultColumnIdx(), DefaultColumn, r)) { return false; } + if (DefaultColumn.Empty()) { + return 
false; + } NJson::TJsonValue jsonDescription; if (!decoder.ReadJson(decoder.GetDescriptionIdx(), jsonDescription, r)) { return false; @@ -72,6 +79,7 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val } NKikimr::NOlap::TTiering TTieringRule::BuildOlapTiers() const { + AFL_VERIFY(!Intervals.empty()); NOlap::TTiering result; for (auto&& r : Intervals) { AFL_VERIFY(result.Add(std::make_shared(r.GetTierName(), r.GetDurationForEvict(), GetDefaultColumn()))); diff --git a/ydb/core/tx/tiering/rule/ss_fetcher.cpp b/ydb/core/tx/tiering/rule/ss_fetcher.cpp index 681e96780eb4..e822ace4c5a8 100644 --- a/ydb/core/tx/tiering/rule/ss_fetcher.cpp +++ b/ydb/core/tx/tiering/rule/ss_fetcher.cpp @@ -17,7 +17,7 @@ void TFetcherCheckUserTieringPermissions::DoProcess(NSchemeShard::TSchemeShard& } else { bool denied = false; for (auto&& i : TieringRuleIds) { - const std::set& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); + const auto& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); for (auto&& pathId : pathIds) { auto path = NSchemeShard::TPath::Init(pathId, &schemeShard); if (!path.IsResolved() || path.IsUnderDeleting() || path.IsDeleted()) { diff --git a/ydb/core/tx/tiering/tier/manager.cpp b/ydb/core/tx/tiering/tier/manager.cpp index a64d2a7603ab..8d60219624b4 100644 --- a/ydb/core/tx/tiering/tier/manager.cpp +++ b/ydb/core/tx/tiering/tier/manager.cpp @@ -8,6 +8,10 @@ NMetadata::NModifications::TOperationParsingResult TTiersManager::DoBuildPatchFr const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const { + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + return TConclusionStatus::Fail("Tiering functionality is disabled for OLAP tables."); + } + NMetadata::NInternal::TTableRecord result; result.SetColumn(TTierConfig::TDecoder::TierName, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); if (settings.GetObjectId().StartsWith("$") || 
settings.GetObjectId().StartsWith("_")) { @@ -65,7 +69,7 @@ NMetadata::NModifications::TOperationParsingResult TTiersManager::DoBuildPatchFr void TTiersManager::DoPrepareObjectsBeforeModification(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, - const TInternalModificationContext& context) const + const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& /*alterContext*/) const { TActivationContext::Register(new TTierPreparationActor(std::move(patchedObjects), controller, context)); } diff --git a/ydb/core/tx/tiering/tier/manager.h b/ydb/core/tx/tiering/tier/manager.h index ba777648139c..7d8626c8c36c 100644 --- a/ydb/core/tx/tiering/tier/manager.h +++ b/ydb/core/tx/tiering/tier/manager.h @@ -9,7 +9,7 @@ class TTiersManager: public NMetadata::NModifications::TGenericOperationsManager protected: virtual void DoPrepareObjectsBeforeModification(std::vector&& patchedObjects, NMetadata::NModifications::IAlterPreparationController::TPtr controller, - const TInternalModificationContext& context) const override; + const TInternalModificationContext& context, const NMetadata::NModifications::TAlterOperationContext& alterContext) const override; virtual NMetadata::NModifications::TOperationParsingResult DoBuildPatchFromSettings(const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const override; diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index 9eeb50b23257..21fa01b29a6e 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -32,21 +32,15 @@ class TFastTTLCompactionController: public NKikimr::NYDBTest::ICSController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } - virtual ui64 GetSmallPortionSizeDetector(const 
ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return 0; } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } }; @@ -336,7 +330,8 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.GrpcPort = grpcPort; serverSettings.SetDomainName("Root") .SetUseRealThreads(false) - .SetEnableMetadataProvider(true); + .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -418,6 +413,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); Tests::TServerSettings serverSettings(msgbPort); serverSettings.Port = msgbPort; @@ -425,6 +421,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) .SetAppConfig(appConfig); Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -555,6 +552,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); diff --git a/ydb/core/tx/tx_processing.h b/ydb/core/tx/tx_processing.h 
index d992dd71474d..1f9b86527225 100644 --- a/ydb/core/tx/tx_processing.h +++ b/ydb/core/tx/tx_processing.h @@ -105,7 +105,7 @@ struct TEvTxProcessing { } }; - struct TEvReadSet : public TEventPB { + struct TEvReadSet: public TEventPB { TEvReadSet() {} diff --git a/ydb/core/tx/tx_proxy/global.cpp b/ydb/core/tx/tx_proxy/global.cpp new file mode 100644 index 000000000000..66d88d8d824e --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.cpp @@ -0,0 +1,5 @@ +#include "global.h" + +namespace NKikimr::NTxProxy { + +} diff --git a/ydb/core/tx/tx_proxy/global.h b/ydb/core/tx/tx_proxy/global.h new file mode 100644 index 000000000000..00002b17e0ac --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.h @@ -0,0 +1,9 @@ +#pragma once +#include + +namespace NKikimr::NTxProxy { +class TLimits { +public: + static constexpr ui64 MemoryInFlightWriting = (ui64)1 << 30; +}; +} \ No newline at end of file diff --git a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp index f5b7c6b07cd5..557cf13c14cb 100644 --- a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp +++ b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp @@ -1,10 +1,15 @@ -#include -#include +#include "global.h" + +#include #include +#include #include -#include +#include +#include #include +#include + #include namespace NKikimr { @@ -16,30 +21,29 @@ using namespace NLongTxService; // Common logic of LongTx Write that takes care of splitting the data according to the sharding scheme, // sending it to shards and collecting their responses template -class TLongTxWriteBase : public TActorBootstrapped { +class TLongTxWriteBase: public TActorBootstrapped { using TBase = TActorBootstrapped; + static inline TAtomicCounter MemoryInFlight = 0; + protected: using TThis = typename TBase::TThis; public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } - - TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, - const TLongTxId& 
longTxId, const TString& dedupId) - : TBase() - , DatabaseName(databaseName) + TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, const TLongTxId& longTxId, const TString& dedupId) + : DatabaseName(databaseName) , Path(path) , DedupId(dedupId) , LongTxId(longTxId) - , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") - { + , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") { if (token) { UserToken.emplace(token); } } + virtual ~TLongTxWriteBase() { + AFL_VERIFY(MemoryInFlight.Sub(InFlightSize) >= 0); + } + protected: void ProceedWithSchema(const NSchemeCache::TSchemeCacheNavigate& resp) { NWilson::TProfileSpan pSpan = ActorSpan.BuildChildrenSpan("ProceedWithSchema"); @@ -53,17 +57,24 @@ class TLongTxWriteBase : public TActorBootstrapped { if (UserToken && entry.SecurityObject) { const ui32 access = NACLib::UpdateRow; if (!entry.SecurityObject->CheckAccess(access, *UserToken)) { - RaiseIssue(MakeIssue(NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() - << "User has no permission to perform writes to this table" - << " user: " << UserToken->GetUserSID() - << " path: " << Path)); + RaiseIssue(MakeIssue( + NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() << "User has no permission to perform writes to this table" + << " user: " << UserToken->GetUserSID() << " path: " << Path)); return ReplyError(Ydb::StatusIds::UNAUTHORIZED); } } + auto accessor = ExtractDataAccessor(); + AFL_VERIFY(!InFlightSize); + InFlightSize = accessor->GetSize(); + const i64 sizeInFlight = MemoryInFlight.Add(InFlightSize); + if (TLimits::MemoryInFlightWriting < (ui64)sizeInFlight && sizeInFlight != InFlightSize) { + return ReplyError(Ydb::StatusIds::OVERLOADED, "a lot of memory in flight"); + } if (NCSIndex::TServiceOperator::IsEnabled()) { - TBase::Send(NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), - new NCSIndex::TEvAddData(GetDataAccessor().GetDeserializedBatch(), Path, 
std::make_shared(TBase::SelfId()))); + TBase::Send( + NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), new NCSIndex::TEvAddData(accessor->GetDeserializedBatch(), Path, + std::make_shared(TBase::SelfId()))); } else { IndexReady = true; } @@ -73,10 +84,11 @@ class TLongTxWriteBase : public TActorBootstrapped { return ReplyError(Ydb::StatusIds::BAD_REQUEST, "Shard splitter not implemented for table kind"); } - auto initStatus = shardsSplitter->SplitData(entry, GetDataAccessor()); + auto initStatus = shardsSplitter->SplitData(entry, *accessor); if (!initStatus.Ok()) { return ReplyError(initStatus.GetStatus(), initStatus.GetErrorMessage()); } + accessor.reset(); const auto& splittedData = shardsSplitter->GetSplitData(); InternalController = std::make_shared(splittedData.GetShardRequestsCount(), this->SelfId(), LongTxId); @@ -85,24 +97,26 @@ class TLongTxWriteBase : public TActorBootstrapped { ui32 writeIdx = 0; for (auto& [shard, infos] : splittedData.GetShardsInfo()) { for (auto&& shardInfo : infos) { + InternalController->GetCounters()->OnRequest(shardInfo->GetRowsCount(), shardInfo->GetBytes()); sumBytes += shardInfo->GetBytes(); rowsCount += shardInfo->GetRowsCount(); - this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, ++writeIdx, NEvWrite::EModificationType::Replace)); + this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, + ++writeIdx, NEvWrite::EModificationType::Replace)); } } pSpan.Attribute("affected_shards_count", (long)splittedData.GetShardsInfo().size()); pSpan.Attribute("bytes", (long)sumBytes); pSpan.Attribute("rows", (long)rowsCount); pSpan.Attribute("shards_count", (long)splittedData.GetShardsCount()); - AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())("shards_count", splittedData.GetShardsCount()) - ("path", Path)("shards_info", 
splittedData.ShortLogString(32)); + AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())( + "shards_count", splittedData.GetShardsCount())("path", Path)("shards_info", splittedData.ShortLogString(32)); this->Become(&TThis::StateMain); } private: STFUNC(StateMain) { switch (ev->GetTypeRewrite()) { - hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle) + hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle); hFunc(TEvLongTxService::TEvAttachColumnShardWritesResult, Handle); hFunc(NCSIndex::TEvAddDataResult, Handle); } @@ -150,11 +164,10 @@ class TLongTxWriteBase : public TActorBootstrapped { IndexReady = true; } } - } protected: - virtual NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const = 0; + virtual std::unique_ptr ExtractDataAccessor() = 0; virtual void RaiseIssue(const NYql::TIssue& issue) = 0; virtual void ReplyError(Ydb::StatusIds::StatusCode status, const TString& message = TString()) = 0; virtual void ReplySuccess() = 0; @@ -164,7 +177,9 @@ class TLongTxWriteBase : public TActorBootstrapped { const TString Path; const TString DedupId; TLongTxId LongTxId; + private: + i64 InFlightSize = 0; std::optional UserToken; NWilson::TProfileSpan ActorSpan; NEvWrite::TWritersController::TPtr InternalController; @@ -174,15 +189,19 @@ class TLongTxWriteBase : public TActorBootstrapped { // LongTx Write implementation called from the inside of YDB (e.g. 
as a part of BulkUpsert call) // NOTE: permission checks must have been done by the caller -class TLongTxWriteInternal : public TLongTxWriteBase { +class TLongTxWriteInternal: public TLongTxWriteBase { using TBase = TLongTxWriteBase; - class TParsedBatchData : public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + class TParsedBatchData: public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + private: + using TBase = NEvWrite::IShardsSplitter::IEvWriteDataAccessor; std::shared_ptr Batch; + public: TParsedBatchData(std::shared_ptr batch) - : Batch(batch) - {} + : TBase(NArrow::GetBatchMemorySize(batch)) + , Batch(batch) { + } std::shared_ptr GetDeserializedBatch() const override { return Batch; @@ -193,25 +212,19 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } }; - NEvWrite::IShardsSplitter::IEvWriteDataAccessor::TPtr DataAccessor; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } + std::unique_ptr DataAccessor; - explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, - std::shared_ptr issues) +public: + explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, const TString& databaseName, + const TString& path, std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) : TBase(databaseName, path, TString(), longTxId, dedupId) , ReplyTo(replyTo) , NavigateResult(navigateResult) , Batch(batch) - , Issues(issues) - { + , Issues(issues) { Y_ABORT_UNLESS(Issues); - DataAccessor = std::make_shared(Batch); + DataAccessor = std::make_unique(Batch); } void Bootstrap() { @@ -220,8 +233,9 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } protected: - NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const override { - 
return *DataAccessor; + std::unique_ptr ExtractDataAccessor() override { + AFL_VERIFY(DataAccessor); + return std::move(DataAccessor); } void RaiseIssue(const NYql::TIssue& issue) override { @@ -248,19 +262,14 @@ class TLongTxWriteInternal : public TLongTxWriteBase { std::shared_ptr Issues; }; - -TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, - const NLongTxService::TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, std::shared_ptr issues) -{ - return ctx.RegisterWithSameMailbox( - new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); +TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, const NLongTxService::TLongTxId& longTxId, + const TString& dedupId, const TString& databaseName, const TString& path, + std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) { + return ctx.RegisterWithSameMailbox(new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); } // - -} -} +} // namespace NTxProxy +} // namespace NKikimr diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp index 6d487a26016b..281d8bf05bc7 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp @@ -7,10 +7,16 @@ namespace NKikimr { : TBase("BulkUpsert") { RequestsCount = TBase::GetDeriviative("Requests/Count"); - ReplyDuration = TBase::GetHistogram("Replies/Duration", NMonitoring::ExponentialHistogram(15, 2, 1)); + ReplyDuration = TBase::GetHistogram("Replies/Duration", NMonitoring::ExponentialHistogram(15, 2, 10)); RowsCount = TBase::GetDeriviative("Rows/Count"); - PackageSize = TBase::GetHistogram("Rows/PackageSize", NMonitoring::ExponentialHistogram(15, 2, 10)); + 
PackageSizeRecordsByRecords = TBase::GetHistogram("ByRecords/PackageSize/Records", NMonitoring::ExponentialHistogram(15, 2, 10)); + PackageSizeCountByRecords = TBase::GetHistogram("ByRecords/PackageSize/Count", NMonitoring::ExponentialHistogram(15, 2, 10)); + + PreparingDuration = TBase::GetHistogram("Preparing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + WritingDuration = TBase::GetHistogram("Writing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + CommitDuration = TBase::GetHistogram("Commit/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + PrepareReplyDuration = TBase::GetHistogram("ToReply/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); const google::protobuf::EnumDescriptor* descriptor = ::Ydb::StatusIds::StatusCode_descriptor(); for (ui32 i = 0; i < (ui32)descriptor->value_count(); ++i) { @@ -19,12 +25,4 @@ namespace NKikimr { } } - void TUploadCounters::OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const { - const TString name = ::Ydb::StatusIds::StatusCode_Name(code); - auto it = CodesCount.find(name); - Y_ABORT_UNLESS(it != CodesCount.end()); - it->second->Add(1); - ReplyDuration->Collect(d.MilliSeconds()); - } - } diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 599f9984b8ab..cbfaffaf23f4 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -43,19 +43,77 @@ class TUploadCounters: public NColumnShard::TCommonCountersOwner { NMonitoring::THistogramPtr ReplyDuration; NMonitoring::TDynamicCounters::TCounterPtr RowsCount; - NMonitoring::THistogramPtr PackageSize; + NMonitoring::THistogramPtr PackageSizeRecordsByRecords; + NMonitoring::THistogramPtr PackageSizeCountByRecords; + + NMonitoring::THistogramPtr PreparingDuration; + NMonitoring::THistogramPtr WritingDuration; + NMonitoring::THistogramPtr CommitDuration; + NMonitoring::THistogramPtr 
PrepareReplyDuration; THashMap CodesCount; public: TUploadCounters(); + class TGuard: TMoveOnly { + private: + TMonotonic Start = TMonotonic::Now(); + std::optional WritingStarted; + std::optional CommitStarted; + std::optional CommitFinished; + std::optional ReplyFinished; + TUploadCounters& Owner; + public: + TGuard(const TMonotonic start, TUploadCounters& owner) + : Start(start) + , Owner(owner) + { + + } + + void OnWritingStarted() { + WritingStarted = TMonotonic::Now(); + Owner.PreparingDuration->Collect((*WritingStarted - Start).MilliSeconds()); + } + + void OnCommitStarted() { + CommitStarted = TMonotonic::Now(); + AFL_VERIFY(WritingStarted); + Owner.WritingDuration->Collect((*CommitStarted - *WritingStarted).MilliSeconds()); + } + + void OnCommitFinished() { + CommitFinished = TMonotonic::Now(); + AFL_VERIFY(CommitStarted); + Owner.CommitDuration->Collect((*CommitFinished - *CommitStarted).MilliSeconds()); + } + + void OnReply(const ::Ydb::StatusIds::StatusCode code) { + ReplyFinished = TMonotonic::Now(); + if (CommitFinished) { + Owner.PrepareReplyDuration->Collect((*ReplyFinished - *CommitFinished).MilliSeconds()); + } + Owner.ReplyDuration->Collect((*ReplyFinished - Start).MilliSeconds()); + + const TString name = ::Ydb::StatusIds::StatusCode_Name(code); + auto it = Owner.CodesCount.find(name); + Y_ABORT_UNLESS(it != Owner.CodesCount.end()); + it->second->Add(1); + } + }; + + TGuard BuildGuard(const TMonotonic start) { + return TGuard(start, *this); + } + void OnRequest(const ui64 rowsCount) const { RequestsCount->Add(1); RowsCount->Add(rowsCount); - PackageSize->Collect(rowsCount); + PackageSizeRecordsByRecords->Collect((i64)rowsCount, rowsCount); + PackageSizeCountByRecords->Collect(rowsCount); } - void OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const; + void OnReply(const TDuration dFull, const TDuration dDelta, const ::Ydb::StatusIds::StatusCode code) const; }; @@ -148,6 +206,7 @@ class TUploadRowsBase : public 
TActorBootstrapped StartCommitTime; TActorId TimeoutTimerActorId; TAutoPtr ResolvePartitionsResult; @@ -164,7 +223,7 @@ class TUploadRowsBase : public TActorBootstrapped Issues = std::make_shared(); NLongTxService::TLongTxId LongTxId; TUploadCounters UploadCounters; - + TUploadCounters::TGuard UploadCountersGuard; protected: enum class EUploadSource { ProtoValues = 0, @@ -216,6 +275,7 @@ class TUploadRowsBase : public TActorBootstrappedGet(); if (msg->Record.GetStatus() == Ydb::StatusIds::SUCCESS) { @@ -1263,7 +1326,7 @@ class TUploadRowsBase : public TActorBootstrappedNow() - StartTime, status); + UploadCountersGuard.OnReply(status); SendResult(ctx, status); LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, LogPrefix() << "completed with status " << status); diff --git a/ydb/core/tx/tx_proxy/ya.make b/ydb/core/tx/tx_proxy/ya.make index fa746bd1e249..d592810a65b2 100644 --- a/ydb/core/tx/tx_proxy/ya.make +++ b/ydb/core/tx/tx_proxy/ya.make @@ -14,6 +14,7 @@ SRCS( commitreq.cpp upload_rows_common_impl.cpp upload_rows.cpp + global.cpp ) GENERATE_ENUM_SERIALIZATION(read_table_impl.h) diff --git a/ydb/core/util/cache_cache.h b/ydb/core/util/cache_cache.h index 7e3137720057..058fbfda6b72 100644 --- a/ydb/core/util/cache_cache.h +++ b/ydb/core/util/cache_cache.h @@ -1,24 +1,24 @@ #pragma once #include "defs.h" +#include #include #include #include #include #include -namespace NKikimr { +namespace NKikimr::NCache { + +enum class ECacheCacheGeneration { + None, + Fresh, + Staging, + Warm, +}; struct TCacheCacheConfig : public TAtomicRefCount { using TCounterPtr = ::NMonitoring::TDynamicCounters::TCounterPtr; - enum ECacheGeneration { - CacheGenNone, - CacheGenEvicted, - CacheGenFresh, - CacheGenStaging, - CacheGenWarm, - }; - ui64 Limit; ui64 FreshLimit; @@ -48,30 +48,10 @@ struct TCacheCacheConfig : public TAtomicRefCount { StagingLimit = FreshLimit; WarmLimit = FreshLimit; } - - template - struct TDefaultWeight { - static ui64 Get(TItem *) { - return 1; - } - }; - - template 
- struct TDefaultGeneration { - static ECacheGeneration Get(TItem *x) { - return static_cast(x->CacheGeneration); - } - static void Set(TItem *x, ECacheGeneration gen) { - x->CacheGeneration = gen; - } - }; }; -template - , typename TGeneration = TCacheCacheConfig::TDefaultGeneration - > -class TCacheCache { +template +class TCacheCache : public ICacheCache { public: TCacheCache(const TCacheCacheConfig &config) : Config(config) @@ -80,45 +60,44 @@ class TCacheCache { , WarmWeight(0) {} - TItem* EvictNext() { - TItem* ret = nullptr; - + TIntrusiveList EvictNext() override { + TIntrusiveList evictedList; + if (!StagingList.Empty()) { - ret = EvictNext(StagingList, StagingWeight); + evictedList.PushBack(EvictNext(StagingList, StagingWeight)); if (Config.ReportedStaging) *Config.ReportedStaging = StagingWeight; } else if (!FreshList.Empty()) { - ret = EvictNext(FreshList, FreshWeight); + evictedList.PushBack(EvictNext(FreshList, FreshWeight)); if (Config.ReportedFresh) *Config.ReportedFresh = FreshWeight; } else if (!WarmList.Empty()) { - ret = EvictNext(WarmList, WarmWeight); + evictedList.PushBack(EvictNext(WarmList, WarmWeight)); if (Config.ReportedWarm) *Config.ReportedWarm = WarmWeight; } - return ret; + return evictedList; } // returns evicted elements as list - TIntrusiveList Touch(TItem *item) { + TIntrusiveList Touch(TItem *item) override { TIntrusiveList evictedList; TIntrusiveListItem *xitem = item; - const TCacheCacheConfig::ECacheGeneration cacheGen = GenerationOp.Get(item); + const ECacheCacheGeneration cacheGen = TItemTraits::GetGeneration(item); switch (cacheGen) { - case TCacheCacheConfig::CacheGenNone: // place in fresh - case TCacheCacheConfig::CacheGenEvicted: // corner case: was evicted from staging and touched in same update + case ECacheCacheGeneration::None: // place in fresh AddToFresh(item, evictedList); [[fallthrough]]; - case TCacheCacheConfig::CacheGenFresh: // just update inside fresh + case ECacheCacheGeneration::Fresh: // just update 
inside fresh xitem->Unlink(); FreshList.PushFront(xitem); break; - case TCacheCacheConfig::CacheGenStaging: // move to warm + case ECacheCacheGeneration::Staging: // move to warm MoveToWarm(item, evictedList); break; - case TCacheCacheConfig::CacheGenWarm: // just update inside warm + case ECacheCacheGeneration::Warm: // just update inside warm xitem->Unlink(); WarmList.PushFront(xitem); break; @@ -130,74 +109,45 @@ class TCacheCache { return evictedList; } - // evict and erase differs on Evicted handling - void Evict(TItem *item) { - const TCacheCacheConfig::ECacheGeneration cacheGen = GenerationOp.Get(item); + void Erase(TItem *item) override { + const ECacheCacheGeneration cacheGen = TItemTraits::GetGeneration(item); switch (cacheGen) { - case TCacheCacheConfig::CacheGenNone: - case TCacheCacheConfig::CacheGenEvicted: + case ECacheCacheGeneration::None: break; - case TCacheCacheConfig::CacheGenFresh: + case ECacheCacheGeneration::Fresh: Unlink(item, FreshWeight); if (Config.ReportedFresh) *Config.ReportedFresh = FreshWeight; break; - case TCacheCacheConfig::CacheGenStaging: + case ECacheCacheGeneration::Staging: Unlink(item, StagingWeight); if (Config.ReportedStaging) *Config.ReportedStaging = StagingWeight; break; - case TCacheCacheConfig::CacheGenWarm: + case ECacheCacheGeneration::Warm: Unlink(item, WarmWeight); if (Config.ReportedWarm) *Config.ReportedWarm = WarmWeight; break; default: - Y_DEBUG_ABORT("unknown cache generaton"); + Y_DEBUG_ABORT("unknown cache generation"); } + TItemTraits::SetGeneration(item, ECacheCacheGeneration::None); } - void Erase(TItem *item) { - const TCacheCacheConfig::ECacheGeneration cacheGen = GenerationOp.Get(item); - switch (cacheGen) { - case TCacheCacheConfig::CacheGenNone: - break; - case TCacheCacheConfig::CacheGenEvicted: - item->Unlink(); - GenerationOp.Set(item, TCacheCacheConfig::CacheGenNone); - break; - case TCacheCacheConfig::CacheGenFresh: - Unlink(item, FreshWeight); - if (Config.ReportedFresh) - 
*Config.ReportedFresh = FreshWeight; - break; - case TCacheCacheConfig::CacheGenStaging: - Unlink(item, StagingWeight); - if (Config.ReportedStaging) - *Config.ReportedStaging = StagingWeight; - break; - case TCacheCacheConfig::CacheGenWarm: - Unlink(item, WarmWeight); - if (Config.ReportedWarm) - *Config.ReportedWarm = WarmWeight; - break; - default: - Y_DEBUG_ABORT("unknown cache generaton"); - } + void UpdateLimit(ui64 limit) override { + Config.SetLimit(limit); } - void UpdateCacheSize(ui64 cacheSize) { - if (cacheSize == 0) - cacheSize = Max(); - - Config.SetLimit(cacheSize); + ui64 GetSize() const override { + return FreshWeight + StagingWeight + WarmWeight; } private: void Unlink(TItem *item, ui64 &weight) { item->Unlink(); - const ui64 elementWeight = WeightOp.Get(item); + const ui64 elementWeight = TItemTraits::GetWeight(item); Y_DEBUG_ABORT_UNLESS(elementWeight <= weight); weight -= elementWeight; } @@ -205,9 +155,9 @@ class TCacheCache { void AddToFresh(TItem *item, TIntrusiveList& evictedList) { LimitFresh(evictedList); item->Unlink(); - FreshWeight += WeightOp.Get(item); + FreshWeight += TItemTraits::GetWeight(item); FreshList.PushFront(item); - GenerationOp.Set(item, TCacheCacheConfig::CacheGenFresh); + TItemTraits::SetGeneration(item, ECacheCacheGeneration::Fresh); if (Config.ReportedStaging) *Config.ReportedStaging = StagingWeight; @@ -219,9 +169,9 @@ class TCacheCache { // Note: unlink first, so item is not evicted by LimitWarm call below Unlink(item, StagingWeight); LimitWarm(evictedList); - WarmWeight += WeightOp.Get(item); + WarmWeight += TItemTraits::GetWeight(item); WarmList.PushFront(item); - GenerationOp.Set(item, TCacheCacheConfig::CacheGenWarm); + TItemTraits::SetGeneration(item, ECacheCacheGeneration::Warm); if (Config.ReportedStaging) *Config.ReportedStaging = StagingWeight; @@ -231,16 +181,16 @@ class TCacheCache { void AddToStaging(TItem *item, TIntrusiveList& evictedList) { LimitStaging(evictedList); - StagingWeight += 
WeightOp.Get(item); + StagingWeight += TItemTraits::GetWeight(item); StagingList.PushFront(item); - GenerationOp.Set(item, TCacheCacheConfig::CacheGenStaging); + TItemTraits::SetGeneration(item, ECacheCacheGeneration::Staging); } void LimitFresh(TIntrusiveList& evictedList) { while (FreshWeight > Config.FreshLimit) { Y_DEBUG_ABORT_UNLESS(!FreshList.Empty()); TItem *x = FreshList.PopBack(); - Y_ABORT_UNLESS(GenerationOp.Get(x) == TCacheCacheConfig::CacheGenFresh, "malformed entry in fresh cache. %" PRIu32, (ui32)GenerationOp.Get(x)); + Y_ABORT_UNLESS(TItemTraits::GetGeneration(x) == ECacheCacheGeneration::Fresh, "malformed entry in fresh cache. %" PRIu32, (ui32)TItemTraits::GetGeneration(x)); Unlink(x, FreshWeight); AddToStaging(x, evictedList); } @@ -250,7 +200,7 @@ class TCacheCache { while (WarmWeight > Config.WarmLimit) { Y_DEBUG_ABORT_UNLESS(!WarmList.Empty()); TItem *x = WarmList.PopBack(); - Y_ABORT_UNLESS(GenerationOp.Get(x) == TCacheCacheConfig::CacheGenWarm, "malformed entry in warm cache. %" PRIu32, (ui32)GenerationOp.Get(x)); + Y_ABORT_UNLESS(TItemTraits::GetGeneration(x) == ECacheCacheGeneration::Warm, "malformed entry in warm cache. 
%" PRIu32, (ui32)TItemTraits::GetGeneration(x)); Unlink(x, WarmWeight); AddToStaging(x, evictedList); } @@ -260,9 +210,9 @@ class TCacheCache { while (StagingWeight > Config.StagingLimit) { Y_DEBUG_ABORT_UNLESS(!StagingList.Empty()); TItem *evicted = StagingList.PopBack(); - Y_ABORT_UNLESS(GenerationOp.Get(evicted) == TCacheCacheConfig::CacheGenStaging, "malformed entry in staging cache %" PRIu32, (ui32)GenerationOp.Get(evicted)); + Y_ABORT_UNLESS(TItemTraits::GetGeneration(evicted) == ECacheCacheGeneration::Staging, "malformed entry in staging cache %" PRIu32, (ui32)TItemTraits::GetGeneration(evicted)); Unlink(evicted, StagingWeight); - GenerationOp.Set(evicted, TCacheCacheConfig::CacheGenEvicted); + TItemTraits::SetGeneration(evicted, ECacheCacheGeneration::None); evictedList.PushBack(evicted); } } @@ -272,7 +222,7 @@ class TCacheCache { TItem *evicted = list.PopBack(); Unlink(evicted, weight); - GenerationOp.Set(evicted, TCacheCacheConfig::CacheGenEvicted); + TItemTraits::SetGeneration(evicted, ECacheCacheGeneration::None); return evicted; } @@ -287,9 +237,6 @@ class TCacheCache { ui64 FreshWeight; ui64 StagingWeight; ui64 WarmWeight; - - TWeight WeightOp; - TGeneration GenerationOp; }; } diff --git a/ydb/core/util/cache_cache_iface.h b/ydb/core/util/cache_cache_iface.h new file mode 100644 index 000000000000..b914d4b78162 --- /dev/null +++ b/ydb/core/util/cache_cache_iface.h @@ -0,0 +1,29 @@ +#pragma once +#include "defs.h" + +namespace NKikimr::NCache { + +template +struct ICacheCache { + // returns evicted elements as list + // in most common scenarios it has only one item + virtual TIntrusiveList EvictNext() Y_WARN_UNUSED_RESULT = 0; + + // returns evicted elements as list + virtual TIntrusiveList Touch(TItem *item) Y_WARN_UNUSED_RESULT = 0; + + virtual void Erase(TItem *item) = 0; + + // WARN: do not evict items + virtual void UpdateLimit(ui64 limit) = 0; + + virtual ui64 GetSize() const = 0; + + virtual TString Dump() const { + return {}; + } + + virtual 
~ICacheCache() = default; +}; + +} diff --git a/ydb/core/util/cache_cache_ut.cpp b/ydb/core/util/cache_cache_ut.cpp index 1e4f5a84a7f8..bba48cf4fe13 100644 --- a/ydb/core/util/cache_cache_ut.cpp +++ b/ydb/core/util/cache_cache_ut.cpp @@ -1,12 +1,29 @@ #include "cache_cache.h" #include +#include -namespace NKikimr { +namespace NKikimr::NCache { Y_UNIT_TEST_SUITE(TCacheCacheTest) { struct TPage : public TIntrusiveListItem { - TCacheCacheConfig::ECacheGeneration CacheGeneration = TCacheCacheConfig::CacheGenNone; + ECacheCacheGeneration CacheGeneration; + }; + + struct TCacheCachePageTraits { + static ui64 GetWeight(const TPage*) { + return 1; + } + + static ECacheCacheGeneration GetGeneration(const TPage *page) { + return static_cast(page->CacheGeneration); + } + + static void SetGeneration(TPage *page, ECacheCacheGeneration generation) { + ui32 generation_ = static_cast(generation); + Y_ABORT_UNLESS(generation_ < (1 << 4)); + page->CacheGeneration = generation; + } }; Y_UNIT_TEST(MoveToWarm) { @@ -17,14 +34,14 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // this should mean nothing is cacheable, but currently we will // place 1 page on a level until it is inspected again. 
TCacheCacheConfig config(1, fresh, staging, warm); - TCacheCache cache(config); + TCacheCache cache(config); TVector pages(3); TIntrusiveList evicted; // page 0 added to fresh evicted = cache.Touch(&pages[0]); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenFresh); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::Fresh); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 0ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 0ULL); @@ -32,8 +49,8 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // page 1 added to fresh first bumps page 0 to staging evicted = cache.Touch(&pages[1]); - UNIT_ASSERT(pages[1].CacheGeneration == TCacheCacheConfig::CacheGenFresh); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenStaging); + UNIT_ASSERT(pages[1].CacheGeneration == ECacheCacheGeneration::Fresh); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::Staging); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 0ULL); @@ -41,7 +58,7 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // page 0 is moved to warm from staging evicted = cache.Touch(&pages[0]); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenWarm); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::Warm); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 0ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 1ULL); @@ -49,8 +66,8 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // page 2 added to fresh first bumps page 1 to staging evicted = cache.Touch(&pages[2]); - UNIT_ASSERT(pages[2].CacheGeneration == TCacheCacheConfig::CacheGenFresh); - UNIT_ASSERT(pages[1].CacheGeneration == TCacheCacheConfig::CacheGenStaging); + UNIT_ASSERT(pages[2].CacheGeneration == ECacheCacheGeneration::Fresh); + UNIT_ASSERT(pages[1].CacheGeneration == ECacheCacheGeneration::Staging); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); 
UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 1ULL); @@ -58,8 +75,8 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // page 1 moves to warm, but first it bumps page 0 to staging evicted = cache.Touch(&pages[1]); - UNIT_ASSERT(pages[1].CacheGeneration == TCacheCacheConfig::CacheGenWarm); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenStaging); + UNIT_ASSERT(pages[1].CacheGeneration == ECacheCacheGeneration::Warm); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::Staging); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 1ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 1ULL); @@ -73,7 +90,7 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { // 2 pages per layer TCacheCacheConfig config(3, fresh, staging, warm); - TCacheCache, TCacheCacheConfig::TDefaultGeneration> cache(config); + TCacheCache cache(config); TVector pages(6); @@ -85,41 +102,67 @@ Y_UNIT_TEST_SUITE(TCacheCacheTest) { cache.Touch(&pages[1]); cache.Touch(&pages[4]); cache.Touch(&pages[5]); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenWarm); - UNIT_ASSERT(pages[1].CacheGeneration == TCacheCacheConfig::CacheGenWarm); - UNIT_ASSERT(pages[2].CacheGeneration == TCacheCacheConfig::CacheGenStaging); - UNIT_ASSERT(pages[3].CacheGeneration == TCacheCacheConfig::CacheGenStaging); - UNIT_ASSERT(pages[4].CacheGeneration == TCacheCacheConfig::CacheGenFresh); - UNIT_ASSERT(pages[5].CacheGeneration == TCacheCacheConfig::CacheGenFresh); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::Warm); + UNIT_ASSERT(pages[1].CacheGeneration == ECacheCacheGeneration::Warm); + UNIT_ASSERT(pages[2].CacheGeneration == ECacheCacheGeneration::Staging); + UNIT_ASSERT(pages[3].CacheGeneration == ECacheCacheGeneration::Staging); + UNIT_ASSERT(pages[4].CacheGeneration == ECacheCacheGeneration::Fresh); + UNIT_ASSERT(pages[5].CacheGeneration == ECacheCacheGeneration::Fresh); 
UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 2ULL); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 2ULL); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 2ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[2]); - UNIT_ASSERT(pages[2].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[2]); + UNIT_ASSERT(pages[2].CacheGeneration == ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 1ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[3]); - UNIT_ASSERT(pages[3].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[3]); + UNIT_ASSERT(pages[3].CacheGeneration == ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(staging->Val(), 0ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[4]); - UNIT_ASSERT(pages[4].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[4]); + UNIT_ASSERT(pages[4].CacheGeneration == ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 1ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[5]); - UNIT_ASSERT(pages[5].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[5]); + UNIT_ASSERT(pages[5].CacheGeneration == ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(fresh->Val(), 0ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[0]); - UNIT_ASSERT(pages[0].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[0]); + UNIT_ASSERT(pages[0].CacheGeneration == ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 1ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), &pages[1]); - UNIT_ASSERT(pages[1].CacheGeneration == TCacheCacheConfig::CacheGenEvicted); + UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext().Front(), &pages[1]); + UNIT_ASSERT(pages[1].CacheGeneration == 
ECacheCacheGeneration::None); UNIT_ASSERT_VALUES_EQUAL(warm->Val(), 0ULL); - UNIT_ASSERT_VALUES_EQUAL(cache.EvictNext(), nullptr); + UNIT_ASSERT(cache.EvictNext().Empty()); + } + + Y_UNIT_TEST(Random) { + TCacheCacheConfig::TCounterPtr fresh = new NMonitoring::TCounterForPtr; + TCacheCacheConfig::TCounterPtr staging = new NMonitoring::TCounterForPtr; + TCacheCacheConfig::TCounterPtr warm = new NMonitoring::TCounterForPtr; + + TCacheCacheConfig config(100, fresh, staging, warm); + TCacheCache cache(config); + + TVector pages(500); + + ui32 hits = 0, misses = 0; + + for (ui32 i = 0; i < 100000; i++) { + ui32 pageId = std::sqrt(RandomNumber(pages.size() * pages.size())); + TPage* page = &pages[pageId]; + if (page->CacheGeneration != ECacheCacheGeneration::None) { + hits++; + } else { + misses++; + } + cache.Touch(page); + } + + Cerr << 1.0 * hits / (hits + misses) << Endl; } } diff --git a/ydb/core/viewer/browse.h b/ydb/core/viewer/browse.h index aa29d6c0f3a4..69569a35b61c 100644 --- a/ydb/core/viewer/browse.h +++ b/ydb/core/viewer/browse.h @@ -1,21 +1,16 @@ #pragma once -#include -#include -#include +#include "browse_events.h" +#include "viewer.h" +#include "wb_aggregate.h" #include #include #include -#include +#include #include #include -#include -#include -#include "browse_events.h" -#include "viewer.h" -#include "wb_aggregate.h" +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; @@ -646,4 +641,3 @@ class TBrowseTabletsCommon : public TActorBootstrapped { }; } -} diff --git a/ydb/core/viewer/browse_db.h b/ydb/core/viewer/browse_db.h index 86c4aa264127..f794cb3bbab4 100644 --- a/ydb/core/viewer/browse_db.h +++ b/ydb/core/viewer/browse_db.h @@ -1,20 +1,12 @@ #pragma once -#include -#include +#include "browse.h" +#include "viewer.h" +#include "wb_aggregate.h" #include -#include -#include #include #include -#include -#include -#include -#include "viewer.h" -#include "browse.h" -#include "wb_aggregate.h" -namespace 
NKikimr { -namespace NViewerDB { +namespace NKikimr::NViewerDB { using namespace NViewer; using namespace NActors; @@ -200,4 +192,3 @@ class TBrowseTable : public TBrowseTabletsCommon { }; } -} diff --git a/ydb/core/viewer/browse_events.h b/ydb/core/viewer/browse_events.h index ee0bbbb37572..92cc2b206bec 100644 --- a/ydb/core/viewer/browse_events.h +++ b/ydb/core/viewer/browse_events.h @@ -1,13 +1,12 @@ #pragma once -#include -#include -#include - #include +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { namespace NViewerEvents { enum EEv { @@ -76,5 +75,4 @@ namespace NViewerEvents { }; } // namespace NViewerEvents -} // namespace NViewer -} // namespace NKikimr +} diff --git a/ydb/core/viewer/browse_pq.h b/ydb/core/viewer/browse_pq.h index 1f2b5d950b16..190b72ad953b 100644 --- a/ydb/core/viewer/browse_pq.h +++ b/ydb/core/viewer/browse_pq.h @@ -1,20 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" #include "browse.h" +#include "viewer.h" #include "wb_aggregate.h" +#include +#include +#include -namespace NKikimr { -namespace NViewerPQ { +namespace NKikimr::NViewerPQ { using namespace NViewer; using namespace NActors; @@ -419,4 +411,3 @@ class TBrowseTopic : public TBrowseCommon { }; } -} diff --git a/ydb/core/viewer/check_access.h b/ydb/core/viewer/check_access.h deleted file mode 100644 index 42526b2bffe1..000000000000 --- a/ydb/core/viewer/check_access.h +++ /dev/null @@ -1,229 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TCheckAccess : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr 
Event; - TAutoPtr CacheResult; - TVector Permissions; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TCheckAccess(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui32 timeout = FromStringWithDefault(params.Get("timeout"), 10000); - TString database; - if (params.Has("database")) { - database = params.Get("database"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - if (database && database != AppData()->TenantName) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << database); - RequestStateStorageEndpointsLookup(database); // to find some dynamic node and redirect query there - } else { - if (params.Has("permissions")) { - Split(params.Get("permissions"), ",", Permissions); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'permissions' is required")); - } - if (params.Has("path")) { - RequestSchemeCacheNavigate(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - } - Become(&TThis::StateRequestedNavigate, TDuration::MilliSeconds(timeout), new TEvents::TEvWakeup()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - ReplyAndPassAway(Viewer->MakeForward(Event->Get(), GetNodesFromBoardReply(ev))); - } - - STATEFN(StateRequestedNavigate) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - CacheResult = ev->Release(); - RequestDone(); - } - 
- ui32 GetAccessType(const TString& permission) { - TACLAttrs attrs(0); - try { - attrs = ConvertYdbPermissionNameToACLAttrs(permission); - } - catch (const std::exception&) { - } - return attrs.AccessMask; - } - - bool CheckAccessPermission(const NACLib::TSecurityObject* object, const NACLib::TUserToken* token, const TString& permission) { - const auto& kikimrRunConfig = Viewer->GetKikimrRunConfig(); - const auto& securityConfig = kikimrRunConfig.AppConfig.GetDomainsConfig().GetSecurityConfig(); - if (!securityConfig.GetEnforceUserTokenRequirement()) { - if (!securityConfig.GetEnforceUserTokenCheckRequirement() || token == nullptr) { - return true; - } - } - if (token == nullptr) { - return false; - } - if (object == nullptr) { - return false; - } - ui32 access = GetAccessType(permission); - if (access == 0) { - return false; - } - return object->CheckAccess(access, *token); - } - - void ReplyAndPassAway() { - std::unique_ptr token; - if (Event->Get()->UserToken) { - token = std::make_unique(Event->Get()->UserToken); - } - if (CacheResult == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "no SchemeCache response")); - } - if (CacheResult->Request == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "wrong SchemeCache response")); - } - if (CacheResult->Request.Get()->ResultSet.empty()) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "SchemeCache response is empty")); - } - if (CacheResult->Request.Get()->ErrorCount != 0) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "SchemeCache response error " << static_cast(CacheResult->Request.Get()->ResultSet.front().Status))); - } - - - auto object = CacheResult->Request.Get()->ResultSet.front().SecurityObject; - - NJson::TJsonValue json(NJson::JSON_MAP); - - for (const TString& permission : Permissions) { - json[permission] = 
CheckAccessPermission(object.Get(), token.get(), permission); - } - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - } - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving SchemeCache response")); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Check access - description: Check access to the specified path - parameters: - - name: database - in: query - description: database name - type: string - required: true - - name: path - in: query - description: path to check access - type: string - required: true - - name: permissions - in: query - description: permissions to check - required: true - type: array - items: - type: string - enum: - - ydb.database.connect - - ydb.tables.modify - - ydb.tables.read - - ydb.generic.list - - ydb.generic.read - - ydb.generic.write - - ydb.generic.use_legacy - - ydb.generic.use - - ydb.generic.manage - - ydb.generic.full_legacy - - ydb.generic.full - - ydb.database.create - - ydb.database.drop - - ydb.access.grant - - ydb.granular.select_row - - ydb.granular.update_row - - ydb.granular.erase_row - - ydb.granular.read_attributes - - ydb.granular.write_attributes - - ydb.granular.create_directory - - ydb.granular.create_table - - ydb.granular.create_queue - - ydb.granular.remove_schema - - ydb.granular.describe_schema - - ydb.granular.alter_schema - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - - return node; -} - -} -} - diff --git 
a/ydb/core/viewer/counters_hosts.h b/ydb/core/viewer/counters_hosts.h index 2232f16b00ba..b93a7cb0b485 100644 --- a/ydb/core/viewer/counters_hosts.h +++ b/ydb/core/viewer/counters_hosts.h @@ -1,16 +1,12 @@ #pragma once -#include -#include -#include -#include +#include "viewer.h" #include #include -#include #include -#include "viewer.h" +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; @@ -173,4 +169,3 @@ class TCountersHostsList : public TActorBootstrapped { }; } -} diff --git a/ydb/core/viewer/healthcheck_record.h b/ydb/core/viewer/healthcheck_record.h index 39c94e288773..5b55d2bd1322 100644 --- a/ydb/core/viewer/healthcheck_record.h +++ b/ydb/core/viewer/healthcheck_record.h @@ -1,9 +1,8 @@ #pragma once +#include +#include namespace NKikimr::NViewer { - -using namespace NActors; -using namespace NMonitoring; struct TMetricRecord { TString Database; diff --git a/ydb/core/viewer/json/json.cpp b/ydb/core/viewer/json/json.cpp index af5acc4dcc1e..d3cbd6b0ce59 100644 --- a/ydb/core/viewer/json/json.cpp +++ b/ydb/core/viewer/json/json.cpp @@ -492,9 +492,7 @@ void TProtoToJson::ProtoToJsonSchema(IOutputStream& to, const TJsonSettings& jso to << "{\"type\":\"array\",\"items\":"; } if (fieldDescriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { - if (fieldDescriptor->message_type()->full_name() == google::protobuf::Timestamp::descriptor()->full_name()) { - to << "{\"type\":\"string\",\"format\":\"date-time\"}"; - } else if (fieldDescriptor->message_type()->full_name() == google::protobuf::Duration::descriptor()->full_name()) { + if (fieldDescriptor->message_type()->full_name() == google::protobuf::Duration::descriptor()->full_name()) { to << "{\"type\":\"string\", \"example\":\"3600s\"}"; } else if (fieldDescriptor->message_type()->full_name() == google::protobuf::BoolValue::descriptor()->full_name()) { to << "{\"type\":\"boolean\"}"; diff --git 
a/ydb/core/viewer/json_acl.h b/ydb/core/viewer/json_acl.h deleted file mode 100644 index d484ac4f2860..000000000000 --- a/ydb/core/viewer/json_acl.h +++ /dev/null @@ -1,303 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - -class TJsonACL : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr CacheResult; - TJsonSettings JsonSettings; - bool MergeRules = false; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonACL(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - TString database; - if (params.Has("database")) { - database = params.Get("database"); - } - if (database && database != AppData()->TenantName) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << database); - RequestStateStorageEndpointsLookup(database); // to find some dynamic node and redirect query there - } else { - if (params.Has("path")) { - RequestSchemeCacheNavigate(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - MergeRules = FromStringWithDefault(params.Get("merge_rules"), MergeRules); - } - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - ReplyAndPassAway(Viewer->MakeForward(Event->Get(), GetNodesFromBoardReply(ev))); - } - - STATEFN(StateRequestedDescribe) { - switch 
(ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - CacheResult = ev->Release(); - RequestDone(); - } - - static bool Has(ui32 accessRights, ui32 mask) { - return (accessRights & mask) == mask; - } - - void FillACE(const NACLibProto::TACE& ace, NKikimrViewer::TMetaCommonInfo::TACE& pbAce) { - if (static_cast(ace.GetAccessType()) == NACLib::EAccessType::Deny) { - pbAce.SetAccessType("Deny"); - } - if (static_cast(ace.GetAccessType()) == NACLib::EAccessType::Allow) { - pbAce.SetAccessType("Allow"); - } - - auto ar = ace.GetAccessRight(); - - static std::pair accessRules[] = { - {NACLib::EAccessRights::GenericFull, "Full"}, - {NACLib::EAccessRights::GenericFullLegacy, "FullLegacy"}, - {NACLib::EAccessRights::GenericManage, "Manage"}, - {NACLib::EAccessRights::GenericUse, "Use"}, - {NACLib::EAccessRights::GenericUseLegacy, "UseLegacy"}, - {NACLib::EAccessRights::GenericWrite, "Write"}, - {NACLib::EAccessRights::GenericRead, "Read"}, - {NACLib::EAccessRights::GenericList, "List"}, - }; - if (MergeRules) { - for (const auto& [rule, name] : accessRules) { - if (Has(ar, rule)) { - pbAce.AddAccessRules(name); - ar &= ~rule; - } - } - } - - static std::pair accessRights[] = { - {NACLib::EAccessRights::SelectRow, "SelectRow"}, - {NACLib::EAccessRights::UpdateRow, "UpdateRow"}, - {NACLib::EAccessRights::EraseRow, "EraseRow"}, - {NACLib::EAccessRights::ReadAttributes, "ReadAttributes"}, - {NACLib::EAccessRights::WriteAttributes, "WriteAttributes"}, - {NACLib::EAccessRights::CreateDirectory, "CreateDirectory"}, - {NACLib::EAccessRights::CreateTable, "CreateTable"}, - {NACLib::EAccessRights::CreateQueue, "CreateQueue"}, - {NACLib::EAccessRights::RemoveSchema, "RemoveSchema"}, - {NACLib::EAccessRights::DescribeSchema, "DescribeSchema"}, - 
{NACLib::EAccessRights::AlterSchema, "AlterSchema"}, - {NACLib::EAccessRights::CreateDatabase, "CreateDatabase"}, - {NACLib::EAccessRights::DropDatabase, "DropDatabase"}, - {NACLib::EAccessRights::GrantAccessRights, "GrantAccessRights"}, - {NACLib::EAccessRights::WriteUserAttributes, "WriteUserAttributes"}, - {NACLib::EAccessRights::ConnectDatabase, "ConnectDatabase"}, - {NACLib::EAccessRights::ReadStream, "ReadStream"}, - {NACLib::EAccessRights::WriteStream, "WriteStream"}, - {NACLib::EAccessRights::ReadTopic, "ReadTopic"}, - {NACLib::EAccessRights::WriteTopic, "WriteTopic"} - }; - for (const auto& [right, name] : accessRights) { - if (Has(ar, right)) { - pbAce.AddAccessRights(name); - ar &= ~right; - } - } - - if (ar != 0) { - pbAce.AddAccessRights(NACLib::AccessRightsToString(ar)); - } - - pbAce.SetSubject(ace.GetSID()); - - auto inht = ace.GetInheritanceType(); - if ((inht & NACLib::EInheritanceType::InheritObject) != 0) { - pbAce.AddInheritanceType("Object"); - } - if ((inht & NACLib::EInheritanceType::InheritContainer) != 0) { - pbAce.AddInheritanceType("Container"); - } - if ((inht & NACLib::EInheritanceType::InheritOnly) != 0) { - pbAce.AddInheritanceType("Only"); - } - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void ReplyAndPassAway() { - if (CacheResult == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "no SchemeCache response")); - } - if (CacheResult->Request == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "wrong SchemeCache response")); - } - if (CacheResult->Request.Get()->ResultSet.empty()) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "SchemeCache response is empty")); - } - if (CacheResult->Request.Get()->ErrorCount != 0) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), 
"text/plain", TStringBuilder() << "SchemeCache response error " << static_cast(CacheResult->Request.Get()->ResultSet.front().Status))); - } - const auto& entry = CacheResult->Request.Get()->ResultSet.front(); - NKikimrViewer::TMetaInfo metaInfo; - NKikimrViewer::TMetaCommonInfo& pbCommon = *metaInfo.MutableCommon(); - pbCommon.SetPath(CanonizePath(entry.Path)); - pbCommon.SetOwner(entry.Self->Info.GetOwner()); - if (entry.Self->Info.HasACL()) { - NACLib::TACL acl(entry.Self->Info.GetACL()); - for (const NACLibProto::TACE& ace : acl.GetACE()) { - auto& pbAce = *pbCommon.AddACL(); - FillACE(ace, pbAce); - } - } - if (entry.Self->Info.HasEffectiveACL()) { - NACLib::TACL acl(entry.Self->Info.GetEffectiveACL()); - for (const NACLibProto::TACE& ace : acl.GetACE()) { - auto& pbAce = *pbCommon.AddEffectiveACL(); - FillACE(ace, pbAce); - } - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, metaInfo, JsonSettings); - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), json.Str())); - } - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: ACL information - description: Returns information about ACL of an object - parameters: - - name: database - in: query - description: database name - type: string - required: false - - name: path - in: query - description: schema path - required: true - type: string - - name: merge_rules - in: query - description: merge access rights into access rules - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - Common: - type: object - properties: - Path: - type: string - Owner: - type: string - ACL: - type: array - items: - type: object - properties: - AccessType: - type: string - Subject: 
- type: string - AccessRules: - type: array - items: - type: string - AccessRights: - type: array - items: - type: string - InheritanceType: - type: array - items: - type: string - EffectiveACL: - type: array - items: - type: object - properties: - AccessType: - type: string - Subject: - type: string - AccessRules: - type: array - items: - type: string - AccessRights: - type: array - items: - type: string - InheritanceType: - type: array - items: - type: string - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - - )___"); - - return node; -} - -} -} diff --git a/ydb/core/viewer/json_autocomplete.h b/ydb/core/viewer/json_autocomplete.h deleted file mode 100644 index 155a960af2ea..000000000000 --- a/ydb/core/viewer/json_autocomplete.h +++ /dev/null @@ -1,519 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include - -#include "query_autocomplete_helper.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TJsonAutocomplete : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - - TAutoPtr ProxyResult; - TAutoPtr ConsoleResult; - TAutoPtr CacheResult; - - struct TSchemaWordData { - TString Name; - NKikimrViewer::EAutocompleteType Type; - TString Table; - TSchemaWordData() {} - TSchemaWordData(const TString& name, const NKikimrViewer::EAutocompleteType type, const TString& table = "") - : Name(name) - , Type(type) - , Table(table) - {} - }; - THashMap Dictionary; - TString Database; - TVector Tables; - TVector Paths; - TString Prefix; - TString SearchWord; - ui32 Limit = 10; - NKikimrViewer::TQueryAutocomplete Result; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; -public: - 
static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonAutocomplete(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - ParsePostContent(content); - } - PrepareParameters(); - } - - // proxied request - TJsonAutocomplete(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetAutocompleteRequest(); - - Database = request.GetDatabase(); - for (auto& table: request.GetTables()) { - Tables.emplace_back(table); - } - Prefix = request.GetPrefix(); - Limit = request.GetLimit(); - - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - Direct = true; - PrepareParameters(); - } - - void PrepareParameters() { - if (Database) { - TString prefixUpToLastSlash = ""; - auto splitPos = Prefix.find_last_of('/'); - if (splitPos != std::string::npos) { - prefixUpToLastSlash += Prefix.substr(0, splitPos); - SearchWord = Prefix.substr(splitPos + 1); - } else { - SearchWord = Prefix; - } - - if (Tables.size() == 0) { - Paths.emplace_back(Database); - } else { - for (TString& table: Tables) { - TString path = table; - if (!table.StartsWith(Database)) { - path = Database + "/" + path; - } - path += "/" + prefixUpToLastSlash; - Paths.emplace_back(path); - } - } - } else { - SearchWord = Prefix; - } - if (Limit == 0) { - Limit = std::numeric_limits::max(); - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Database = params.Get("database"); - StringSplitter(params.Get("table")).Split(',').SkipEmpty().Collect(&Tables); - Prefix = 
params.Get("prefix"); - Limit = FromStringWithDefault(params.Get("limit"), Limit); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - } - - void ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - if (requestData["table"].IsArray()) { - for (auto& table: requestData["table"].GetArraySafe()) { - Tables.emplace_back(table.GetStringSafe()); - } - } - Prefix = Prefix.empty() ? requestData["prefix"].GetStringSafe({}) : Prefix; - if (requestData["limit"].IsDefined()) { - Limit = requestData["limit"].GetInteger(); - } - } - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TAutoPtr MakeSchemeCacheRequest() { - TAutoPtr request(new NSchemeCache::TSchemeCacheNavigate()); - - for (TString& path: Paths) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; - entry.SyncVersion = false; - entry.Path = SplitPath(path); - request->ResultSet.emplace_back(entry); - } - - return request; - } - - void Bootstrap() { - if (ViewerRequest) { - // handle proxied request - SendSchemeCacheRequest(); - } else if (!Database) { - // autocomplete database list via console request - RequestConsoleListTenants(); - } else { - if (!Direct) { - // proxy request to a dynamic node of the specified database - RequestStateStorageEndpointsLookup(Database); - } - if (Requests == 0) { - // perform autocomplete without proxying - SendSchemeCacheRequest(); - } - } - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr 
&ev) { - if (!Direct && ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - Direct = true; - SendSchemeCacheRequest(); // fallback - RequestDone(); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - if (!Direct) { - Direct = true; - SendSchemeCacheRequest(); // fallback - RequestDone(); - } - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendSchemeCacheRequest(); - } else { - SendDynamicNodeAutocompleteRequest(); - } - RequestDone(); - } - - void SendSchemeCacheRequest() { - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(MakeSchemeCacheRequest())); - } - - void SendDynamicNodeAutocompleteRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto autocompleteRequest = request->Record.MutableAutocompleteRequest(); - autocompleteRequest->SetDatabase(Database); - for (TString& path: Paths) { - autocompleteRequest->AddTables(path); - } - autocompleteRequest->SetPrefix(Prefix); - autocompleteRequest->SetLimit(Limit); - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kAutocompleteRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - 
Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void ParseProxyResult() { - if (ProxyResult == nullptr) { - Result.add_error("Failed to collect information from ProxyResult"); - return; - } - if (ProxyResult->Record.HasAutocompleteResponse()) { - Result = ProxyResult->Record.GetAutocompleteResponse(); - } else { - Result.add_error("Proxying return empty response"); - } - - } - - void ParseConsoleResult() { - if (ConsoleResult == nullptr) { - Result.add_error("Failed to collect information from ConsoleResult"); - return; - } - - Ydb::Cms::ListDatabasesResult listTenantsResult; - ConsoleResult->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - Dictionary[path] = TSchemaWordData(path, NKikimrViewer::ext_sub_domain); - } - } - - NKikimrViewer::EAutocompleteType ConvertType(TNavigate::EKind navigate) { - switch (navigate) { - case TNavigate::KindSubdomain: - return NKikimrViewer::sub_domain; - case TNavigate::KindPath: - return NKikimrViewer::dir; - case TNavigate::KindExtSubdomain: - return NKikimrViewer::ext_sub_domain; - case TNavigate::KindTable: - return NKikimrViewer::table; - case TNavigate::KindOlapStore: - return NKikimrViewer::column_store; - case TNavigate::KindColumnTable: - return NKikimrViewer::column_table; - case TNavigate::KindRtmr: - return 
NKikimrViewer::rtmr_volume; - case TNavigate::KindKesus: - return NKikimrViewer::kesus; - case TNavigate::KindSolomon: - return NKikimrViewer::solomon_volume; - case TNavigate::KindTopic: - return NKikimrViewer::pers_queue_group; - case TNavigate::KindCdcStream: - return NKikimrViewer::cdc_stream; - case TNavigate::KindSequence: - return NKikimrViewer::sequence; - case TNavigate::KindReplication: - return NKikimrViewer::replication; - case TNavigate::KindBlobDepot: - return NKikimrViewer::blob_depot; - case TNavigate::KindExternalTable: - return NKikimrViewer::external_table; - case TNavigate::KindExternalDataSource: - return NKikimrViewer::external_data_source; - case TNavigate::KindBlockStoreVolume: - return NKikimrViewer::block_store_volume; - case TNavigate::KindFileStore: - return NKikimrViewer::file_store; - case TNavigate::KindView: - return NKikimrViewer::view; - default: - return NKikimrViewer::dir; - } - } - - void ParseCacheResult() { - if (CacheResult == nullptr) { - Result.add_error("Failed to collect information from CacheResult"); - return; - } - NSchemeCache::TSchemeCacheNavigate *navigate = CacheResult->Request.Get(); - if (navigate->ErrorCount > 0) { - for (auto& entry: CacheResult->Request.Get()->ResultSet) { - if (entry.Status != TSchemeCacheNavigate::EStatus::Ok) { - Result.add_error(TStringBuilder() << "Error receiving Navigate response: `" << CanonizePath(entry.Path) << "` has <" << ToString(entry.Status) << "> status"); - } - } - return; - } - for (auto& entry: CacheResult->Request.Get()->ResultSet) { - TString path = CanonizePath(entry.Path); - if (entry.ListNodeEntry) { - for (const auto& child : entry.ListNodeEntry->Children) { - Dictionary[child.Name] = TSchemaWordData(child.Name, ConvertType(child.Kind), path); - } - }; - for (const auto& [id, column] : entry.Columns) { - Dictionary[column.Name] = TSchemaWordData(column.Name, NKikimrViewer::column, path); - } - for (const auto& index : entry.Indexes) { - Dictionary[index.GetName()] = 
TSchemaWordData(index.GetName(), NKikimrViewer::index, path); - } - for (const auto& cdcStream : entry.CdcStreams) { - Dictionary[cdcStream.GetName()] = TSchemaWordData(cdcStream.GetName(), NKikimrViewer::cdc_stream, path); - } - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { - CacheResult = ev->Release(); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - ConsoleResult = ev->Release(); - RequestDone(); - } - - void SendAutocompleteResponse() { - if (ViewerRequest) { - TEvViewer::TEvViewerResponse* viewerResponse = new TEvViewer::TEvViewerResponse(); - viewerResponse->Record.MutableAutocompleteResponse()->CopyFrom(Result); - Send(ViewerRequest->Sender, viewerResponse); - } else { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - } - - void ReplyAndPassAway() { - if (ProxyResult) { - ParseProxyResult(); - } else if (Database) { - ParseCacheResult(); - } else { - ParseConsoleResult(); - } - - if (!ProxyResult) { - Result.set_success(Result.error_size() == 0); - if (Result.error_size() == 0) { - auto fuzzy = FuzzySearcher(Dictionary); - auto autocomplete = fuzzy.Search(SearchWord, Limit); - Result.MutableResult()->SetTotal(autocomplete.size()); - for (TSchemaWordData& wordData: autocomplete) { - auto entity = Result.MutableResult()->AddEntities(); - entity->SetName(wordData.Name); - entity->SetType(wordData.Type); - if (wordData.Table) { - entity->SetParent(wordData.Table); - } - } - } - } - - SendAutocompleteResponse(); - PassAway(); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - if (ev.Get()->Get()->Record.HasAutocompleteResponse()) { - ProxyResult = ev.Release()->Release(); - } else { - Direct = true; - SendSchemeCacheRequest(); // fallback - } - RequestDone(); - } - - void HandleTimeout() { 
- if (ViewerRequest) { - Result.add_error("Request timed out"); - ReplyAndPassAway(); - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: database - in: query - description: database name - required: false - type: string - - name: table - in: query - description: table list - required: false - type: string - - name: prefix - in: query - description: known part of the word - required: false - type: string - - name: limit - in: query - description: limit of entities - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: direct - in: query - description: force execution on current node - required: false - type: boolean - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Autocomplete information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns autocomplete information about objects in the database"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_blobindexstat.h b/ydb/core/viewer/json_blobindexstat.h deleted file mode 100644 index 2cdf56b4bc4f..000000000000 --- a/ydb/core/viewer/json_blobindexstat.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonBlobIndexStat 
= TJsonVDiskRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "\"Get logoblob index stat from VDisk\""; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "\"Get logoblob index stat from VDisk\""; - } -}; - -} -} diff --git a/ydb/core/viewer/json_browse.h b/ydb/core/viewer/json_browse.h deleted file mode 100644 index 223e91571b6a..000000000000 --- a/ydb/core/viewer/json_browse.h +++ /dev/null @@ -1,254 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "browse.h" -#include "browse_db.h" -#include "browse_pq.h" -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonBrowse : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Recursive = false; - - struct TPathStateInfo { - TString Name; - TString Path; - TActorId BrowseActorId; - NKikimrViewer::TBrowseInfo BrowseInfo; - - TPathStateInfo(const TString& name, const TString& path, const TActorId& browseActorId) - : Name(name) - , Path(path) - , BrowseActorId(browseActorId) - {} - - operator const TString&() const { - return Path; - } - - bool operator== (const TString& otherPath) const { - return Path == otherPath; - } - }; - - TVector Paths; - - using TBrowseRequestKey = std::tuple; - std::unordered_multiset BrowseRequestsInFlight; - ui32 Responses = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonBrowse(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void ParsePath(const TString& path, const TActorContext& ctx) { - size_t prevpos = 0; - size_t pos = 0; - size_t len = path.size(); - while (pos < len) { - if 
(path[pos] == '/') { - TString n = path.substr(prevpos, pos - prevpos); - TString p = path.substr(0, pos); - if (n.empty() && p.empty()) { - n = p = "/"; - } - Paths.emplace_back(n, p, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, p, Event->Get()->UserToken))); - ++pos; - prevpos = pos; - } else { - ++pos; - } - } - if (pos != prevpos) { - TString n = path.substr(prevpos, pos - prevpos); - TString p = path.substr(0, pos); - Paths.emplace_back(n, p, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, p, Event->Get()->UserToken))); - } - } - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Recursive = FromStringWithDefault(params.Get("recursive"), false); - TString path = params.Get("path"); - if (Recursive) { - ParsePath(path, ctx); - } else { - Paths.emplace_back(path, path, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, path, Event->Get()->UserToken))); - } - Become(&TThis::StateWait, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STFUNC(StateWait) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, Handle); - HFunc(NViewerEvents::TEvBrowseRequestSent, Handle); - HFunc(NViewerEvents::TEvBrowseRequestCompleted, Handle); - HFunc(NMon::TEvHttpInfoRes, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event(*ev->Get()); - if (!event.Error.empty()) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(event.Error, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return Die(ctx); - } - auto it = std::find(Paths.begin(), Paths.end(), event.BrowseInfo.GetPath()); - if 
(it != Paths.end()) { - it->BrowseInfo.MergeFrom(event.BrowseInfo); - it->BrowseActorId = TActorId(); - } - // TODO: error handling? - ++Responses; - if (Responses == Paths.size()) { - ReplyAndDie(ctx); - } - } - - void Handle(NViewerEvents::TEvBrowseRequestSent::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestSent& event(*ev->Get()); - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NViewerEvents::TEvBrowseRequestCompleted::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestCompleted& event(*ev->Get()); - auto it = BrowseRequestsInFlight.find({event.Actor, event.Tablet, event.Event}); - if (it != BrowseRequestsInFlight.end()) { - // we could not delete by key, it could be many items with the same key - BrowseRequestsInFlight.erase(it); - } - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { - ctx.ExecutorThread.Send(ev->Forward(Event->Sender)); - Die(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - if (!Paths.empty()) { - NKikimrViewer::TBrowseInfo browseInfo; - auto pi = Paths.begin(); - browseInfo.MergeFrom(pi->BrowseInfo); - if (Recursive) { - browseInfo.SetPath(Paths.back().BrowseInfo.GetPath()); - browseInfo.SetName("/"); - } - NKikimrViewer::TBrowseInfo* pBrowseInfo = &browseInfo; - ++pi; - while (pi != Paths.end()) { - TString name = pi->Name; - for (NKikimrViewer::TBrowseInfo& child : *pBrowseInfo->MutableChildren()) { - if (child.GetName() == name) { - pBrowseInfo = &child; - pBrowseInfo->MergeFrom(pi->BrowseInfo); - pBrowseInfo->ClearPath(); - break; - } - } - ++pi; - } - TProtoToJson::ProtoToJson(json, browseInfo, JsonSettings); - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) 
{ - for (auto& pathInfo : Paths) { - if (pathInfo.BrowseActorId) { - ctx.Send(pathInfo.BrowseActorId, new TEvents::TEvPoisonPill()); - } - } - TStringStream result; - RenderPendingRequests(result); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), result.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void RenderPendingRequests(IOutputStream& html) { - for (const auto& request : BrowseRequestsInFlight) { - html << request << Endl; - } - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns brief information about schema object"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_bscontrollerinfo.h b/ydb/core/viewer/json_bscontrollerinfo.h deleted file mode 100644 index 1e0263115a5f..000000000000 --- a/ydb/core/viewer/json_bscontrollerinfo.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonBSControllerInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - 
NMon::TEvHttpInfo::TPtr Event; - TAutoPtr ControllerInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonBSControllerInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - InitConfig(params); - RequestBSControllerInfo(); - Become(&TThis::StateRequestedInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvResponseControllerInfo, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvBlobStorage::TEvResponseControllerInfo::TPtr& ev) { - ControllerInfo = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (ControllerInfo != nullptr) { - TProtoToJson::ProtoToJson(json, ControllerInfo->Record); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Node(R"___( - 
- name: controller_id - in: query - description: storage controller identifier (tablet id) - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage controller information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage controller"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_bsgroupinfo.h b/ydb/core/viewer/json_bsgroupinfo.h deleted file mode 100644 index 24c05ffe5d7a..000000000000 --- a/ydb/core/viewer/json_bsgroupinfo.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "wb_merge.h" -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvBSGroupStateResponse; - using TResponseEventType = TEvWhiteboard::TEvBSGroupStateResponse; - using TElementType = NKikimrWhiteboard::TBSGroupStateInfo; - using TElementKeyType = ui32; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableBSGroupStateInfo(); - } - - static ui32 GetElementKey(const TElementType& type) { - return type.GetGroupID(); - } - - static TString GetDefaultMergeField() { - return "GroupID"; - } - - static void InitMerger() { - const auto* field = NKikimrWhiteboard::TBSGroupStateInfo::descriptor()->FindFieldByName("Latency"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeEnumField; - } - - static void MergeResponses(TResponseType& result, 
TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NKikimrWhiteboard::TBSGroupStateInfo& a, const NKikimrWhiteboard::TBSGroupStateInfo& b) const { - return std::make_tuple(a.GetGroupGeneration(), a.VDiskIdsSize(), a.GetChangeTime()) - < std::make_tuple(b.GetGroupGeneration(), b.VDiskIdsSize(), b.GetChangeTime()); - } -}; - -using TJsonBSGroupInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage groups information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage groups"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_cluster.h b/ydb/core/viewer/json_cluster.h deleted file mode 100644 index a4b6c0e86b81..000000000000 --- a/ydb/core/viewer/json_cluster.h +++ /dev/null @@ -1,542 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer.h" -#include "viewer_probes.h" - -LWTRACE_USING(VIEWER_PROVIDER); - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; -using ::google::protobuf::FieldDescriptor; - -class TJsonCluster : public TViewerPipeClient { - using TThis = TJsonCluster; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - THolder NodesInfo; - TMap SystemInfo; - TMap VDiskInfo; - TMap PDiskInfo; - TMap BSGroupInfo; - TMap TabletInfo; - THolder DescribeResult; - TSet NodesAlive; - TJsonSettings JsonSettings; - ui32 Timeout; - ui32 TenantsNumber = 0; - bool Tablets = false; - - struct TEventLog { - bool IsTimeout = false; - TInstant StartTime; - TInstant 
StartHandleListTenantsResponseTime; - TInstant StartHandleNodesInfoTime; - TInstant StartMergeBSGroupsTime; - TInstant StartMergeVDisksTime; - TInstant StartMergePDisksTime; - TInstant StartMergeTabletsTime; - TInstant StartResponseBuildingTime; - }; - TEventLog EventLog; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCluster(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Tablets = FromStringWithDefault(params.Get("tablets"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - } - - void Bootstrap(const TActorContext& ) { - EventLog.StartTime = TActivationContext::Now(); - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - RequestConsoleListTenants(); - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (NodesInfo != nullptr) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe); - } - } - } - TBase::PassAway(); - } - - void SendWhiteboardTabletStateRequest() { - THashSet filterTablets; - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - for (TTabletId id : domain->Coordinators) { - filterTablets.emplace(id); - } - for (TTabletId id : domain->Mediators) { - filterTablets.emplace(id); - } - for (TTabletId id : domain->TxAllocators) { - filterTablets.emplace(id); - } - 
filterTablets.emplace(domain->SchemeRoot); - filterTablets.emplace(domains->GetHive()); - } - filterTablets.emplace(MakeBSControllerID()); - filterTablets.emplace(MakeDefaultHiveID()); - filterTablets.emplace(MakeCmsID()); - filterTablets.emplace(MakeNodeBrokerID()); - filterTablets.emplace(MakeTenantSlotBrokerID()); - filterTablets.emplace(MakeConsoleID()); - const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription()); - if (pathDescription.HasDomainDescription()) { - const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - filterTablets.emplace(tabletId); - } - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) { - filterTablets.emplace(tabletId); - } - if (domainDescription.HasDomainKey()) { - if (domainDescription.GetDomainKey().HasSchemeShard()) { - filterTablets.emplace(domainDescription.GetDomainKey().GetSchemeShard()); - } - } - } - - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId); - auto request = new TEvWhiteboard::TEvTabletStateRequest(); - for (TTabletId id: filterTablets) { - request->Record.AddFilterTabletId(id); - } - SendRequest(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - } - } - } - - void SendWhiteboardRequests() { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, 
ni.NodeId); - - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - SendRequest(whiteboardServiceId,new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - } - } - if (Tablets) { - SendWhiteboardTabletStateRequest(); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - EventLog.StartHandleNodesInfoTime = TActivationContext::Now(); - NodesInfo = ev->Release(); - // before making requests to Whiteboard with the Tablets parameter, we need to review the TEvDescribeSchemeResult information - if (Tablets) { - THolder request = MakeHolder(); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - TString domainPath = "/" + domain->Name; - record->SetPath(domainPath); - } - record->MutableOptions()->SetReturnPartitioningInfo(false); - record->MutableOptions()->SetReturnPartitionConfig(false); - record->MutableOptions()->SetReturnChildren(false); - SendRequest(MakeTxProxyID(), request.Release()); - } else { - SendWhiteboardRequests(); - } - - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - ui32 nodeId = ev.Get()->Cookie; - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvSystemStateRequest: - if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, 
NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvBSGroupStateRequest: - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - ui32 nodeId = ev->Get()->NodeId; - if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - if (nodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(); - } - if (Tablets) { - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - } - } - - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - SystemInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - 
RequestDone(); - } - - void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BSGroupInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - TabletInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - EventLog.StartHandleListTenantsResponseTime = TActivationContext::Now(); - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - TenantsNumber = listTenantsResult.paths().size(); - RequestDone(); - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) { - DescribeResult = ev->Release(); - SendWhiteboardRequests(); - } - RequestDone(); - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { - if (ev->Get()->Status != NKikimrProto::OK) { - RequestDone(); - } - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - NKikimrWhiteboard::TEvBSGroupStateResponse MergedBSGroupInfo; - 
NKikimrWhiteboard::TEvVDiskStateResponse MergedVDiskInfo; - NKikimrWhiteboard::TEvPDiskStateResponse MergedPDiskInfo; - NKikimrWhiteboard::TEvTabletStateResponse MergedTabletInfo; - TMap VDisksIndex; - TMap, const NKikimrWhiteboard::TPDiskStateInfo&> PDisksIndex; - - void ReplyAndPassAway() { - EventLog.StartMergeBSGroupsTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedBSGroupInfo, BSGroupInfo); - EventLog.StartMergeVDisksTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedVDiskInfo, VDiskInfo); - EventLog.StartMergePDisksTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedPDiskInfo, PDiskInfo); - - EventLog.StartMergeTabletsTime = TActivationContext::Now(); - THashSet tablets; - if (Tablets) { - MergeWhiteboardResponses(MergedTabletInfo, TabletInfo); - } - - EventLog.StartResponseBuildingTime = TActivationContext::Now(); - if (Tablets) { - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - tablets.emplace(MakeBSControllerID()); - tablets.emplace(MakeDefaultHiveID()); - tablets.emplace(MakeCmsID()); - tablets.emplace(MakeNodeBrokerID()); - tablets.emplace(MakeTenantSlotBrokerID()); - tablets.emplace(MakeConsoleID()); - tablets.emplace(domain->SchemeRoot); - tablets.emplace(domains->GetHive()); - for (TTabletId id : domain->Coordinators) { - tablets.emplace(id); - } - for (TTabletId id : domain->Mediators) { - tablets.emplace(id); - } - for (TTabletId id : domain->TxAllocators) { - tablets.emplace(id); - } - } - - if (DescribeResult) { - const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription()); - if (pathDescription.HasDomainDescription()) { - const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - tablets.emplace(tabletId); - } - for (TTabletId tabletId : 
domainDescription.GetProcessingParams().GetMediators()) { - tablets.emplace(tabletId); - } - if (domainDescription.HasDomainKey()) { - if (domainDescription.GetDomainKey().HasSchemeShard()) { - tablets.emplace(domainDescription.GetDomainKey().GetSchemeShard()); - } - } - } - } - } - - ui64 totalStorageSize = 0; - ui64 availableStorageSize = 0; - - for (auto& element : TWhiteboardInfo::GetElementsField(MergedPDiskInfo)) { - if (element.HasTotalSize() && element.HasAvailableSize()) { - totalStorageSize += element.GetTotalSize(); - availableStorageSize += element.GetAvailableSize(); - } - element.SetStateFlag(GetWhiteboardFlag(GetPDiskStateFlag(element))); - element.SetOverall(GetWhiteboardFlag(GetPDiskOverallFlag(element))); - PDisksIndex.emplace(TWhiteboardInfo::GetElementKey(element), element); - } - for (auto& element : TWhiteboardInfo::GetElementsField(MergedVDiskInfo)) { - element.SetOverall(GetWhiteboardFlag(GetVDiskOverallFlag(element))); - VDisksIndex.emplace(TWhiteboardInfo::GetElementKey(element), element); - } - NKikimrViewer::EFlag flag = NKikimrViewer::Grey; - for (const auto& element : TWhiteboardInfo::GetElementsField(MergedBSGroupInfo)) { - flag = Max(flag, GetBSGroupOverallFlag(element, VDisksIndex, PDisksIndex)); - } - ui32 numberOfCpus = 0; - double loadAverage = 0; - THashSet dataCenters; - THashSet versions; - THashSet hosts; - THashMap names; - for (const auto& [nodeId, sysInfo] : SystemInfo) { - if (sysInfo.SystemStateInfoSize() > 0) { - const NKikimrWhiteboard::TSystemStateInfo& systemState = sysInfo.GetSystemStateInfo(0); - if (systemState.HasNumberOfCpus() && (!systemState.HasHost() || hosts.emplace(systemState.GetHost()).second)) { - numberOfCpus += systemState.GetNumberOfCpus(); - if (systemState.LoadAverageSize() > 0) { - loadAverage += systemState.GetLoadAverage(0); - } - } - if (systemState.HasDataCenter()) { - dataCenters.insert(systemState.GetDataCenter()); - } - if (systemState.HasVersion()) { - 
versions.insert(systemState.GetVersion()); - } - if (systemState.HasClusterName()) { - names[systemState.GetClusterName()]++; - } - } - } - - NKikimrViewer::TClusterInfo pbCluster; - - if (Tablets) { - for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : MergedTabletInfo.GetTabletStateInfo()) { - if (tablets.contains(tabletInfo.GetTabletId())) { - NKikimrWhiteboard::TTabletStateInfo* tablet = pbCluster.AddSystemTablets(); - tablet->CopyFrom(tabletInfo); - auto tabletFlag = GetWhiteboardFlag(GetFlagFromTabletState(tablet->GetState())); - tablet->SetOverall(tabletFlag); - flag = Max(flag, GetViewerFlag(tabletFlag)); - } - } - pbCluster.SetTablets(MergedTabletInfo.TabletStateInfoSize()); - } - pbCluster.SetTenants(TenantsNumber); - - pbCluster.SetOverall(flag); - if (NodesInfo != nullptr) { - pbCluster.SetNodesTotal(NodesInfo->Nodes.size()); - pbCluster.SetNodesAlive(NodesAlive.size()); - } - pbCluster.SetNumberOfCpus(numberOfCpus); - pbCluster.SetLoadAverage(loadAverage); - pbCluster.SetStorageTotal(totalStorageSize); - pbCluster.SetStorageUsed(totalStorageSize - availableStorageSize); - pbCluster.SetHosts(hosts.size()); - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - TString domainName = "/" + domain->Name; - pbCluster.SetDomain(domainName); - } - for (const TString& dc : dataCenters) { - pbCluster.AddDataCenters(dc); - } - for (const TString& version : versions) { - pbCluster.AddVersions(version); - } - auto itMax = std::max_element(names.begin(), names.end(), [](const auto& a, const auto& b) { - return a.second < b.second; - }); - if (itMax != names.end()) { - pbCluster.SetName(itMax->first); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, pbCluster, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - - const TInstant now = TActivationContext::Now(); - 
LWPROBE(ViewerClusterHandler, TBase::SelfId().NodeId(), Tablets, EventLog.IsTimeout, - EventLog.StartTime.MilliSeconds(), - (now - EventLog.StartTime).MilliSeconds(), - (EventLog.StartHandleListTenantsResponseTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartHandleNodesInfoTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartMergeBSGroupsTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartMergeVDisksTime - EventLog.StartMergeBSGroupsTime).MilliSeconds(), - (EventLog.StartMergePDisksTime - EventLog.StartMergeVDisksTime).MilliSeconds(), - (EventLog.StartMergeTabletsTime - EventLog.StartMergePDisksTime).MilliSeconds(), - (EventLog.StartResponseBuildingTime - EventLog.StartMergeTabletsTime).MilliSeconds(), - (now - EventLog.StartResponseBuildingTime).MilliSeconds() - ); - - PassAway(); - } - - void HandleTimeout() { - EventLog.IsTimeout = true; - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: tablets - in: query - description: return system tablets state - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Cluster information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about cluster"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_compute.h b/ydb/core/viewer/json_compute.h deleted file mode 100644 index 0d045d700909..000000000000 --- 
a/ydb/core/viewer/json_compute.h +++ /dev/null @@ -1,710 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "viewer_helper.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonCompute : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - THashMap TenantByPath; - THashMap TenantBySubDomainKey; - THashMap HiveBySubDomainKey; - THashMap SubDomainKeyByPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveNodeStats; - THashMap> TabletInfoIndex; - THashMap HiveNodeStatsIndex; - THashMap TenantPathByNodeId; - NMon::TEvHttpInfo::TPtr Event; - TVector NodeIds; - THashSet PassedNodeIds; - THashSet FoundNodeIds; - THashMap NodeSysInfo; - TMap NodeTabletInfo; - THolder NodesInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - TString DomainPath; - TPathId FilterSubDomain; - bool Tablets = true; - TTabletId RootHiveId = 0; - bool RootHiveRequested = false; - NKikimrViewer::TComputeInfo Result; - ui32 UptimeSecondsFilter = 0; - bool ProblemNodesFilter = false; - TString TextFilter; - - enum class EVersion { - v1, - v2 // only this works with sorting - }; - enum class ESort { - NodeId, - Host, - DC, - Rack, - Version, - Uptime, - Memory, - CPU, - LoadAverage, - }; - EVersion Version = EVersion::v1; - std::optional Offset; - std::optional Limit; - ESort Sort = ESort::NodeId; - bool ReverseSort = false; - bool IsNodesListSorted = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCompute(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - TString GetDomainId(TPathId pathId) { - return TStringBuilder() << 
pathId.OwnerId << '-' << pathId.LocalPathId; - } - - bool IsFitsToPath(const TString& path) const { - if (Path.empty()) { - return true; - } - if (Path == path) { - return true; - } - if (Path == DomainPath) { - return false; - } - if (Path.StartsWith(path)) { - return true; - } - return false; - } - - void Bootstrap(const TActorContext& ) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - Path = params.Get("path"); - UptimeSecondsFilter = FromStringWithDefault(params.Get("uptime"), 0); - ProblemNodesFilter = FromStringWithDefault(params.Get("problems_only"), ProblemNodesFilter); - TextFilter = params.Get("filter"); - if (params.Has("offset")) { - Offset = FromStringWithDefault(params.Get("offset"), 0); - } - if (params.Has("limit")) { - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - } - TString version = params.Get("version"); - if (version == "v1") { - Version = EVersion::v1; - } else if (version == "v2") { - Version = EVersion::v2; - } - TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "NodeId") { - Sort = ESort::NodeId; - } else if (sort == "Host") { - Sort = ESort::Host; - } else if (sort == "DC") { - Sort = ESort::DC; - } else if (sort == "Rack") { - Sort = ESort::Rack; - } else if (sort == "Version") { - Sort = ESort::Version; - } else if (sort == "Uptime") { - Sort = ESort::Uptime; - } else if (sort == "Memory") { - Sort = ESort::Memory; - } else if (sort == "CPU") { - Sort = ESort::CPU; - } else if (sort == "LoadAverage") { - Sort = ESort::LoadAverage; - } - } - - 
SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - - RequestConsoleListTenants(); - - DomainPath = "/" + domain->Name; - if (Path.empty() || DomainPath == Path) { - NKikimrViewer::TTenant& tenant = TenantByPath[DomainPath]; - tenant.SetName(DomainPath); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::RUNNING); - tenant.SetType(NKikimrViewer::Domain); - RequestSchemeCacheNavigate(DomainPath); - } - RootHiveId = domains->GetHive(); - if (Requests == 0) { - ReplyAndPassAway(); - } - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe); - } - TBase::PassAway(); - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvHive::TEvResponseHiveNodeStats, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) { - NodesInfo = ev->Release(); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (IsFitsToPath(path)) { - TString 
p(Path.empty() ? path : Path); - TenantByPath[p]; - RequestSchemeCacheNavigate(p); - } - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { - TPathId subDomainKey({hiveStat.GetShardId(), hiveStat.GetPathId()}); - if (FilterSubDomain && FilterSubDomain != subDomainKey) { - continue; - } - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - if (ev->Cookie != HiveBySubDomainKey[subDomainKey]) { - continue; // we avoid overwrite of tenant stats by root stats - } - tenant.SetId(GetDomainId({hiveStat.GetShardId(), hiveStat.GetPathId()})); - tenant.MutableStateStats()->CopyFrom(hiveStat.GetStateStats()); - tenant.MutableMetrics()->CopyFrom(hiveStat.GetMetrics()); - tenant.MutableNodeIds()->CopyFrom(hiveStat.GetNodeIds()); - tenant.SetAliveNodes(hiveStat.GetAliveNodes()); - } - HiveDomainStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - bool IsPageNode(TNodeId nodeId) { - if (PassedNodeIds.insert(nodeId).second) { - if (Offset.has_value()) { - if (PassedNodeIds.size() <= Offset.value()) { - return false; - } - } - if (Limit.has_value()) { - if (NodeIds.size() >= Limit.value()) { - return false; - } - } - return true; - } - return false; - } - - bool IsRequiredNode(TNodeId nodeId) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - return nodeId > dynamicNameserviceConfig->MaxStaticNodeId && (!IsNodesListSorted || IsPageNode(nodeId)); - } - - bool NeedNodesSorting() { - return Version == EVersion::v2; - } - - bool IsNodeFilter() { - return ProblemNodesFilter || UptimeSecondsFilter > 0 && TextFilter; - } - - void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { - BLOG_TRACE("ProcessNodeIds()"); - - auto nodeStats = ev->Get()->Record.GetNodeStats(); - if (NeedNodesSorting() && Sort == ESort::NodeId && !IsNodeFilter()) { - SortCollection(nodeStats, [](const 
NKikimrHive::THiveNodeStats& node) { return node.GetNodeId();}, ReverseSort); - IsNodesListSorted = true; - } - for (const NKikimrHive::THiveNodeStats& nodeStat : nodeStats) { - auto nodeId = nodeStat.GetNodeId(); - if (IsRequiredNode(nodeId)) { - const auto& nodeDomain = nodeStat.GetNodeDomain(); - const TPathId subDomain(nodeDomain.GetSchemeShard(), nodeDomain.GetPathId()); - if (FilterSubDomain && FilterSubDomain != subDomain) { - continue; - } - NodeIds.emplace_back(nodeId); // order is important - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - if (Tablets && !ev->Get()->Record.GetExtendedTabletInfo()) { - THolder request = MakeHolder(); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - } - } - HiveNodeStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - const NSchemeCache::TSchemeCacheNavigate::TEntry& result(ev->Get()->Request->ResultSet.front()); - TPathId pathId; - if (!Path.empty() && result.Self) { - switch (result.Self->Info.GetPathType()) { - case NKikimrSchemeOp::EPathTypeSubDomain: - case NKikimrSchemeOp::EPathTypeExtSubDomain: - pathId = TPathId(); - break; - default: - pathId = TPathId(result.Self->Info.GetSchemeshardId(), result.Self->Info.GetPathId()); - break; - } - } - auto domainInfo = result.DomainInfo; - ui64 hiveId = domainInfo->Params.GetHive(); - if (hiveId == 0) { - if (!RootHiveRequested) { - hiveId = RootHiveId; - RootHiveRequested = true; - } - } - if (hiveId) { - RequestHiveDomainStats(hiveId); - 
RequestHiveNodeStats(hiveId, pathId); - HiveBySubDomainKey[domainInfo->DomainKey] = hiveId; - } - if (domainInfo->ResourcesDomainKey != domainInfo->DomainKey) { - TenantBySubDomainKey[domainInfo->ResourcesDomainKey].SetType(NKikimrViewer::Shared); - TenantBySubDomainKey[domainInfo->DomainKey].SetType(NKikimrViewer::Serverless); - TenantBySubDomainKey[domainInfo->DomainKey].SetResourceId(GetDomainId(domainInfo->ResourcesDomainKey)); - } - - TString path = CanonizePath(result.Path); - SubDomainKeyByPath[path] = domainInfo->DomainKey; - NavigateResult[path] = std::move(ev->Get()->Request); - if (IsFitsToPath(path)) { - FilterSubDomain = domainInfo->DomainKey; - } - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeSysInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeTabletInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeSysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvTabletStateRequest) { - if (NodeTabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr&) { - } - - bool CheckNodeFilters(TNodeId nodeId) { - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0); - if (UptimeSecondsFilter > 0 && sysInfo.HasStartTime() && 
sysInfo.HasChangeTime() - && sysInfo.GetChangeTime() - sysInfo.GetStartTime() > UptimeSecondsFilter * 1000) { - return false; - } - if (ProblemNodesFilter && sysInfo.HasSystemState() - && GetViewerFlag(sysInfo.GetSystemState()) == NKikimrViewer::EFlag::Green) { - return false; - } - if (TextFilter) { - if (sysInfo.HasHost() && sysInfo.GetHost().Contains(TextFilter)) { - return true; - } - if (std::to_string(nodeId).contains(TextFilter)) { - return true; - } - return false; - } - } - } - return true; - } - - static double GetLoadAverage(const NKikimrViewer::TComputeNodeInfo& nodeInfo) { - if (nodeInfo.LoadAverageSize() > 0 && nodeInfo.GetNumberOfCpus() > 0) { - return nodeInfo.GetLoadAverage(0) * 100 / nodeInfo.GetNumberOfCpus(); - } - return 0; - } - - static double GetCPU(const NKikimrViewer::TComputeNodeInfo& nodeInfo) { - double cpu = 0; - if (nodeInfo.PoolStatsSize() > 0) { - for (const auto& ps : nodeInfo.GetPoolStats()) { - cpu = std::max(cpu, ps.GetUsage()); - } - } - return cpu; - } - - void PaginateNodes(::google::protobuf::RepeatedPtrField& nodes) { - switch (Sort) { - case ESort::NodeId: - // already sorted - break; - case ESort::Host: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetHost();}, ReverseSort); - break; - case ESort::DC: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetDataCenter();}, ReverseSort); - break; - case ESort::Rack: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetRack();}, ReverseSort); - break; - case ESort::Version: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetVersion();}, ReverseSort); - break; - case ESort::Uptime: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetStartTime();}, ReverseSort); - break; - case ESort::Memory: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetMemoryUsed();}, 
ReverseSort); - break; - case ESort::CPU: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return GetCPU(node);}, ReverseSort); - break; - case ESort::LoadAverage: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return GetLoadAverage(node);}, ReverseSort); - break; - } - - if (Offset.has_value()) { - if (size_t(nodes.size()) > Offset.value()) { - nodes.erase(nodes.begin(), std::next(nodes.begin(), Offset.value())); - } else { - nodes.Clear(); - } - } - if (Limit.has_value()) { - if (size_t(nodes.size()) > Limit.value()) { - nodes.erase(std::next(nodes.begin(), Limit.value()), nodes.end()); - } - } - } - - void FillResponseNode(const TNodeId nodeId, const TString& path) { - if (!CheckNodeFilters(nodeId)) - return; - FoundNodeIds.insert(nodeId); - NKikimrViewer::TComputeNodeInfo& computeNodeInfo = Version == EVersion::v1 - ? *Result.MutableTenants(Result.TenantsSize() - 1)->AddNodes() - : *Result.AddNodes(); - if (Version == EVersion::v2) { - computeNodeInfo.SetTenant(path); - } - computeNodeInfo.SetNodeId(nodeId); - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0); - if (sysInfo.HasStartTime()) { - computeNodeInfo.SetStartTime(sysInfo.GetStartTime()); - } - if (sysInfo.HasChangeTime()) { - computeNodeInfo.SetChangeTime(sysInfo.GetChangeTime()); - } - computeNodeInfo.MutableSystemLocation()->MergeFrom(sysInfo.GetSystemLocation()); - computeNodeInfo.MutableLoadAverage()->MergeFrom(sysInfo.GetLoadAverage()); - if (sysInfo.HasNumberOfCpus()) { - computeNodeInfo.SetNumberOfCpus(sysInfo.GetNumberOfCpus()); - } - // TODO(xenoxeno) - if (sysInfo.HasSystemState()) { - computeNodeInfo.SetOverall(GetViewerFlag(sysInfo.GetSystemState())); - } - if (sysInfo.HasNodeId()) { - computeNodeInfo.SetNodeId(sysInfo.GetNodeId()); - } - if 
(sysInfo.HasDataCenter()) { - computeNodeInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - computeNodeInfo.SetRack(sysInfo.GetRack()); - } - if (sysInfo.HasHost()) { - computeNodeInfo.SetHost(sysInfo.GetHost()); - } - if (sysInfo.HasVersion()) { - computeNodeInfo.SetVersion(sysInfo.GetVersion()); - } - if (sysInfo.HasMemoryUsed()) { - computeNodeInfo.SetMemoryUsed(sysInfo.GetMemoryUsed()); - } - if (sysInfo.HasMemoryLimit()) { - computeNodeInfo.SetMemoryLimit(sysInfo.GetMemoryLimit()); - } - computeNodeInfo.MutablePoolStats()->MergeFrom(sysInfo.GetPoolStats()); - computeNodeInfo.MutableEndpoints()->MergeFrom(sysInfo.GetEndpoints()); - computeNodeInfo.MutableRoles()->MergeFrom(sysInfo.GetRoles()); - - } - } - auto itTabletInfo = TabletInfoIndex.find(nodeId); - if (itTabletInfo != TabletInfoIndex.end()) { - THashMap, NKikimrViewer::TTabletStateInfo> tablets; - for (const auto* pTabletInfo : itTabletInfo->second) { - const auto& tabletInfo = *pTabletInfo; - if (tabletInfo.GetState() != NKikimrWhiteboard::TTabletStateInfo::Deleted) { - NKikimrViewer::EFlag state = GetFlagFromTabletState(tabletInfo.GetState()); - auto& tablet = tablets[std::make_pair(tabletInfo.GetType(), state)]; - tablet.SetCount(tablet.GetCount() + 1); - } - } - for (const auto& [prTypeState, tabletInfo] : tablets) { - NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets(); - tablet.MergeFrom(tabletInfo); - tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first)); - tablet.SetState(prTypeState.second); - } - } - auto itHiveNodeStats = HiveNodeStatsIndex.find(nodeId); - if (itHiveNodeStats != HiveNodeStatsIndex.end()) { - computeNodeInfo.MutableMetrics()->CopyFrom(itHiveNodeStats->second->GetMetrics()); - for (const auto& state : itHiveNodeStats->second->GetStateStats()) { - if (state.HasTabletType()) { - NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets(); - 
tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(state.GetTabletType())); - tablet.SetCount(state.GetCount()); - NKikimrViewer::EFlag flag = GetFlagFromTabletState(state.GetVolatileState()); - tablet.SetState(flag); - } - } - } - } - - void ReplyAndPassAway() { - NKikimrWhiteboard::TEvTabletStateResponse tabletInfo; - MergeWhiteboardResponses(tabletInfo, NodeTabletInfo); - for (const auto& info : tabletInfo.GetTabletStateInfo()) { - TabletInfoIndex[info.GetNodeId()].emplace_back(&info); - } - auto itRootHiveNodeStats = HiveNodeStats.find(RootHiveId); - if (itRootHiveNodeStats != HiveNodeStats.end()) { - for (const auto& stats : itRootHiveNodeStats->second->Record.GetNodeStats()) { - HiveNodeStatsIndex[stats.GetNodeId()] = &stats; - } - } - for (const auto& prStats : HiveNodeStats) { - if (prStats.first != RootHiveId) { - for (const auto& stats : prStats.second->Record.GetNodeStats()) { - HiveNodeStatsIndex[stats.GetNodeId()] = &stats; - } - } - } - - for (const std::pair& prTenant : TenantByPath) { - const TString& path = prTenant.first; - if (Version == EVersion::v1) { - NKikimrViewer::TComputeTenantInfo& computeTenantInfo = *Result.AddTenants(); - computeTenantInfo.SetName(path); - // TODO(xenoxeno) - computeTenantInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - auto itSubDomainKey = SubDomainKeyByPath.find(path); - if (itSubDomainKey != SubDomainKeyByPath.end()) { - TPathId subDomainKey(itSubDomainKey->second); - const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]); - for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) { - if (IsNodesListSorted) { - TenantPathByNodeId[nodeId] = path; - } else { - FillResponseNode(nodeId, path); - } - } - } - } - - if (IsNodesListSorted) { - for (TNodeId nodeId : NodeIds) { - FillResponseNode(nodeId, TenantPathByNodeId[nodeId]); - } - } else if (NeedNodesSorting()) { - PaginateNodes(*Result.MutableNodes()); - } - - Result.SetTotalNodes(NodeIds.size()); - 
Result.SetFoundNodes(FoundNodeIds.size()); - // TODO(xenoxeno) - Result.SetOverall(NKikimrViewer::EFlag::Green); - - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Result.AddErrors("Timeout occurred"); - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: version - in: query - description: query version (v1, v2) - required: false - type: string - - name: path - in: query - description: schema path - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: uptime - in: query - description: return only nodes with less uptime in sec. 
- required: false - type: integer - - name: problems_only - in: query - description: return only problem nodes - required: false - type: boolean - - name: filter - in: query - description: filter nodes by id or host - required: false - type: string - - name: sort - in: query - description: sort by (NodeId,Host,DC,Rack,Version,Uptime,Memory,CPU,LoadAverage) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Database compute information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about compute layer of database"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_config.h b/ydb/core/viewer/json_config.h deleted file mode 100644 index e239c82fc4f8..000000000000 --- a/ydb/core/viewer/json_config.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonConfig : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonConfig(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const TKikimrRunConfig& kikimrRunConfig = Viewer->GetKikimrRunConfig(); - TStringStream json; - auto config = kikimrRunConfig.AppConfig; - config.MutableNameserviceConfig()->ClearClusterUUID(); - config.MutableNameserviceConfig()->ClearAcceptUUID(); - config.ClearAuthConfig(); - TProtoToJson::ProtoToJson(json, 
config); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Configuration"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns configuration"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_content.h b/ydb/core/viewer/json_content.h deleted file mode 100644 index 0d9399d36022..000000000000 --- a/ydb/core/viewer/json_content.h +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "browse.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using ::google::protobuf::FieldDescriptor; - -class TJsonContent : public TActorBootstrapped { - using TThis = TJsonContent; - using TBase = TActorBootstrapped; - - IViewer* Viewer; - TActorId Initiator; - NMon::TEvHttpInfo::TPtr Event; - - IViewer::TContentRequestContext ContentRequestContext; - TInstant BrowseStarted; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonContent(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Initiator(ev->Sender) - , Event(ev) - {} - - STFUNC(StateWaitingBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, HandleBrowseResponse); - CFunc(TEvents::TSystem::Wakeup, HandleBrowseTimeout); - } - } - -public: - void Bootstrap(const TActorContext& ctx) { - BuildRequestContext(&Event->Get()->Request, ContentRequestContext); - if (!Event->Get()->UserToken.empty()) { - ContentRequestContext.UserToken = Event->Get()->UserToken; - } - BrowseStarted = ctx.Now(); - 
ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, ContentRequestContext.Path, Event->Get()->UserToken)); - - TBase::Become( - &TThis::StateWaitingBrowse, - ctx, - ContentRequestContext.Timeout, - new TEvents::TEvWakeup()); - } - -private: - static void BuildRequestContext( - const NMonitoring::IMonHttpRequest* httpRequest, - IViewer::TContentRequestContext& reqCtx) { - if (!httpRequest) { - return; - } - - const auto& params = httpRequest->GetParams(); - auto post = httpRequest->GetPostContent(); - - reqCtx.JsonSettings.EnumAsNumbers = !FromStringWithDefault( - params.Get("enums"), - !reqCtx.JsonSettings.EnumAsNumbers); // defaults to false - reqCtx.JsonSettings.UI64AsString = !FromStringWithDefault( - params.Get("ui64"), - !reqCtx.JsonSettings.UI64AsString); // defaults to false - - ui32 timeoutMillis = FromStringWithDefault( - params.Get("timeout"), - (ui32)reqCtx.Timeout.MilliSeconds()); - reqCtx.Timeout = TDuration::MilliSeconds(timeoutMillis); - - reqCtx.Limit = FromStringWithDefault(params.Get("limit"), reqCtx.Limit); - reqCtx.Offset = FromStringWithDefault(params.Get("offset"), reqCtx.Offset); - reqCtx.Key = post; - - if (params.Has("key")) { - reqCtx.Key = params.Get("key"); - } - - reqCtx.Path = params.Get("path"); - } - - void HandleBrowseResponse(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event = *ev->Get(); - - if (!event.Error.empty()) { - return SendErrorReplyAndDie(event.Error, ctx); - } - - auto type = event.BrowseInfo.GetType(); - auto contentHandler = Viewer->GetContentHandler(type); - if (!contentHandler) { - return SendErrorReplyAndDie(TStringBuilder() - << "HTTP/1.1 500 Internal Server Error\r\n" - "Connection: Close\r\n" - "\r\n" - "No content can be retrieved from " - << (NKikimrViewer::EObjectType_IsValid((int)type) ? 
NKikimrViewer::EObjectType_Name(type) : TString("unknown")) - << " object\r\n", - ctx); - } - - ContentRequestContext.Type = event.BrowseInfo.GetType(); - ContentRequestContext.ObjectName = event.BrowseInfo.GetName(); - ContentRequestContext.Timeout -= (ctx.Now() - BrowseStarted); - - // spawn content retrieval actor - ctx.RegisterWithSameMailbox(contentHandler(Initiator, ContentRequestContext)); - Die(ctx); - } - - void HandleBrowseTimeout(const TActorContext& ctx) { - return SendErrorReplyAndDie(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), ctx); - } - - void SendErrorReplyAndDie(const TString& error, const TActorContext& ctx) { - ctx.Send( - Initiator, - new NMon::TEvHttpInfoRes( - error, - 0, - NMon::IEvHttpInfoRes::EContentType::Custom)); - - Die(ctx); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: key - in: query - description: key for positioning - required: false - type: string - - name: limit - in: query - description: rows limit - required: false - type: integer - - name: offset - in: query - description: offset in rows - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema content preview"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Return schema preview"; - } -}; - - -} -} diff --git a/ydb/core/viewer/json_counters.h b/ydb/core/viewer/json_counters.h deleted file mode 100644 index 8ad296bcbe43..000000000000 --- a/ydb/core/viewer/json_counters.h +++ 
/dev/null @@ -1,439 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_tabletinfo.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using ::google::protobuf::FieldDescriptor; - -class TJsonCounters : public TActorBootstrapped { - using TThis = TJsonCounters; - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Requested; - ui32 Received; - THolder NodesInfo; - TMap VDiskInfo; - TMap PDiskInfo; - TMap TabletInfo; - TMap BSGroupInfo; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - , Requested(0) - , Received(0) - {} - - void Bootstrap(const TActorContext& ctx) { - const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::Seconds(60), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - if (NodesInfo != nullptr) { - for (const auto& ni : NodesInfo->Nodes) { - ctx.Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe()); - } - } - TBase::Die(ctx); - } - - void SendRequest(ui32 nodeId, const TActorContext& ctx) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvTabletStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, 
nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - } - - void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { - NodesInfo = ev->Release(); - for (const auto& ni : NodesInfo->Nodes) { - SendRequest(ni.NodeId, ctx); - } - if (Requested > 0) { - TBase::Become(&TThis::StateRequestedNodeInfo); - } else { - ReplyAndDie(ctx); - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev.Get()->Cookie; - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvBSGroupStateRequest: - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev->Get()->NodeId; - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - } - - 
void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - TabletInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - BSGroupInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void NodeStateInfoReceived(const TActorContext& ctx) { - ++Received; - if (Received == Requested) { - ReplyAndDie(ctx); - } - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - STFUNC(StateRequestedNodeInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - HFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - template - void RenderStats(TStringStream& json, - ResponseType& response, - const TEvInterconnect::TNodeInfo& nodeInfo, - const TString& subsystem, - const TVector& groupFields) { - - TWhiteboardGrouper::GroupResponse(response, groupFields, true); - auto& stateInfo = TWhiteboardInfo::GetElementsField(response); - TStringBuf host(nodeInfo.Host); - size_t pos = host.find('.'); - if 
(pos != TString::npos) { - host = host.substr(0, pos); - } - for (typename TWhiteboardInfo::TElementType& info : stateInfo) { - const Reflection& reflectionFrom = *info.GetReflection(); - json << ",{\"labels\":{"; - if (nodeInfo.NodeId != 0) { - json << "\"node\":" << nodeInfo.NodeId << ","; - } - json << "\"host\":\"" << host << "\","; - if (nodeInfo.Port != 0) { - json << "\"port\":" << nodeInfo.Port << ","; - } - json << "\"subsystem\":\"" << subsystem << "\","; - json << "\"" << groupFields.front()->name() << "\":\""; - json << reflectionFrom.GetEnum(info, groupFields.front())->name(); - json << "\""; - json << "},\"value\":"; - json << info.GetCount(); - json << '}'; - } - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvVDiskStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.VDiskStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("VDiskState"); - RenderStats(json, response, nodeInfo, "VDisk", groupFields); - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvPDiskStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.PDiskStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("State"); - RenderStats(json, response, nodeInfo, "PDisk", groupFields); - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvTabletStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.TabletStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("State"); - RenderStats(json, response, nodeInfo, "Tablet", groupFields); - } - - void ReplyAndDie(const TActorContext& ctx) { - TStringStream json; - - json << '{'; - json << "\"sensors\":["; - - Sort(NodesInfo->Nodes, []( - const TEvInterconnect::TNodeInfo& a, - const TEvInterconnect::TNodeInfo& b) -> bool { - return a.NodeId < b.NodeId; - }); - - ui32 
nodesResponded = 0; - for (const std::pair& value : VDiskInfo) { - if (value.second.HasResponseTime()) { - ++nodesResponded; - } - } - - json << "{\"labels\":{"; - json << "\"subsystem\":\"Viewer\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"NodesResponded\""; - json << "},\"value\":" << nodesResponded; - json << '}'; - - NKikimrWhiteboard::TEvTabletStateResponse mergedTabletInfo; - MergeWhiteboardResponses(mergedTabletInfo, TabletInfo); - TabletInfo.clear(); - for (const auto& tabletInfo : mergedTabletInfo.GetTabletStateInfo()) { - if (!tabletInfo.HasNodeId()) { - continue; - } - auto it = TabletInfo.find(tabletInfo.GetNodeId()); - if (it == TabletInfo.end()) { - it = TabletInfo.emplace(tabletInfo.GetNodeId(), NKikimrWhiteboard::TEvTabletStateResponse{}).first; - } - it->second.AddTabletStateInfo()->CopyFrom(tabletInfo); - } - - std::array pDiskUserSpaceHistogram = {}; - - auto itVDiskInfo = VDiskInfo.begin(); - auto itPDiskInfo = PDiskInfo.begin(); - auto itTabletInfo = TabletInfo.begin(); - - for (const auto& nodeInfo : NodesInfo->Nodes) { - while (itVDiskInfo != VDiskInfo.end() && itVDiskInfo->first < nodeInfo.NodeId) - ++itVDiskInfo; - if (itVDiskInfo != VDiskInfo.end() && itVDiskInfo->first == nodeInfo.NodeId) { - RenderStats(json, itVDiskInfo->second, nodeInfo); - } - while (itPDiskInfo != PDiskInfo.end() && itPDiskInfo->first < nodeInfo.NodeId) - ++itPDiskInfo; - if (itPDiskInfo != PDiskInfo.end() && itPDiskInfo->first == nodeInfo.NodeId) { - RenderStats(json, itPDiskInfo->second, nodeInfo); - auto& stateInfo = TWhiteboardInfo::GetElementsField(itPDiskInfo->second); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - if (info.GetTotalSize() > 0 && info.GetAvailableSize() > 0) { - ++pDiskUserSpaceHistogram[std::min((info.GetTotalSize() - info.GetAvailableSize()) * pDiskUserSpaceHistogram.size() / info.GetTotalSize(), pDiskUserSpaceHistogram.size() - 1)]; - } - } - } - while (itTabletInfo != TabletInfo.end() && 
itTabletInfo->first < nodeInfo.NodeId) - ++itTabletInfo; - if (itTabletInfo != TabletInfo.end() && itTabletInfo->first == nodeInfo.NodeId) { - RenderStats(json, itTabletInfo->second, nodeInfo); - } - } - - static TEvInterconnect::TNodeInfo totals(0, "", "cluster", "", 0, TNodeLocation()); - - for (size_t p = 0; p < pDiskUserSpaceHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << ((p + 1) * 100 / pDiskUserSpaceHistogram.size()) << "%\","; - json << "\"subsystem\":\"PDisk\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"UsedSpace\""; - json << "},\"value\":"; - json << pDiskUserSpaceHistogram[p]; - json << '}'; - } - - NKikimrWhiteboard::TEvVDiskStateResponse mergedVDiskInfo; - MergeWhiteboardResponses(mergedVDiskInfo, VDiskInfo); - RenderStats(json, mergedVDiskInfo, totals); - NKikimrWhiteboard::TEvPDiskStateResponse mergedPDiskInfo; - MergeWhiteboardResponses(mergedPDiskInfo, PDiskInfo); - RenderStats(json, mergedPDiskInfo, totals); - RenderStats(json, mergedTabletInfo, totals); - NKikimrWhiteboard::TEvBSGroupStateResponse mergedBSGroupInfo; - MergeWhiteboardResponses(mergedBSGroupInfo, BSGroupInfo); - - std::array bsGroupUnavaiableHistogram = {}; - std::array bsGroupGreenHistogram = {}; - std::array bsGroupNotGreenHistogram = {}; - std::unordered_map bsGroupVDisks; - std::unordered_map bsGroupGreenVDisks; - std::unordered_map bsGroupNotGreenVDisks; - { - auto& stateInfo = TWhiteboardInfo::GetElementsField(mergedBSGroupInfo); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - bsGroupVDisks[info.GetGroupID()] = info.VDiskIdsSize(); - } - } - { - auto& stateInfo = TWhiteboardInfo::GetElementsField(mergedVDiskInfo); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - auto groupId = info.GetVDiskId().GetGroupID(); - bsGroupVDisks[groupId]--; - auto flag = GetVDiskOverallFlag(info); - if (flag == NKikimrViewer::EFlag::Green && info.GetReplicated()) { - 
bsGroupGreenVDisks[groupId]++; - } else { - bsGroupNotGreenVDisks[groupId]++; - } - } - } - { - for (auto it = bsGroupVDisks.begin(); it != bsGroupVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupUnavaiableHistogram.size()) { - idx = bsGroupUnavaiableHistogram.size() - 1; - } - bsGroupUnavaiableHistogram[idx]++; - } - } - { - for (auto it = bsGroupGreenVDisks.begin(); it != bsGroupGreenVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupGreenHistogram.size()) { - idx = bsGroupGreenHistogram.size() - 1; - } - bsGroupGreenHistogram[idx]++; - } - } - { - for (auto it = bsGroupNotGreenVDisks.begin(); it != bsGroupNotGreenVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupNotGreenHistogram.size()) { - idx = bsGroupNotGreenHistogram.size() - 1; - } - bsGroupNotGreenHistogram[idx]++; - } - } - - for (size_t p = 0; p < bsGroupUnavaiableHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"UnavailableVDisks\""; - json << "},\"value\":"; - json << bsGroupUnavaiableHistogram[p]; - json << '}'; - } - - for (size_t p = 0; p < bsGroupGreenHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"GreenVDisks\""; - json << "},\"value\":"; - json << bsGroupGreenHistogram[p]; - json << '}'; - } - - for (size_t p = 0; p < bsGroupNotGreenHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"NotGreenVDisks\""; - json << "},\"value\":"; - json << bsGroupNotGreenHistogram[p]; - json << '}'; - } - - json << ']'; - json << '}'; - - 
ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void Timeout(const TActorContext& ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -} -} diff --git a/ydb/core/viewer/json_describe.h b/ydb/core/viewer/json_describe.h deleted file mode 100644 index 276bd41e31e6..000000000000 --- a/ydb/core/viewer/json_describe.h +++ /dev/null @@ -1,423 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TJsonDescribe : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr SchemeShardResult; - TAutoPtr CacheResult; - TAutoPtr DescribeResult; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool ExpandSubElements = true; - int Requests = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonDescribe(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void FillParams(NKikimrSchemeOp::TDescribePath* record, const TCgiParameters& params) { - if (params.Has("path")) { - record->SetPath(params.Get("path")); - } - if (params.Has("path_id")) { - record->SetPathId(FromStringWithDefault(params.Get("path_id"))); - } - if (params.Has("schemeshard_id")) { - record->SetSchemeshardId(FromStringWithDefault(params.Get("schemeshard_id"))); - } - record->MutableOptions()->SetBackupInfo(FromStringWithDefault(params.Get("backup"), true)); - 
record->MutableOptions()->SetShowPrivateTable(FromStringWithDefault(params.Get("private"), true)); - record->MutableOptions()->SetReturnChildren(FromStringWithDefault(params.Get("children"), true)); - record->MutableOptions()->SetReturnBoundaries(FromStringWithDefault(params.Get("boundaries"), false)); - record->MutableOptions()->SetReturnPartitionConfig(FromStringWithDefault(params.Get("partition_config"), true)); - record->MutableOptions()->SetReturnPartitionStats(FromStringWithDefault(params.Get("partition_stats"), false)); - record->MutableOptions()->SetReturnPartitioningInfo(FromStringWithDefault(params.Get("partitioning_info"), true)); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - ExpandSubElements = FromStringWithDefault(params.Get("subs"), ExpandSubElements); - InitConfig(params); - - if (params.Has("schemeshard_id")) { - THolder request = MakeHolder(); - FillParams(&request->Record, params); - ui64 schemeShardId = FromStringWithDefault(params.Get("schemeshard_id")); - SendRequestToPipe(ConnectTabletPipe(schemeShardId), request.Release()); - } else { - THolder request = MakeHolder(); - FillParams(request->Record.MutableDescribePath(), params); - request->Record.SetUserToken(Event->Get()->UserToken); - SendRequest(MakeTxProxyID(), request.Release()); - } - ++Requests; - - if (params.Has("path")) { - TAutoPtr request(new NSchemeCache::TSchemeCacheNavigate()); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; - entry.SyncVersion = false; - entry.Path = SplitPath(params.Get("path")); - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); - ++Requests; - } 
- - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - SchemeShardResult = ev->Release(); - if (SchemeShardResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - ReplyAndPassAway(); - } else { - RequestDone("TEvDescribeSchemeResult"); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { - CacheResult = ev->Release(); - RequestDone("TEvNavigateKeySetResult"); - } - - void RequestDone(const char* name) { - --Requests; - if (Requests == 0) { - ReplyAndPassAway(); - } - if (Requests < 0) { - BLOG_CRIT("Requests < 0 in RequestDone(" << name << ")"); - } - } - - void FillDescription(NKikimrSchemeOp::TDirEntry* descr, ui64 schemeShardId) { - descr->SetSchemeshardId(schemeShardId); - descr->SetPathId(InvalidLocalPathId); - descr->SetParentPathId(InvalidLocalPathId); - descr->SetCreateFinished(true); - descr->SetCreateTxId(0); - descr->SetCreateStep(0); - } - - NKikimrSchemeOp::EPathType ConvertType(TNavigate::EKind navigate) { - switch (navigate) { - case TNavigate::KindSubdomain: - return NKikimrSchemeOp::EPathTypeSubDomain; - case TNavigate::KindPath: - return NKikimrSchemeOp::EPathTypeDir; - case TNavigate::KindExtSubdomain: - return NKikimrSchemeOp::EPathTypeExtSubDomain; - case TNavigate::KindTable: - return NKikimrSchemeOp::EPathTypeTable; - case TNavigate::KindOlapStore: - return NKikimrSchemeOp::EPathTypeColumnStore; - case TNavigate::KindColumnTable: - return NKikimrSchemeOp::EPathTypeColumnTable; - case TNavigate::KindRtmr: - return NKikimrSchemeOp::EPathTypeRtmrVolume; - case 
TNavigate::KindKesus: - return NKikimrSchemeOp::EPathTypeKesus; - case TNavigate::KindSolomon: - return NKikimrSchemeOp::EPathTypeSolomonVolume; - case TNavigate::KindTopic: - return NKikimrSchemeOp::EPathTypePersQueueGroup; - case TNavigate::KindCdcStream: - return NKikimrSchemeOp::EPathTypeCdcStream; - case TNavigate::KindSequence: - return NKikimrSchemeOp::EPathTypeSequence; - case TNavigate::KindReplication: - return NKikimrSchemeOp::EPathTypeReplication; - case TNavigate::KindBlobDepot: - return NKikimrSchemeOp::EPathTypeBlobDepot; - case TNavigate::KindExternalTable: - return NKikimrSchemeOp::EPathTypeExternalTable; - case TNavigate::KindExternalDataSource: - return NKikimrSchemeOp::EPathTypeExternalDataSource; - case TNavigate::KindBlockStoreVolume: - return NKikimrSchemeOp::EPathTypeBlockStoreVolume; - case TNavigate::KindFileStore: - return NKikimrSchemeOp::EPathTypeFileStore; - case TNavigate::KindView: - return NKikimrSchemeOp::EPathTypeView; - default: - return NKikimrSchemeOp::EPathTypeDir; - } - } - - TAutoPtr GetSchemeShardDescribeSchemeInfo() { - TAutoPtr result(new NKikimrViewer::TEvDescribeSchemeInfo()); - auto& record = SchemeShardResult->GetRecord(); - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - result->SetStatus(descriptor->FindValueByNumber(record.GetStatus())->name()); - result->SetReason(record.GetReason()); - result->SetPath(record.GetPath()); - result->MutablePathDescription()->CopyFrom(record.GetPathDescription()); - result->SetPathId(record.GetPathId()); - result->SetLastExistedPrefixPath(record.GetLastExistedPrefixPath()); - result->SetLastExistedPrefixPathId(record.GetLastExistedPrefixPathId()); - result->MutableLastExistedPrefixDescription()->CopyFrom(record.GetLastExistedPrefixDescription()); - result->SetPathOwnerId(record.GetPathOwnerId()); - result->SetSource(NKikimrViewer::TEvDescribeSchemeInfo::SchemeShard); - - return result; - } - - TAutoPtr GetCacheDescribeSchemeInfo() { - const auto& entry = 
CacheResult->Request.Get()->ResultSet.front(); - const auto& path = Event->Get()->Request.GetParams().Get("path"); - const auto& schemeShardId = entry.DomainInfo->DomainKey.OwnerId; - - TAutoPtr result(new NKikimrViewer::TEvDescribeSchemeInfo()); - result->SetPath(path); - result->SetPathId(entry.Self->Info.GetPathId()); - result->SetPathOwnerId(entry.Self->Info.GetSchemeshardId()); - - auto* pathDescription = result->MutablePathDescription(); - auto* self = pathDescription->MutableSelf(); - - self->CopyFrom(entry.Self->Info); - FillDescription(self, schemeShardId); - - if (entry.ListNodeEntry) { - for (const auto& child : entry.ListNodeEntry->Children) { - auto descr = pathDescription->AddChildren(); - descr->SetName(child.Name); - descr->SetPathType(ConvertType(child.Kind)); - FillDescription(descr, schemeShardId); - } - }; - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - auto status = descriptor->FindValueByNumber(NKikimrScheme::StatusSuccess)->name(); - result->SetStatus(status); - result->SetSource(NKikimrViewer::TEvDescribeSchemeInfo::Cache); - return result; - } - - void ReplyAndPassAway() { - TStringStream json; - if (SchemeShardResult != nullptr && SchemeShardResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - DescribeResult = GetSchemeShardDescribeSchemeInfo(); - } else if (CacheResult != nullptr) { - NSchemeCache::TSchemeCacheNavigate *navigate = CacheResult->Request.Get(); - Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); - if (navigate->ErrorCount == 0) { - DescribeResult = GetCacheDescribeSchemeInfo(); - } - } - if (DescribeResult != nullptr) { - if (ExpandSubElements) { - if (DescribeResult->HasPathDescription()) { - auto& pathDescription = *DescribeResult->MutablePathDescription(); - if (pathDescription.HasTable()) { - auto& table = *pathDescription.MutableTable(); - for (auto& tableIndex : table.GetTableIndexes()) { - NKikimrSchemeOp::TDirEntry& child = *pathDescription.AddChildren(); - 
child.SetName(tableIndex.GetName()); - child.SetPathType(NKikimrSchemeOp::EPathType::EPathTypeTableIndex); - } - for (auto& tableCdc : table.GetCdcStreams()) { - NKikimrSchemeOp::TDirEntry& child = *pathDescription.AddChildren(); - child.SetName(tableCdc.GetName()); - child.SetPathType(NKikimrSchemeOp::EPathType::EPathTypeCdcStream); - } - } - } - } - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - auto accessDeniedStatus = descriptor->FindValueByNumber(NKikimrScheme::StatusAccessDenied)->name(); - if (DescribeResult->GetStatus() == accessDeniedStatus) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - return; - } - TProtoToJson::ProtoToJson(json, *DescribeResult, JsonSettings); - DecodeExternalTableContent(json); - } else { - json << "null"; - } - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void DecodeExternalTableContent(TStringStream& json) const { - if (!DescribeResult) { - return; - } - - if (!DescribeResult->GetPathDescription().HasExternalTableDescription()) { - return; - } - - const auto& content = DescribeResult->GetPathDescription().GetExternalTableDescription().GetContent(); - if (!content) { - return; - } - - NExternalSource::IExternalSourceFactory::TPtr externalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; - NJson::TJsonValue root; - const auto& sourceType = DescribeResult->GetPathDescription().GetExternalTableDescription().GetSourceType(); - try { - NJson::ReadJsonTree(json.Str(), &root); - root["PathDescription"]["ExternalTableDescription"].EraseValue("Content"); - auto source = externalSourceFactory->GetOrCreate(sourceType); - auto parameters = source->GetParameters(content); - for (const auto& [key, items]: parameters) { - NJson::TJsonValue array{NJson::EJsonValueType::JSON_ARRAY}; - for 
(const auto& item: items) { - array.AppendValue(item); - } - root["PathDescription"]["ExternalTableDescription"]["Content"][key] = array; - } - } catch (...) { - BLOG_CRIT("Сan't unpack content for external table: " << sourceType << ", error: " << CurrentExceptionMessage()); - } - json.Clear(); - json << root; - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: schemeshard_id - in: query - description: schemeshard identifier (tablet id) - required: false - type: integer - - name: path_id - in: query - description: path id - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: backup - in: query - description: return backup information - required: false - type: boolean - default: true - - name: private - in: query - description: return private tables - required: false - type: boolean - default: true - - name: children - in: query - description: return children - required: false - type: boolean - default: true - - name: boundaries - in: query - description: return boundaries - required: false - type: boolean - default: false - - name: partition_config - in: query - description: return partition configuration - required: false - type: boolean - default: true - - name: partition_stats - in: query - description: return partitions statistics - required: false - type: boolean - default: false - - name: 
partitioning_info - in: query - description: return partitioning information - required: false - type: boolean - default: true - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema detailed information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns detailed information about schema object"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_describe_consumer.h b/ydb/core/viewer/json_describe_consumer.h deleted file mode 100644 index 693f5cc5e065..000000000000 --- a/ydb/core/viewer/json_describe_consumer.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include -#include "json_local_rpc.h" - -namespace NKikimr { -namespace NViewer { - -using TDescribeConsumerRpc = TJsonLocalRpc; - -class TJsonDescribeConsumer : public TDescribeConsumerRpc { -public: - using TBase = TDescribeConsumerRpc; - - TJsonDescribeConsumer(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - {} - - void Bootstrap() override { - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_GET) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only GET method is allowed")); - } - const auto& params(Event->Get()->Request.GetParams()); - if (params.Has("database")) { - Database = params.Get("database"); - } else if (params.Has("database_path")) { - Database = params.Get("database_path"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - - if (params.Has("consumer")) { - Request.set_consumer(params.Get("consumer")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'consumer' is required")); - } - - if (params.Has("include_stats")) { - 
Request.set_include_stats(FromStringWithDefault(params.Get("include_stats"), false)); - } - - TBase::Bootstrap(); - } -}; - -template<> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Topic schema detailed information - description: Returns detailed information about topic - parameters: - - name: database - in: query - description: database name - required: true - type: string - - name: consumer - in: query - description: consumer name - required: true - type: string - - name: include_stats - in: query - description: include stat flag - required: false - type: bool - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - node["get"]["responses"]["200"]["content"]["application/json"]["schema"] = TProtoToYaml::ProtoToYamlSchema(); - return node; -} - -} -} diff --git a/ydb/core/viewer/json_describe_topic.h b/ydb/core/viewer/json_describe_topic.h deleted file mode 100644 index 2a7a8fb418dc..000000000000 --- a/ydb/core/viewer/json_describe_topic.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include -#include "json_local_rpc.h" - -namespace NKikimr { -namespace NViewer { - -using TDescribeTopicRpc = TJsonLocalRpc; - -class TJsonDescribeTopic : public TDescribeTopicRpc { -public: - using TBase = TDescribeTopicRpc; - - TJsonDescribeTopic(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - {} - - void Bootstrap() override { - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_GET) { - return 
ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only GET method is allowed")); - } - const auto& params(Event->Get()->Request.GetParams()); - if (params.Has("database")) { - Database = params.Get("database"); - } else if (params.Has("database_path")) { - Database = params.Get("database_path"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - - if (params.Has("path")) { - Request.set_path(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - - if (params.Has("include_stats")) { - Request.set_include_stats(FromStringWithDefault(params.Get("include_stats"), false)); - } - - TBase::Bootstrap(); - } -}; - -template<> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Topic schema detailed information - description: Returns detailed information about topic - parameters: - - name: database - in: query - description: database name - required: true - type: string - - name: path - in: query - description: schema path - required: true - type: string - - name: include_stats - in: query - description: include stat flag - required: false - type: bool - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - node["get"]["responses"]["200"]["content"]["application/json"]["schema"] = TProtoToYaml::ProtoToYamlSchema(); - return node; -} - -} -} diff --git a/ydb/core/viewer/json_getblob.h 
b/ydb/core/viewer/json_getblob.h deleted file mode 100644 index d09c691dcb90..000000000000 --- a/ydb/core/viewer/json_getblob.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonGetBlob = TJsonVDiskRequest; - -template <> -struct TJsonVDiskRequestHelper { - static std::unique_ptr MakeRequest(NMon::TEvHttpInfo::TPtr &ev, TString *error) { - const TCgiParameters& cgi = ev->Get()->Request.GetParams(); - - bool internals = cgi.Has("internals"); - TString from = cgi.Get("from"); - TString to = cgi.Get("to"); - - auto assign_blob_id = [] (NKikimrVDisk::LogoBlobId *id, const TLogoBlobID &blobId) { - const ui64 *raw = blobId.GetRaw(); - id->set_raw_x1(raw[0]); - id->set_raw_x2(raw[1]); - id->set_raw_x3(raw[2]); - }; - - TString errorExplanation; - auto try_to_parse = [&] (const TString &field, const TString ¶m, NKikimrVDisk::LogoBlobId *id) { - TLogoBlobID blobId; - bool good = TLogoBlobID::Parse(blobId, param, errorExplanation); - if (!good) { - *error = "Failed to parse '" + field + "' field: " + errorExplanation; - return true; - } - assign_blob_id(id, blobId); - return false; - }; - - - auto req = std::make_unique(); - req->Record.set_show_internals(internals); - - NKikimrVDisk::LogoBlobIdRange *range = req->Record.mutable_range(); - if (from) { - try_to_parse("from", from, range->mutable_from()); - } else { - assign_blob_id(range->mutable_from(), Min()); - } - if (to) { - try_to_parse("to", to, range->mutable_to()); - } else { - assign_blob_id(range->mutable_to(), Max()); - } - - return req; - } - - static TString GetAdditionalParameters() { - return R"___( - - name: from - in: query - description: blob identifier, inclusive lower bound for getting range, default is minimal blob id - type: string - - name: to - in: query - description: blob identifier, inclusive upper bound for getting range, default is maximal blob id - 
required: false - type: string - - name: internals - in: query - description: return ingress of each blob - required: false - type: boolean - )___"; - } -}; - - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Get blob from VDisk"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Get blob from VDisk"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_graph.h b/ydb/core/viewer/json_graph.h deleted file mode 100644 index 20fa68980227..000000000000 --- a/ydb/core/viewer/json_graph.h +++ /dev/null @@ -1,194 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "log.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonGraph : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - std::vector Metrics; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonGraph(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - BLOG_TRACE("Graph received request for " << Event->Get()->Request.GetUri()); - const auto& params(Event->Get()->Request.GetParams()); - NKikimrGraph::TEvGetMetrics getRequest; - if (params.Has("target")) { - StringSplitter(params.Get("target")).Split(',').SkipEmpty().Collect(&Metrics); - for (const auto& metric : Metrics) { - getRequest.AddMetrics(metric); - } - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (params.Has("from")) { - getRequest.SetTimeFrom(FromStringWithDefault(params.Get("from"))); - } - if (params.Has("until")) { - getRequest.SetTimeTo(FromStringWithDefault(params.Get("until"))); - } - if (params.Has("maxDataPoints")) { - 
getRequest.SetMaxPoints(FromStringWithDefault(params.Get("maxDataPoints"), 1000)); - } - Send(NGraph::MakeGraphServiceId(), new NGraph::TEvGraph::TEvGetMetrics(std::move(getRequest))); - Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); - Become(&TThis::StateWork); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(NGraph::TEvGraph::TEvMetricsResult, Handle); - cFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - void Handle(NGraph::TEvGraph::TEvMetricsResult::TPtr& ev) { - const auto& params(Event->Get()->Request.GetParams()); - const auto& response(ev->Get()->Record); - NJson::TJsonValue json; - - if (response.GetError()) { - json["status"] = "error"; - json["error"] = response.GetError(); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (response.DataSize() != Metrics.size()) { - json["status"] = "error"; - json["error"] = "Invalid data size received"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - if (response.TimeSize() != protoMetric.ValuesSize()) { - json["status"] = "error"; - json["error"] = "Invalid value size received"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - } - if (!params.Has("format") || params.Get("format") == "graphite") { // graphite - json.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonMetric(json.AppendValue({})); - 
jsonMetric["target"] = Metrics[nMetric]; - jsonMetric["title"] = Metrics[nMetric]; - jsonMetric["tags"]["name"] = Metrics[nMetric]; - NJson::TJsonValue& jsonDataPoints(jsonMetric["datapoints"]); - jsonDataPoints.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonDataPoints.AppendValue({})); - jsonDataPoint.AppendValue(response.GetTime(nTime)); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - } - } - } else { // prometheus - json["status"] = "success"; - NJson::TJsonValue& jsonData(json["data"]); - jsonData["resultType"] = "matrix"; - NJson::TJsonValue& jsonResults(jsonData["result"]); - jsonResults.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonResult(jsonResults.AppendValue({})); - jsonResult["metric"]["__name__"] = Metrics[nMetric]; - NJson::TJsonValue& jsonValues(jsonResult["values"]); - jsonValues.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonValues.AppendValue({})); - jsonDataPoint.AppendValue(response.GetTime(nTime)); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - } - } - } - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void Timeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static 
YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: target - in: query - description: metrics comma delimited - required: true - type: string - - name: from - in: query - description: time in seconds - required: false - type: integer - - name: until - in: query - description: time in seconds - required: false - type: integer - - name: maxDataPoints - in: query - description: maximum number of data points - required: false - type: integer - - name: format - in: query - description: response format, could be prometheus or graphite - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Graph data"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns graph data"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_handlers.cpp b/ydb/core/viewer/json_handlers.cpp new file mode 100644 index 000000000000..7f7b070dd37b --- /dev/null +++ b/ydb/core/viewer/json_handlers.cpp @@ -0,0 +1,47 @@ +#include "json_handlers.h" +#include + +namespace NKikimr::NViewer { + +TSimpleYamlBuilder::TSimpleYamlBuilder(TInitializer initializer) { + Method = Root[TString(initializer.Method)]; + if (initializer.Url) { + Method["tags"].push_back(TString(initializer.Url.After('/').Before('/'))); + } + if (initializer.Tag) { + Method["tags"].push_back(TString(initializer.Tag)); + } + if (initializer.Summary) { + Method["summary"] = TString(initializer.Summary); + } + if (initializer.Description) { + Method["description"] = TString(initializer.Description); + } +} + +void TSimpleYamlBuilder::SetParameters(YAML::Node parameters) { + Method["parameters"] = parameters; +} + +void TSimpleYamlBuilder::AddParameter(TParameter parameter) { + YAML::Node param; + param["in"] = "query"; + param["name"] = TString(parameter.Name); + if (parameter.Description) { + param["description"] = TString(parameter.Description); + } + if (parameter.Type) { + param["type"] 
= TString(parameter.Type); + } + if (parameter.Default) { + param["default"] = TString(parameter.Default); + } + param["required"] = parameter.Required; + Method["parameters"].push_back(param); +} + +void TSimpleYamlBuilder::SetResponseSchema(YAML::Node schema) { + Method["responses"]["200"]["content"]["application/json"]["schema"] = schema; +} + +} diff --git a/ydb/core/viewer/json_handlers.h b/ydb/core/viewer/json_handlers.h index bfe329ad9385..0e0aeb52a890 100644 --- a/ydb/core/viewer/json_handlers.h +++ b/ydb/core/viewer/json_handlers.h @@ -1,7 +1,7 @@ #pragma once - #include "viewer.h" -#include +#include +#include namespace NKikimr::NViewer { @@ -9,53 +9,36 @@ class TJsonHandlerBase { public: virtual ~TJsonHandlerBase() = default; virtual IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) = 0; - virtual YAML::Node GetResponseJsonSchema() = 0; - virtual TString GetRequestSummary() = 0; - virtual TString GetRequestDescription() = 0; - virtual YAML::Node GetRequestParameters() = 0; virtual YAML::Node GetRequestSwagger() = 0; }; template class TJsonHandler : public TJsonHandlerBase { public: - IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) override { - return new ActorRequestType(viewer, event); - } + YAML::Node Swagger; - YAML::Node GetResponseJsonSchema() override { - static YAML::Node jsonSchema = TJsonRequestSchema::GetSchema(); - return jsonSchema; - } + TJsonHandler(YAML::Node swagger) + : Swagger(swagger) + {} - TString GetRequestSummary() override { - static TString summary = TJsonRequestSummary::GetSummary(); - return summary; - } - - TString GetRequestDescription() override { - static TString description = TJsonRequestDescription::GetDescription(); - return description; - } - - YAML::Node GetRequestParameters() override { - static YAML::Node parameters = TJsonRequestParameters::GetParameters(); - return parameters; + IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) override 
{ + return new ActorRequestType(viewer, event); } YAML::Node GetRequestSwagger() override { - static YAML::Node swagger = TJsonRequestSwagger::GetSwagger(); - return swagger; + return Swagger; } }; struct TJsonHandlers { std::vector JsonHandlersList; THashMap> JsonHandlersIndex; + std::map Capabilities; - void AddHandler(const TString& name, TJsonHandlerBase* handler) { + void AddHandler(const TString& name, TJsonHandlerBase* handler, int version = 1) { JsonHandlersList.push_back(name); JsonHandlersIndex[name] = std::shared_ptr(handler); + Capabilities[name] = version; } TJsonHandlerBase* FindHandler(const TString& name) const { @@ -65,6 +48,45 @@ struct TJsonHandlers { } return it->second.get(); } + + int GetCapabilityVersion(const TString& name) const { + auto it = Capabilities.find(name); + if (it == Capabilities.end()) { + return 0; + } + return it->second; + } +}; + +class TSimpleYamlBuilder { +public: + struct TInitializer { + TStringBuf Method; + TStringBuf Tag; + TStringBuf Url; + TStringBuf Summary; + TStringBuf Description; + }; + + struct TParameter { + TStringBuf Name; + TStringBuf Description; + TStringBuf Type; + TStringBuf Default; + bool Required = false; + }; + + YAML::Node Root; + YAML::Node Method; + + TSimpleYamlBuilder(TInitializer initializer); + void SetParameters(YAML::Node parameters); + void AddParameter(TParameter parameter); + void SetResponseSchema(YAML::Node schema); + + operator YAML::Node() { + return Root; + } }; } // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_browse.cpp b/ydb/core/viewer/json_handlers_browse.cpp new file mode 100644 index 000000000000..c9ec41656d8f --- /dev/null +++ b/ydb/core/viewer/json_handlers_browse.cpp @@ -0,0 +1,37 @@ +#include "json_handlers.h" +#include "browse.h" +#include "browse_db.h" +#include "browse_pq.h" +#include "viewer_browse.h" +#include "viewer_content.h" +#include "viewer_metainfo.h" + +namespace NKikimr::NViewer { + +void SetupDBVirtualHandlers(IViewer* viewer) { + 
viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Table, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerDB::TBrowseTable(owner, browseContext); + }); +} + +void InitViewerMetaInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/metainfo", new TJsonHandler(TJsonMetaInfo::GetSwagger())); +} + +void InitViewerBrowseJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/browse", new TJsonHandler(TJsonBrowse::GetSwagger())); +} + +void InitViewerContentJsonHandler(TJsonHandlers &jsonHandlers) { + jsonHandlers.AddHandler("/viewer/content", new TJsonHandler(TJsonContent::GetSwagger())); +} + +void InitViewerBrowseJsonHandlers(TJsonHandlers& jsonHandlers) { + InitViewerMetaInfoJsonHandler(jsonHandlers); + InitViewerBrowseJsonHandler(jsonHandlers); + InitViewerContentJsonHandler(jsonHandlers); +} + +} diff --git a/ydb/core/viewer/json_handlers_operation.cpp b/ydb/core/viewer/json_handlers_operation.cpp index 4e9c90da6d79..7375d177983b 100644 --- a/ydb/core/viewer/json_handlers_operation.cpp +++ b/ydb/core/viewer/json_handlers_operation.cpp @@ -1,17 +1,32 @@ #include "json_handlers.h" - -#include "operation_get.h" -#include "operation_list.h" #include "operation_cancel.h" #include "operation_forget.h" +#include "operation_get.h" +#include "operation_list.h" namespace NKikimr::NViewer { -void InitOperationJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/operation/get", new TJsonHandler); - jsonHandlers.AddHandler("/operation/list", new TJsonHandler); - jsonHandlers.AddHandler("/operation/cancel", new TJsonHandler); - jsonHandlers.AddHandler("/operation/forget", new TJsonHandler); +void InitOperationGetJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/get", new TJsonHandler(TOperationGet::GetSwagger())); +} + +void InitOperationListJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/list", new 
TJsonHandler(TOperationList::GetSwagger())); +} + +void InitOperationCancelJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/cancel", new TJsonHandler(TOperationCancel::GetSwagger())); } +void InitOperationForgetJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/forget", new TJsonHandler(TOperationForget::GetSwagger())); } + +void InitOperationJsonHandlers(TJsonHandlers& jsonHandlers) { + InitOperationGetJsonHandler(jsonHandlers); + InitOperationListJsonHandler(jsonHandlers); + InitOperationCancelJsonHandler(jsonHandlers); + InitOperationForgetJsonHandler(jsonHandlers); +} + +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_pdisk.cpp b/ydb/core/viewer/json_handlers_pdisk.cpp index ef99307e25be..55845fc83cad 100644 --- a/ydb/core/viewer/json_handlers_pdisk.cpp +++ b/ydb/core/viewer/json_handlers_pdisk.cpp @@ -1,19 +1,26 @@ -#include -#include - #include "json_handlers.h" - -#include "json_pdisk_restart.h" #include "pdisk_info.h" +#include "pdisk_restart.h" #include "pdisk_status.h" - namespace NKikimr::NViewer { +void InitPDiskInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/info", new TJsonHandler(TPDiskInfo::GetSwagger())); +} + +void InitPDiskRestartJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/restart", new TJsonHandler(TJsonPDiskRestart::GetSwagger())); +} + +void InitPDiskStatusJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/status", new TJsonHandler(TPDiskStatus::GetSwagger())); +} + void InitPDiskJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/pdisk/info", new TJsonHandler); - jsonHandlers.AddHandler("/pdisk/restart", new TJsonHandler); - jsonHandlers.AddHandler("/pdisk/status", new TJsonHandler); + InitPDiskInfoJsonHandler(jsonHandlers); + InitPDiskRestartJsonHandler(jsonHandlers); + InitPDiskStatusJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_pq.cpp 
b/ydb/core/viewer/json_handlers_pq.cpp new file mode 100644 index 000000000000..d3503b95b7f9 --- /dev/null +++ b/ydb/core/viewer/json_handlers_pq.cpp @@ -0,0 +1,29 @@ +#include "viewer.h" +#include "browse_pq.h" + +namespace NKikimr::NViewer { + +void SetupPQVirtualHandlers(IViewer* viewer) { + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Root, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseRoot(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Consumers, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseConsumers(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Consumer, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseConsumer(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Topic, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseTopic(owner, browseContext); + }); +} + +} diff --git a/ydb/core/viewer/json_handlers_query.cpp b/ydb/core/viewer/json_handlers_query.cpp new file mode 100644 index 000000000000..a79c05b54c1f --- /dev/null +++ b/ydb/core/viewer/json_handlers_query.cpp @@ -0,0 +1,20 @@ +#include "json_handlers.h" +#include "query_execute_script.h" +#include "query_fetch_script.h" + +namespace NKikimr::NViewer { + +void InitQueryExecuteScriptJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/query/script/execute", new TJsonHandler(TQueryExecuteScript::GetSwagger())); +} + +void InitQueryFetchScriptJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/query/script/fetch", new TJsonHandler(TQueryFetchScript::GetSwagger())); +} + +void InitQueryJsonHandlers(TJsonHandlers& jsonHandlers) { + 
InitQueryExecuteScriptJsonHandler(jsonHandlers); + InitQueryFetchScriptJsonHandler(jsonHandlers); +} + +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_scheme.cpp b/ydb/core/viewer/json_handlers_scheme.cpp index c5dc07f28b28..0445940e641c 100644 --- a/ydb/core/viewer/json_handlers_scheme.cpp +++ b/ydb/core/viewer/json_handlers_scheme.cpp @@ -1,11 +1,14 @@ #include "json_handlers.h" - #include "scheme_directory.h" namespace NKikimr::NViewer { +void InitSchemeDirectoryHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/scheme/directory", new TJsonSchemeDirectoryHandler(), 2); +} + void InitSchemeJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/scheme/directory", new TJsonSchemeDirectoryHandler()); + InitSchemeDirectoryHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_storage.cpp b/ydb/core/viewer/json_handlers_storage.cpp new file mode 100644 index 000000000000..750da437452e --- /dev/null +++ b/ydb/core/viewer/json_handlers_storage.cpp @@ -0,0 +1,14 @@ +#include "json_handlers.h" +#include "storage_groups.h" + +namespace NKikimr::NViewer { + +void InitStorageGroupsJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/storage/groups", new TJsonHandler(TStorageGroups::GetSwagger()), 6); +} + +void InitStorageJsonHandlers(TJsonHandlers& jsonHandlers) { + InitStorageGroupsJsonHandler(jsonHandlers); +} + +} diff --git a/ydb/core/viewer/json_handlers_vdisk.cpp b/ydb/core/viewer/json_handlers_vdisk.cpp index 49c869692f08..633bf9c80566 100644 --- a/ydb/core/viewer/json_handlers_vdisk.cpp +++ b/ydb/core/viewer/json_handlers_vdisk.cpp @@ -1,20 +1,56 @@ -#include -#include - #include "json_handlers.h" - -#include "json_vdiskstat.h" -#include "json_getblob.h" -#include "json_blobindexstat.h" -#include "json_vdisk_evict.h" +#include "vdisk_vdiskstat.h" +#include "vdisk_blobindexstat.h" +#include "vdisk_getblob.h" +#include "vdisk_evict.h" namespace NKikimr::NViewer { +void 
InitVDiskStatJsonHandler(TJsonHandlers& handlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "VDisk statistic", + .Description = "VDisk statistic", + }); + yaml.SetParameters(TJsonVDiskStat::GetParameters()); + yaml.SetResponseSchema(TJsonVDiskStat::GetSchema()); + handlers.AddHandler("/vdisk/vdiskstat", new TJsonHandler(yaml)); +} + +void InitVDiskGetBlobJsonHandler(TJsonHandlers& handlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "Get blob from VDisk", + .Description = "Get blob from VDisk", + }); + yaml.SetParameters(TJsonGetBlob::GetParameters()); + yaml.SetResponseSchema(TJsonGetBlob::GetSchema()); + handlers.AddHandler("/vdisk/getblob", new TJsonHandler(yaml)); +} + +void InitVDiskBlobIndexStatJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "Get logoblob index stat from VDisk", + .Description = "Get logoblob index stat from VDisk", + }); + yaml.SetParameters(TJsonBlobIndexStat::GetParameters()); + yaml.SetResponseSchema(TJsonBlobIndexStat::GetSchema()); + jsonHandlers.AddHandler("/vdisk/blobindexstat", new TJsonHandler(yaml)); +} + +void InitVDiskEvictJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/vdisk/evict", new TJsonHandler(TJsonVDiskEvict::GetSwagger())); +} + void InitVDiskJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/vdisk/vdiskstat", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/getblob", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/blobindexstat", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/evict", new TJsonHandler); + InitVDiskStatJsonHandler(jsonHandlers); + InitVDiskGetBlobJsonHandler(jsonHandlers); + InitVDiskBlobIndexStatJsonHandler(jsonHandlers); + InitVDiskEvictJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_viewer.cpp b/ydb/core/viewer/json_handlers_viewer.cpp index ff55ada322b4..b7f70577c398 100644 --- 
a/ydb/core/viewer/json_handlers_viewer.cpp +++ b/ydb/core/viewer/json_handlers_viewer.cpp @@ -1,89 +1,314 @@ -#include - #include "json_handlers.h" - -#include "json_nodelist.h" -#include "json_nodeinfo.h" -#include "json_vdiskinfo.h" -#include "json_pdiskinfo.h" -#include "json_describe.h" -#include "json_describe_topic.h" -#include "json_describe_consumer.h" -#include "json_hotkeys.h" -#include "json_sysinfo.h" -#include "json_tabletinfo.h" -#include "json_hiveinfo.h" -#include "json_bsgroupinfo.h" -#include "json_bscontrollerinfo.h" -#include "json_config.h" -#include "json_counters.h" -#include "json_topicinfo.h" -#include "json_pqconsumerinfo.h" -#include "json_tabletcounters.h" -#include "json_storage.h" -#include "json_storage_usage.h" -#include "json_metainfo.h" -#include "json_browse.h" -#include "json_cluster.h" -#include "json_content.h" -#include "json_labeledcounters.h" -#include "json_tenants.h" -#include "json_hivestats.h" -#include "json_tenantinfo.h" -#include "json_whoami.h" -#include "json_query.h" -#include "json_netinfo.h" -#include "json_compute.h" -#include "json_healthcheck.h" -#include "json_nodes.h" -#include "json_acl.h" -#include "json_graph.h" -#include "json_render.h" -#include "json_autocomplete.h" -#include "check_access.h" +#include "viewer_acl.h" +#include "viewer_autocomplete.h" +#include "viewer_bscontrollerinfo.h" +#include "viewer_capabilities.h" +#include "viewer_check_access.h" +#include "viewer_cluster.h" +#include "viewer_compute.h" +#include "viewer_config.h" +#include "viewer_counters.h" +#include "viewer_describe_consumer.h" +#include "viewer_describe.h" +#include "viewer_describe_topic.h" +#include "viewer_feature_flags.h" +#include "viewer_graph.h" +#include "viewer_healthcheck.h" +#include "viewer_hiveinfo.h" +#include "viewer_hivestats.h" +#include "viewer_hotkeys.h" +#include "viewer_labeled_counters.h" +#include "viewer_netinfo.h" +#include "viewer_nodelist.h" +#include "viewer_nodes.h" +#include 
"viewer_pqconsumerinfo.h" +#include "viewer_query.h" +#include "viewer_render.h" +#include "viewer_storage.h" +#include "viewer_storage_usage.h" +#include "viewer_tabletcounters.h" +#include "viewer_tenantinfo.h" +#include "viewer_tenants.h" +#include "viewer_topicinfo.h" +#include "viewer_whoami.h" namespace NKikimr::NViewer { +TBSGroupState GetBSGroupOverallStateWithoutLatency( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + + TBSGroupState groupState; + groupState.Overall = NKikimrViewer::EFlag::Grey; + + const auto& vDiskIds = info.GetVDiskIds(); + std::unordered_map failedRings; + std::unordered_map failedDomains; + TVector vDiskFlags; + vDiskFlags.reserve(vDiskIds.size()); + for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { + const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; + NKikimrViewer::EFlag flag = NKikimrViewer::EFlag::Grey; + auto ie = vDisksIndex.find(vDiskId); + if (ie != vDisksIndex.end()) { + auto pDiskId = std::make_pair(ie->second.GetNodeId(), ie->second.GetPDiskId()); + auto ip = pDisksIndex.find(pDiskId); + if (ip != pDisksIndex.end()) { + const NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo(ip->second); + flag = Max(flag, GetPDiskOverallFlag(pDiskInfo)); + } else { + flag = NKikimrViewer::EFlag::Red; + } + const NKikimrWhiteboard::TVDiskStateInfo& vDiskInfo(ie->second); + flag = Max(flag, GetVDiskOverallFlag(vDiskInfo)); + if (vDiskInfo.GetDiskSpace() > NKikimrWhiteboard::EFlag::Green) { + groupState.SpaceProblems++; + } + } else { + flag = NKikimrViewer::EFlag::Red; + } + vDiskFlags.push_back(flag); + if (flag == NKikimrViewer::EFlag::Red || flag == NKikimrViewer::EFlag::Blue) { + groupState.MissingDisks++; + ++failedRings[vDiskId.GetRing()]; + ++failedDomains[vDiskId.GetDomain()]; + } + groupState.Overall = Max(groupState.Overall, flag); + } + + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); // 
without failed rings we only allow to raise group status up to Blue/Yellow + TString erasure = info.GetErasureSpecies(); + if (erasure == TErasureType::ErasureSpeciesName(TErasureType::ErasureNone)) { + if (!failedDomains.empty()) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } + } else if (erasure == TErasureType::ErasureSpeciesName(TErasureType::ErasureMirror3dc)) { + if (failedRings.size() > 2) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } else if (failedRings.size() == 2) { // TODO: check for 1 ring - 1 domain rule + groupState.Overall = NKikimrViewer::EFlag::Orange; + } else if (failedRings.size() > 0) { + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); + } + } else if (erasure == TErasureType::ErasureSpeciesName(TErasureType::Erasure4Plus2Block)) { + if (failedDomains.size() > 2) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } else if (failedDomains.size() > 1) { + groupState.Overall = NKikimrViewer::EFlag::Orange; + } else if (failedDomains.size() > 0) { + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); + } + } + return groupState; +} + +NKikimrViewer::EFlag GetBSGroupOverallFlagWithoutLatency( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + return GetBSGroupOverallStateWithoutLatency(info, vDisksIndex, pDisksIndex).Overall; +} + +TBSGroupState GetBSGroupOverallState( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + TBSGroupState state = GetBSGroupOverallStateWithoutLatency(info, vDisksIndex, pDisksIndex); + if (info.HasLatency()) { + state.Overall = Max(state.Overall, Min(NKikimrViewer::EFlag::Yellow, GetViewerFlag(info.GetLatency()))); + } + return state; +} + +NKikimrViewer::EFlag GetBSGroupOverallFlag( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& 
vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + return GetBSGroupOverallState(info, vDisksIndex, pDisksIndex).Overall; +} + +void InitViewerCapabilitiesJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Viewer capabilities", + .Description = "Viewer capabilities", + }); + jsonHandlers.AddHandler("/viewer/capabilities", new TJsonHandler(yaml)); +} + +void InitViewerNodelistJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/nodelist", new TJsonHandler(TJsonNodeList::GetSwagger())); +} + +void InitViewerNodeInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerSysInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerVDiskInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerPDiskInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerTabletInfoJsonHandler(TJsonHandlers& jsonHandlers); + +void InitViewerDescribeJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe", new TJsonHandler(TJsonDescribe::GetSwagger())); +} + +void InitViewerDescribeTopicJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe_topic", new TJsonHandler(TJsonDescribeTopic::GetSwagger())); +} + +void InitViewerDescribeConsumerJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe_consumer", new TJsonHandler(TJsonDescribeConsumer::GetSwagger())); +} + +void InitViewerHotkeysJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/hotkeys", new TJsonHandler(TJsonHotkeys::GetSwagger())); +} + +void InitViewerHiveInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/hiveinfo", new TJsonHandler(TJsonHiveInfo::GetSwagger())); +} + +void InitViewerBSGroupInfoJsonHandler(TJsonHandlers& jsonHandlers); + +void InitViewerBSControllerInfoJsonHandler(TJsonHandlers& jsonHandlers) { + 
jsonHandlers.AddHandler("/viewer/bscontrollerinfo", new TJsonHandler(TJsonBSControllerInfo::GetSwagger())); +} + +void InitViewerConfigJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/config", new TJsonHandler(TJsonConfig::GetSwagger())); +} + +void InitViewerCountersJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/counters", new TJsonHandler(TJsonCounters::GetSwagger())); +} + +void InitViewerTopicInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/topicinfo", new TJsonHandler(TJsonTopicInfo::GetSwagger())); +} + +void InitViewerPQConsumerInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/pqconsumerinfo", new TJsonHandler(TJsonPQConsumerInfo::GetSwagger())); +} + +void InitViewerTabletCountersJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/tabletcounters", new TJsonHandler(TJsonTabletCounters::GetSwagger())); +} + +void InitViewerStorageJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/storage", new TJsonHandler(TJsonStorage::GetSwagger())); +} + +void InitViewerStorageUsageJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/storage_usage", new TJsonHandler(TJsonStorageUsage::GetSwagger())); +} + +void InitViewerClusterJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/cluster", new TJsonHandler(TJsonCluster::GetSwagger()), 5); +} + +void InitViewerLabeledCountersJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/labeledcounters", new TJsonHandler(TJsonLabeledCounters::GetSwagger())); +} + +void InitViewerTenantsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/tenants", new TJsonHandler(TJsonTenants::GetSwagger())); +} + +void InitViewerHiveStatsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/hivestats", new TJsonHandler(TJsonHiveStats::GetSwagger())); +} + +void InitViewerTenantInfoJsonHandler(TJsonHandlers &handlers) { + 
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler(TJsonTenantInfo::GetSwagger()), 3); +} + +void InitViewerWhoAmIJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/whoami", new TJsonHandler(TJsonWhoAmI::GetSwagger())); +} + +void InitViewerQueryJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/query", new TJsonHandler(TJsonQuery::GetSwagger()), 4); +} + +void InitViewerNetInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/netinfo", new TJsonHandler(TJsonNetInfo::GetSwagger())); +} + +void InitViewerComputeJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/compute", new TJsonHandler(TJsonCompute::GetSwagger())); +} + +void InitViewerHealthCheckJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/healthcheck", new TJsonHandler(TJsonHealthCheck::GetSwagger())); +} + +void InitViewerNodesJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/nodes", new TJsonHandler(TJsonNodes::GetSwagger()), 11); +} + +void InitViewerACLJsonHandler(TJsonHandlers &jsonHandlers) { + jsonHandlers.AddHandler("/viewer/acl", new TJsonHandler(TJsonACL::GetSwagger())); +} + +void InitViewerGraphJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/graph", new TJsonHandler(TJsonGraph::GetSwagger())); +} + +void InitViewerRenderJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/render", new TJsonHandler(TJsonRender::GetSwagger())); +} + +void InitViewerAutocompleteJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/autocomplete", new TJsonHandler(TJsonAutocomplete::GetSwagger()), 2); +} + +void InitViewerCheckAccessJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/check_access", new TJsonHandler(TCheckAccess::GetSwagger())); +} + +void InitViewerFeatureFlagsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/feature_flags", new 
TJsonHandler(TJsonFeatureFlags::GetSwagger()), 2); +} + void InitViewerJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/viewer/nodelist", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/nodeinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/sysinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/vdiskinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/pdiskinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tabletinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe_topic", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe_consumer", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hotkeys", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hiveinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/bsgroupinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/bscontrollerinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/config", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/counters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/topicinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/pqconsumerinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tabletcounters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/storage", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/storage_usage", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/metainfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/browse", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/cluster", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/content", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/labeledcounters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tenants", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hivestats", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tenantinfo", new TJsonHandler); - 
jsonHandlers.AddHandler("/viewer/whoami", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/query", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/netinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/compute", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/healthcheck", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/nodes", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/acl", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/graph", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/render", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/autocomplete", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/check_access", new TJsonHandler); + InitViewerCapabilitiesJsonHandler(jsonHandlers); + InitViewerNodelistJsonHandler(jsonHandlers); + InitViewerNodeInfoJsonHandler(jsonHandlers); + InitViewerSysInfoJsonHandler(jsonHandlers); + InitViewerVDiskInfoJsonHandler(jsonHandlers); + InitViewerPDiskInfoJsonHandler(jsonHandlers); + InitViewerTabletInfoJsonHandler(jsonHandlers); + InitViewerDescribeJsonHandler(jsonHandlers); + InitViewerDescribeTopicJsonHandler(jsonHandlers); + InitViewerDescribeConsumerJsonHandler(jsonHandlers); + InitViewerHotkeysJsonHandler(jsonHandlers); + InitViewerHiveInfoJsonHandler(jsonHandlers); + InitViewerBSGroupInfoJsonHandler(jsonHandlers); + InitViewerBSControllerInfoJsonHandler(jsonHandlers); + InitViewerConfigJsonHandler(jsonHandlers); + InitViewerCountersJsonHandler(jsonHandlers); + InitViewerTopicInfoJsonHandler(jsonHandlers); + InitViewerPQConsumerInfoJsonHandler(jsonHandlers); + InitViewerTabletCountersJsonHandler(jsonHandlers); + InitViewerStorageJsonHandler(jsonHandlers); + InitViewerStorageUsageJsonHandler(jsonHandlers); + InitViewerClusterJsonHandler(jsonHandlers); + InitViewerLabeledCountersJsonHandler(jsonHandlers); + InitViewerTenantsJsonHandler(jsonHandlers); + InitViewerHiveStatsJsonHandler(jsonHandlers); + InitViewerTenantInfoJsonHandler(jsonHandlers); + 
InitViewerWhoAmIJsonHandler(jsonHandlers); + InitViewerQueryJsonHandler(jsonHandlers); + InitViewerNetInfoJsonHandler(jsonHandlers); + InitViewerComputeJsonHandler(jsonHandlers); + InitViewerHealthCheckJsonHandler(jsonHandlers); + InitViewerNodesJsonHandler(jsonHandlers); + InitViewerACLJsonHandler(jsonHandlers); + InitViewerGraphJsonHandler(jsonHandlers); + InitViewerRenderJsonHandler(jsonHandlers); + InitViewerAutocompleteJsonHandler(jsonHandlers); + InitViewerCheckAccessJsonHandler(jsonHandlers); + InitViewerFeatureFlagsJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h deleted file mode 100644 index 376320d64d40..000000000000 --- a/ydb/core/viewer/json_healthcheck.h +++ /dev/null @@ -1,340 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "healthcheck_record.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -enum HealthCheckResponseFormat { - JSON, - PROMETHEUS -}; - -class TJsonHealthCheck : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - static const bool WithRetry = false; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - HealthCheckResponseFormat Format; - TString Database; - bool Cache = true; - bool MergeRecords = false; - std::optional Result; - std::optional SubscribedNodeId; - Ydb::Monitoring::StatusFlag::Status MinStatus = Ydb::Monitoring::StatusFlag::UNSPECIFIED; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHealthCheck(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - THolder MakeSelfCheckRequest() { - const auto& params(Event->Get()->Request.GetParams()); - THolder request = MakeHolder(); - 
request->Database = Database; - if (params.Has("verbose")) { - request->Request.set_return_verbose_status(FromStringWithDefault(params.Get("verbose"), false)); - } - if (params.Has("max_level")) { - request->Request.set_maximum_level(FromStringWithDefault(params.Get("max_level"), 0)); - } - if (MinStatus != Ydb::Monitoring::StatusFlag::UNSPECIFIED) { - request->Request.set_minimum_status(MinStatus); - } - if (params.Has("merge_records")) { - request->Request.set_merge_records(MergeRecords); - } - SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); - return request; - } - - void SendHealthCheckRequest() { - auto request = MakeSelfCheckRequest(); - Send(NHealthCheck::MakeHealthCheckID(), request.Release()); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - - Format = HealthCheckResponseFormat::JSON; - if (params.Has("format")) { - auto& format = params.Get("format"); - if (format == "json") { - Format = HealthCheckResponseFormat::JSON; - } else if (format == "prometheus") { - Format = HealthCheckResponseFormat::PROMETHEUS; - } - } else if (const auto *header = Event->Get()->Request.GetHeaders().FindHeader("Accept")) { - THashSet accept; - StringSplitter(header->Value()).SplitBySet(", ").SkipEmpty().Collect(&accept); - if (accept.contains("*/*") || accept.contains("application/json")) { - Format = HealthCheckResponseFormat::JSON; - } else if (accept.contains("text/plain")) { - Format = HealthCheckResponseFormat::PROMETHEUS; - } else { - Format = HealthCheckResponseFormat::JSON; - } - } - if (Format == HealthCheckResponseFormat::JSON) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - } - Database = params.Get("tenant"); - Cache = FromStringWithDefault(params.Get("cache"), Cache); - MergeRecords = 
FromStringWithDefault(params.Get("merge_records"), MergeRecords); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - - if (params.Get("min_status") && !Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &MinStatus)) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "The field 'min_status' cannot be parsed"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (AppData()->FeatureFlags.GetEnableDbMetadataCache() && Cache && Database && MergeRecords) { - RequestStateStorageMetadataCacheEndpointsLookup(Database); - } else { - SendHealthCheckRequest(); - } - Timeout += Timeout * 20 / 100; // we prefer to wait for more (+20%) verbose timeout status from HC - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - } - - STFUNC(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(NHealthCheck::TEvSelfCheckResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - hFunc(NHealthCheck::TEvSelfCheckResultProto, Handle); - cFunc(TEvents::TSystem::Undelivered, SendHealthCheckRequest); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - } - } - - int GetIssueCount(const Ydb::Monitoring::IssueLog& issueLog) { - return issueLog.count() == 0 ? 
1 : issueLog.count(); - } - - THolder> GetRecordCounters() { - const auto *descriptor = Ydb::Monitoring::StatusFlag_Status_descriptor(); - THashMap recordCounters; - for (auto& log : Result->issue_log()) { - TMetricRecord record { - .Database = log.location().database().name(), - .Message = log.message(), - .Status = descriptor->FindValueByNumber(log.status())->name(), - .Type = log.type() - }; - - auto it = recordCounters.find(record); - if (it != recordCounters.end()) { - it->second += GetIssueCount(log); - } else { - recordCounters[record] = GetIssueCount(log); - } - } - - return MakeHolder>(recordCounters); - } - - void HandleJSON() { - TStringStream json; - TProtoToJson::ProtoToJson(json, *Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void HandlePrometheus() { - auto recordCounters = GetRecordCounters(); - - TStringStream ss; - IMetricEncoderPtr encoder = EncoderPrometheus(&ss); - IMetricEncoder* e = encoder.Get(); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - auto filterDatabase = Database ? Database : "/" + domain->Name; - e->OnStreamBegin(); - if (recordCounters->size() > 0) { - for (auto& recordCounter : *recordCounters) { - e->OnMetricBegin(EMetricType::IGAUGE); - { - e->OnLabelsBegin(); - e->OnLabel("sensor", "ydb_healthcheck"); - e->OnLabel("DOMAIN", domain->Name); - e->OnLabel("DATABASE", recordCounter.first.Database ? 
recordCounter.first.Database : filterDatabase); - e->OnLabel("MESSAGE", recordCounter.first.Message); - e->OnLabel("STATUS", recordCounter.first.Status); - e->OnLabel("TYPE", recordCounter.first.Type); - e->OnLabelsEnd(); - } - e->OnInt64(TInstant::Zero(), recordCounter.second); - e->OnMetricEnd(); - } - } - const auto *descriptor = Ydb::Monitoring::SelfCheck_Result_descriptor(); - auto result = descriptor->FindValueByNumber(Result->self_check_result())->name(); - e->OnMetricBegin(EMetricType::IGAUGE); - { - e->OnLabelsBegin(); - e->OnLabel("sensor", "ydb_healthcheck"); - e->OnLabel("DOMAIN", domain->Name); - e->OnLabel("DATABASE", filterDatabase); - e->OnLabel("MESSAGE", result); - e->OnLabel("STATUS", result); - e->OnLabel("TYPE", "ALL"); - e->OnLabelsEnd(); - } - e->OnInt64(TInstant::Zero(), 1); - e->OnMetricEnd(); - e->OnStreamEnd(); - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + ss.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void ReplyAndPassAway() { - if (Result) { - if (Format == HealthCheckResponseFormat::JSON) { - HandleJSON(); - } else { - HandlePrometheus(); - } - } - PassAway(); - } - - void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { - Result = std::move(ev->Get()->Result); - ReplyAndPassAway(); - } - - void Handle(NHealthCheck::TEvSelfCheckResultProto::TPtr& ev) { - Result = std::move(ev->Get()->Record); - NHealthCheck::RemoveUnrequestedEntries(*Result, MakeSelfCheckRequest().Release()->Request); - ReplyAndPassAway(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - auto activeNode = TDatabaseMetadataCache::PickActiveNode(ev->Get()->InfoEntries); - if (activeNode != 0) { - SubscribedNodeId = activeNode; - std::optional cache = MakeDatabaseMetadataCacheId(activeNode); - auto request = MakeHolder(); - Send(*cache, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, activeNode); - } else { - SendHealthCheckRequest(); - } - } - - 
void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: tenant - in: query - description: path to database - required: false - type: string - - name: cache - in: query - description: use cache - required: false - type: boolean - - name: verbose - in: query - description: return verbose status - required: false - type: boolean - - name: merge_records - in: query - description: merge records - required: false - type: boolean - - name: max_level - in: query - description: max depth of issues to return - required: false - type: integer - - name: min_status - in: query - description: min status of issues to return - required: false - type: string - - name: format - in: query - description: format of reply - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Self-check result"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Performs self-check and returns result"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_hiveinfo.h b/ydb/core/viewer/json_hiveinfo.h deleted file mode 100644 index d5f97f8eabe0..000000000000 --- a/ydb/core/viewer/json_hiveinfo.h +++ /dev/null @@ -1,174 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include 
"viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonHiveInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr HiveInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TNodeId NodeId = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHiveInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui64 hiveId = FromStringWithDefault(params.Get("hive_id"), 0); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - NodeId = FromStringWithDefault(params.Get("node"), 0); - InitConfig(params); - if (hiveId != 0 ) { - TAutoPtr request = new TEvHive::TEvRequestHiveInfo(); - if (params.Has("tablet_id")) { - request->Record.SetTabletID(FromStringWithDefault(params.Get("tablet_id"), 0)); - } - if (params.Has("tablet_type")) { - request->Record.SetTabletType(static_cast(FromStringWithDefault(params.Get("tablet_type"), 0))); - } - if (FromStringWithDefault(params.Get("followers"), false)) { - request->Record.SetReturnFollowers(true); - } - if (FromStringWithDefault(params.Get("metrics"), false)) { - request->Record.SetReturnMetrics(true); - } - SendRequestToPipe(ConnectTabletPipe(hiveId), request.Release()); - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else { - ReplyAndPassAway(); - } - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvHive::TEvResponseHiveInfo, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - 
cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvHive::TEvResponseHiveInfo::TPtr& ev) { - HiveInfo = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (HiveInfo != nullptr) { - if (NodeId != 0) { - for (auto itRecord = HiveInfo->Record.MutableTablets()->begin(); itRecord != HiveInfo->Record.MutableTablets()->end();) { - if (itRecord->GetNodeID() != NodeId) { - itRecord = HiveInfo->Record.MutableTablets()->erase(itRecord); - } else { - ++itRecord; - } - } - } - TProtoToJson::ProtoToJson(json, HiveInfo->Record, JsonSettings); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: hive_id - in: query - description: hive identifier (tablet id) - required: true - type: string - - name: tablet_id - in: query - description: tablet id filter - required: false - type: string - - name: tablet_type - in: query - description: tablet type filter - required: false - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - 
required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Hive information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablets from Hive"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_hivestats.h b/ydb/core/viewer/json_hivestats.h deleted file mode 100644 index 483c0e5c120a..000000000000 --- a/ydb/core/viewer/json_hivestats.h +++ /dev/null @@ -1,142 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonHiveStats : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr HiveStats; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHiveStats(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui64 hiveId = FromStringWithDefault(params.Get("hive_id"), 0); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - InitConfig(params); - if (hiveId != 0 ) { - THolder request = MakeHolder(); - request->Record.SetReturnFollowers(FromStringWithDefault(params.Get("followers"), false)); - request->Record.SetReturnMetrics(FromStringWithDefault(params.Get("metrics"), true)); - SendRequestToPipe(ConnectTabletPipe(hiveId), request.Release()); - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else 
{ - ReplyAndPassAway(); - } - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - HiveStats = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (HiveStats != nullptr) { - TProtoToJson::ProtoToJson(json, HiveStats->Record, JsonSettings); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: hive_id - in: query - description: hive identifier (tablet id) - required: true - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Hive statistics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about Hive statistics"; - } -}; - -} -} diff --git 
a/ydb/core/viewer/json_hotkeys.h b/ydb/core/viewer/json_hotkeys.h deleted file mode 100644 index b4922a4af965..000000000000 --- a/ydb/core/viewer/json_hotkeys.h +++ /dev/null @@ -1,169 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - -class TJsonHotkeys : public TViewerPipeClient { - static const bool WithRetry = false; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr DescribeResult; - ui32 Timeout = 0; - ui32 Limit = 0; - float PollingFactor = 0.0; - bool EnableSampling = false; - - struct KeysComparator { - bool operator ()(const std::pair>& a, const std::pair>& b) const { - return a.first > b.first; - }; - }; - - TMultiSet>, KeysComparator> Keys; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHotkeys(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void FillParams(NKikimrSchemeOp::TDescribePath* record, const TCgiParameters& params) { - if (params.Has("path")) { - record->SetPath(params.Get("path")); - } - record->MutableOptions()->SetReturnPartitionStats(true); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Limit = FromStringWithDefault(params.Get("limit"), 10); - PollingFactor = std::max(0.0f, std::min(FromStringWithDefault(params.Get("polling_factor"), 0.2), 1.0f)); - EnableSampling = FromStringWithDefault(params.Get("enable_sampling"), false); - InitConfig(params); - - THolder request = MakeHolder(); - FillParams(request->Record.MutableDescribePath(), params); - request->Record.SetUserToken(Event->Get()->UserToken); - SendRequest(MakeTxProxyID(), 
request.Release()); - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvDataShard::TEvGetDataHistogramResponse, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - DescribeResult = ev->Release(); - const auto& pbRecord(DescribeResult->GetRecord()); - if (pbRecord.HasPathDescription()) { - const auto& pathDescription = pbRecord.GetPathDescription(); - const auto& partitions = pathDescription.GetTablePartitions(); - const auto& metrics = pathDescription.GetTablePartitionMetrics(); - TVector> tabletsOrder; - - for (int i = 0; i < metrics.size(); ++i) { - tabletsOrder.emplace_back(metrics.Get(i).GetCPU(), i); - } - - Sort(tabletsOrder, std::greater>()); - ui32 tablets = (ui32) std::max(1, (int) std::ceil(PollingFactor * tabletsOrder.size())); - - for (ui32 i = 0; i < tablets; ++i) { - THolder request = MakeHolder(); - if (EnableSampling) { - request->Record.SetCollectKeySampleMs(30000); // 30 sec - } - request->Record.SetActualData(true); - ui64 datashardId = partitions.Get(tabletsOrder[i].second).GetDatashardId(); - SendRequestToPipe(ConnectTabletPipe(datashardId), request.Release()); - } - } - - RequestDone(); - } - - void Handle(TEvDataShard::TEvGetDataHistogramResponse::TPtr& ev) { - const auto& rec = ev->Get()->Record; - for (const auto& i: rec.GetTableHistograms()) { - for (const auto& item: i.GetKeyAccessSample().GetItems()) { - TVector keys(item.GetKeyValues().begin(), item.GetKeyValues().end()); - Keys.emplace(item.GetValue(), std::move(keys)); - if (Keys.size() > Limit) { - Keys.erase(--Keys.end()); - } - } - } - - RequestDone(); - } - - NJson::TJsonValue BuildResponse() { - NJson::TJsonValue root; - if (DescribeResult != 
nullptr) { - NJson::TJsonValue& hotkeys = root["hotkeys"]; - for (const auto &i: Keys) { - NJson::TJsonValue entry; - NJson::TJsonValue keyValues; - for (const auto &j: i.second) { - keyValues.AppendValue(j); - } - entry["accessSample"] = i.first; - entry["keyValues"] = std::move(keyValues); - hotkeys.AppendValue(std::move(entry)); - } - } - return root; - } - - void ReplyAndPassAway() { - if (DescribeResult != nullptr) { - switch (DescribeResult->GetRecord().GetStatus()) { - case NKikimrScheme::StatusAccessDenied: - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - return; - default: - break; - } - } - NJson::TJsonValue root = BuildResponse(); - TString json = NJson::WriteJson(root, false); - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -} -} diff --git a/ydb/core/viewer/json_labeledcounters.h b/ydb/core/viewer/json_labeledcounters.h deleted file mode 100644 index c8ea014b6205..000000000000 --- a/ydb/core/viewer/json_labeledcounters.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonLabeledCounters : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrLabeledCounters::TEvTabletLabeledCountersResponse LabeledCountersResult; - TJsonSettings JsonSettings; - TString Groups; - TString GroupNames; - TString Topic; - TString Consumer; - TString DC; - TVector Counters; - ui32 Version = 1; - ui32 Timeout = 0; - -public: - static 
constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonLabeledCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Groups = params.Get("group"); - Topic = NPersQueue::ConvertNewTopicName(params.Get("topic")); - if (Topic.empty()) - Topic = "*"; - Consumer = NPersQueue::ConvertNewConsumerName(params.Get("consumer"), ctx); - DC = params.Get("dc"); - if (DC.empty()) - DC = "*"; - GroupNames = params.Get("group_names"); - Split(params.Get("counters"), ",", Counters); - Version = FromStringWithDefault(params.Get("version"), Version); - Sort(Counters); - if (Version >= 3) { - TString topic = "rt3." + DC + "--" + Topic; - if (!Consumer.empty()) { - Groups = Consumer + "/*/" + topic; - if (Topic != "*") { - Groups += "," + topic; - } - } else { - Groups = topic; - } - } - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PersQueue, ctx, Version, Version >= 2 ? 
Groups : TString()); - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - if (Version == 1) { - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - auto& uc = *ev->Get()->Record.MutableLabeledCountersByGroup(i); - if (!Groups.empty() && !IsMatchesWildcards(uc.GetGroup(), Groups)) { - continue; - } - if (!GroupNames.empty() && !IsMatchesWildcard(uc.GetGroupNames(), GroupNames)) { - continue; - } - if (Counters.empty()) { - LabeledCountersResult.AddLabeledCountersByGroup()->Swap(&uc); - } else { - auto& lc = *LabeledCountersResult.AddLabeledCountersByGroup(); - lc.SetGroup(uc.GetGroup()); - lc.SetGroupNames(uc.GetGroupNames()); - for (auto& c : *uc.MutableLabeledCounter()) { - if (BinarySearch(Counters.begin(), Counters.end(), c.GetName())) { - lc.AddLabeledCounter()->Swap(&c); - } - } - } - } - } else if (Version >= 2) { - const NKikimrLabeledCounters::TEvTabletLabeledCountersResponse& source(ev->Get()->Record); - TVector> counterNamesMapping; - counterNamesMapping.reserve(source.CounterNamesSize()); - for (const TString& counterName : source.GetCounterNames()) { - if (Counters.empty() || BinarySearch(Counters.begin(), Counters.end(), counterName)) { - counterNamesMapping.push_back(LabeledCountersResult.CounterNamesSize()); - LabeledCountersResult.AddCounterNames(counterName); - } else { - counterNamesMapping.push_back(Nothing()); - } - } - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - auto& uc = *ev->Get()->Record.MutableLabeledCountersByGroup(i); - auto& lc = 
*LabeledCountersResult.AddLabeledCountersByGroup(); - lc.SetGroup(uc.GetGroup()); - for (auto& c : *uc.MutableLabeledCounter()) { - ui32 nameId = c.GetNameId(); - if (counterNamesMapping[c.GetNameId()].Defined()) { - nameId = counterNamesMapping[c.GetNameId()].GetRef(); - auto* lci = lc.AddLabeledCounter(); - lci->SetValue(c.GetValue()); - lci->SetNameId(nameId); - } - } - } - } - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, LabeledCountersResult, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: group - in: query - description: group name - required: false - type: string - - name: dc - in: query - description: datacenter name - required: false - type: string - default: "*" - - name: topic - in: query - description: topic name - required: false - type: string - default: "*" - - name: consumer - in: query - description: consumer name - required: false - type: string - default: "" - - name: group_names - in: query - description: group names - required: false - type: string - - name: counters - in: query - description: counters names - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - default: false - - name: all - in: query - description: return information about all topics and clients - required: false - type: 
boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - default: false - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Labeled counters info"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about labeled counters"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_local_rpc.h b/ydb/core/viewer/json_local_rpc.h index 00cd683cbb72..a2574bd6aa0f 100644 --- a/ydb/core/viewer/json_local_rpc.h +++ b/ydb/core/viewer/json_local_rpc.h @@ -1,20 +1,9 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" - #include -#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { struct TEvLocalRpcPrivate { enum EEv { @@ -35,27 +24,16 @@ struct TEvLocalRpcPrivate { }; }; -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - template -class TJsonLocalRpc : public TActorBootstrapped> { +class TJsonLocalRpc : public TViewerPipeClient { using TThis = TJsonLocalRpc; - using TBase = TActorBootstrapped; - - using TBase::Send; - using TBase::PassAway; - using TBase::Become; + using TBase = TViewerPipeClient; protected: - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TProtoRequest Request; + using TBase::ReplyAndPassAway; + using TRequestProtoType = TProtoRequest; + std::vector AllowedMethods = {}; TAutoPtr> Result; - - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString Database; NThreading::TFuture RpcFuture; public: @@ -63,13 +41,11 @@ class TJsonLocalRpc : public TActorBootstrappedname()) {} - TProtoRequest Params2Proto(const TCgiParameters& params) { - TProtoRequest request; + void Params2Proto(const TCgiParameters& params, 
TRequestProtoType& request) { using google::protobuf::Descriptor; using google::protobuf::Reflection; using google::protobuf::FieldDescriptor; @@ -119,44 +95,51 @@ class TJsonLocalRpc : public TActorBootstrappedGet()->Request.GetPostContent(); - if (!postData.empty()) { - try { - NProtobufJson::Json2Proto(postData, request, json2ProtoConfig); - } - catch (const yexception& e) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", e.what())); + virtual bool ValidateRequest(TRequestProtoType& request) { + using google::protobuf::Descriptor; + using google::protobuf::Reflection; + using google::protobuf::FieldDescriptor; + const Descriptor& descriptor = *TRequestProtoType::GetDescriptor(); + const Reflection& reflection = *TRequestProtoType::GetReflection(); + for (int idx = 0; idx < descriptor.field_count(); ++idx) { + const FieldDescriptor* field = descriptor.field(idx); + const auto& options(field->options()); + if (options.HasExtension(Ydb::required)) { + if (options.GetExtension(Ydb::required)) { + if (!reflection.HasField(request, field)) { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", TStringBuilder() << "field '" << field->name() << "' is required")); + return false; + } + } } - } else { - const auto& params(Event->Get()->Request.GetParams()); - return Params2Proto(params); } - return request; + return true; } - bool PostToRequest() { + bool Params2Proto(TRequestProtoType& request) { auto postData = Event->Get()->Request.GetPostContent(); if (!postData.empty()) { try { - NProtobufJson::Json2Proto(postData, Request, {}); - return true; + NProtobufJson::Json2Proto(postData, request); } catch (const yexception& e) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", e.what())); + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", e.what())); return false; } } + const auto& params(Event->Get()->Request.GetParams()); + Params2Proto(params, request); + if (!ValidateRequest(request)) { + return false; + } return true; } - 
void SendGrpcRequest() { - RpcFuture = NRpcService::DoLocalRpc(std::move(Request), Database, Event->Get()->UserToken, TlsActivationContext->ActorSystem()); + void SendGrpcRequest(TRequestProtoType&& request) { + // TODO(xenoxeno): pass trace id + RpcFuture = NRpcService::DoLocalRpc(std::move(request), Database, Event->Get()->UserToken, TlsActivationContext->ActorSystem()); RpcFuture.Subscribe([actorId = TBase::SelfId(), actorSystem = TlsActivationContext->ActorSystem()] (const NThreading::TFuture& future) { auto& response = future.GetValueSync(); @@ -182,14 +165,21 @@ class TJsonLocalRpc : public TActorBootstrappedGet()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), true); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - - SendGrpcRequest(); - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + if (!AllowedMethods.empty() && std::find(AllowedMethods.begin(), AllowedMethods.end(), Event->Get()->Request.GetMethod()) == AllowedMethods.end()) { + return ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Method is not allowed")); + } + if (Database.empty()) { + return ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "field 'database' is required")); + } + if (TBase::NeedToRedirect()) { + return; + } + TRequestProtoType request; + if (!Params2Proto(request)) { + return; + } + SendGrpcRequest(std::move(request)); + Become(&TThis::StateRequested, Timeout, new TEvents::TEvWakeup()); } void Handle(typename TEvLocalRpcPrivate::TEvGrpcRequestResult::TPtr& ev) { @@ -206,38 +196,24 @@ class TJsonLocalRpc : public TActorBootstrappedStatus) { - if (!Result->Status->IsSuccess()) { + if (Result->Status->IsSuccess()) { + return ReplyAndPassAway(GetHTTPOKJSON(Result->Message)); + } else { NJson::TJsonValue json; TString message; - MakeErrorReply(json, message, Result->Status.value()); + 
MakeJsonErrorReply(json, message, Result->Status.value()); TStringStream stream; NJson::WriteJson(&stream, &json); if (Result->Status->GetStatus() == NYdb::EStatus::UNAUTHORIZED) { - return ReplyAndPassAway(Viewer->GetHTTPFORBIDDEN(Event->Get(), "application/json", stream.Str())); + return ReplyAndPassAway(GetHTTPFORBIDDEN("application/json", stream.Str()), message); } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "application/json", stream.Str())); + return ReplyAndPassAway(GetHTTPBADREQUEST("application/json", stream.Str()), message); } - } else { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result->Message, JsonSettings); - return ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), json.Str())); } } else { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get())); + return ReplyAndPassAway(GetHTTPINTERNALERROR("text/plain", "no Result or Status"), "internal error"); } } - - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } }; - -} -} +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_metainfo.h b/ydb/core/viewer/json_metainfo.h deleted file mode 100644 index b96a26a4478a..000000000000 --- a/ydb/core/viewer/json_metainfo.h +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "browse.h" -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonMetaInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Counters = false; - NKikimrViewer::TMetaInfo 
MetaInfo; - TActorId BrowseActorID; - using TBrowseRequestKey = std::tuple; - std::unordered_multiset BrowseRequestsInFlight; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonMetaInfo(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Counters = FromStringWithDefault(params.Get("counters"), false); - TString path = params.Get("path"); - BrowseActorID = ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, path, Event->Get()->UserToken)); - Become(&TThis::StateWait, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - ctx.Send(BrowseActorID, new TEvents::TEvPoisonPill()); - TBase::Die(ctx); - } - - STFUNC(StateWait) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, Handle); - HFunc(NViewerEvents::TEvBrowseRequestSent, Handle); - HFunc(NViewerEvents::TEvBrowseRequestCompleted, Handle); - HFunc(NMon::TEvHttpInfoRes, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { - ctx.ExecutorThread.Send(ev->Forward(Event->Sender)); - Die(ctx); - } - - void Handle(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event(*ev->Get()); - if (!event.Error.empty()) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(event.Error, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return Die(ctx); - } - MetaInfo.MergeFrom(event.MetaInfo); - if (!Counters) { - // TODO(xenoxeno): 
it could be a little bit more effective - MetaInfo.ClearCounters(); - } - ReplyAndDie(ctx); - } - - void Handle(NViewerEvents::TEvBrowseRequestSent::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestSent& event(*ev->Get()); - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NViewerEvents::TEvBrowseRequestCompleted::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestCompleted& event(*ev->Get()); - auto it = BrowseRequestsInFlight.find({event.Actor, event.Tablet, event.Event}); - if (it != BrowseRequestsInFlight.end()) { - // we could not delete by key, it could be many items with the same key - BrowseRequestsInFlight.erase(it); - } - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, MetaInfo, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - TStringStream result; - RenderPendingRequests(result); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), result.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void RenderPendingRequests(IOutputStream& html) { - for (const auto& request : BrowseRequestsInFlight) { - html << request << Endl; - } - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: tablet_id - in: query - description: tablet identifier - required: false - type: integer - - name: enums - in: query - description: convert 
enums to strings - required: false - type: boolean - - name: counters - in: query - description: return tablet counters - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema meta information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns meta information about schema path"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_netinfo.h b/ydb/core/viewer/json_netinfo.h deleted file mode 100644 index e57e18677801..000000000000 --- a/ydb/core/viewer/json_netinfo.h +++ /dev/null @@ -1,357 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonNetInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - std::unordered_map TenantByPath; - std::unordered_map TenantBySubDomainKey; - std::unordered_map> NavigateResult; - std::unique_ptr HiveStats; - NMon::TEvHttpInfo::TPtr Event; - std::vector NodeIds; - std::unordered_map> NodeSysInfo; - std::unordered_map> NodeNetInfo; - std::unique_ptr NodesInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonNetInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers 
= !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Path = params.Get("path"); - - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - ui64 consoleId = MakeConsoleID(); - - if (consoleId != 0) { - RequestConsoleListTenants(); - } - - ui64 hiveId = domains->GetHive(); - if (hiveId != TDomainsInfo::BadTabletId) { - RequestHiveDomainStats(hiveId); - } - - TString domainPath = "/" + domain->Name; - if (Path.empty() || domainPath == Path) { - NKikimrViewer::TTenant& tenant = TenantByPath[domainPath]; - tenant.SetName(domainPath); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::State::GetDatabaseStatusResult_State_RUNNING); - RequestSchemeCacheNavigate(domainPath); - } - - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvNodeStateResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - NodesInfo.reset(ev->Release().Release()); - RequestDone(); - } - - 
void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (!Path.empty() && path != Path) { - continue; - } - TenantByPath[path]; - RequestSchemeCacheNavigate(path); - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - HiveStats.reset(ev->Release().Release()); - for (const NKikimrHive::THiveDomainStats& hiveStat : HiveStats->Record.GetDomainStats()) { - TPathId subDomainKey(hiveStat.GetShardId(), hiveStat.GetPathId()); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - tenant.SetId(TStringBuilder() << hiveStat.GetShardId() << '-' << hiveStat.GetPathId()); - tenant.MutableNodeIds()->MergeFrom(hiveStat.GetNodeIds()); - for (TNodeId nodeId : hiveStat.GetNodeIds()) { - NodeIds.emplace_back(nodeId); - } - } - for (TNodeId nodeId : NodeIds) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - SendRequest( - whiteboardServiceId, - new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, - nodeId); - SendRequest( - whiteboardServiceId, - new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateRequest(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, - nodeId); - - } - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); - NavigateResult[path].reset(ev->Release().Release()); - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - 
NodeSysInfo[nodeId].reset(ev->Release().Release()); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvNodeStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeNetInfo[nodeId].reset(ev->Release().Release()); - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - ui32 nodeId = ev.Get()->Cookie; - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeSysInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvNodeStateRequest) { - if (NodeNetInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - ui32 nodeId = ev->Get()->NodeId; - if (NodeSysInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - if (NodeNetInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - - void ReplyAndPassAway() { - THashMap nodeInfoIndex; - if (NodesInfo) { - for (const TEvInterconnect::TNodeInfo& nodeInfo : NodesInfo->Nodes) { - nodeInfoIndex[nodeInfo.NodeId] = &nodeInfo; - } - } - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - NKikimrViewer::TNetInfo result; - for (const std::pair& prTenant : TenantByPath) { - const TString& path = prTenant.first; - //const NKikimrViewer::TTenant& tenantByPath(prTenant.second); - NKikimrViewer::TNetTenantInfo& netTenantInfo = *result.AddTenants(); - netTenantInfo.SetName(path); - auto itNavigate = NavigateResult.find(path); - if (itNavigate != NavigateResult.end()) { - auto domainInfo = itNavigate->second->Request->ResultSet.begin()->DomainInfo; - TPathId subDomainKey(domainInfo->DomainKey); - const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]); - for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) { - NKikimrViewer::TNetNodeInfo& netNodeInfo = *netTenantInfo.AddNodes(); - netNodeInfo.SetNodeId(nodeId); - 
auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second != nullptr && itSysInfo->second->Record.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second->Record.GetSystemStateInfo(0); - if (sysInfo.HasDataCenter()) { - netNodeInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - netNodeInfo.SetRack(sysInfo.GetRack()); - } - } - } - if (dynamicNameserviceConfig) { - netNodeInfo.SetNodeType(nodeId <= dynamicNameserviceConfig->MaxStaticNodeId ? NKikimrViewer::ENodeType::Static : NKikimrViewer::ENodeType::Dynamic); - } - auto itNodeInfo = nodeInfoIndex.find(nodeId); - if (itNodeInfo != nodeInfoIndex.end()) { - netNodeInfo.SetHost(itNodeInfo->second->Host); - netNodeInfo.SetPort(itNodeInfo->second->Port); - } - auto itNetInfo = NodeNetInfo.find(nodeId); - if (itNetInfo != NodeNetInfo.end()) { - if (itNetInfo->second != nullptr) { - for (const NKikimrWhiteboard::TNodeStateInfo& netInfo : itNetInfo->second->Record.GetNodeStateInfo()) { - TString peerName(netInfo.GetPeerName()); - TNodeId nodeId = FromStringWithDefault(TStringBuf(peerName).Before(':')); - if (nodeInfoIndex.find(nodeId) == nodeInfoIndex.end()) { - continue; - } - NKikimrViewer::TNetNodePeerInfo& netNodePeerInfo = *netNodeInfo.AddPeers(); - netNodePeerInfo.SetNodeId(nodeId); - netNodePeerInfo.SetPeerName(peerName); - netNodePeerInfo.SetConnected(netInfo.GetConnected()); - netNodePeerInfo.SetConnectStatus(GetViewerFlag(netInfo.GetConnectStatus())); - netNodePeerInfo.SetChangeTime(netInfo.GetChangeTime()); - if (dynamicNameserviceConfig) { - netNodePeerInfo.SetNodeType(nodeId <= dynamicNameserviceConfig->MaxStaticNodeId ? 
NKikimrViewer::ENodeType::Static : NKikimrViewer::ENodeType::Dynamic); - } - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second != nullptr && itSysInfo->second->Record.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second->Record.GetSystemStateInfo(0); - if (sysInfo.HasDataCenter()) { - netNodePeerInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - netNodePeerInfo.SetRack(sysInfo.GetRack()); - } - } - } - auto itNodeInfo = nodeInfoIndex.find(nodeId); - if (itNodeInfo != nodeInfoIndex.end()) { - netNodePeerInfo.SetHost(itNodeInfo->second->Host); - netNodePeerInfo.SetPort(itNodeInfo->second->Port); - } - } - } - } - - // TODO(xenoxeno) - netNodeInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - } - - // TODO(xenoxeno) - netTenantInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - - // TODO(xenoxeno) - result.SetOverall(NKikimrViewer::EFlag::Green); - TStringStream json; - TProtoToJson::ProtoToJson(json, result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: hive_id - in: query - description: hive identifier (tablet id) - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - 
description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Network information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns network information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodeinfo.h b/ydb/core/viewer/json_nodeinfo.h deleted file mode 100644 index ac1dbb04878d..000000000000 --- a/ydb/core/viewer/json_nodeinfo.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "wb_merge.h" -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvNodeStateResponse; - using TElementType = NKikimrWhiteboard::TNodeStateInfo; - using TElementKeyType = TString; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableNodeStateInfo(); - } - - static const TString& GetElementKey(const TElementType& type) { - return type.GetPeerName(); - } - - static TString GetDefaultMergeField() { - return "PeerName"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - - static void InitMerger() { - const auto* field = NKikimrWhiteboard::TNodeStateInfo::descriptor()->FindFieldByName("ConnectStatus"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeEnumField; - field = NKikimrWhiteboard::TNodeStateInfo::descriptor()->FindFieldByName("Connected"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeBoolField; - } -}; - -using TJsonNodeInfo = 
TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Interconnect information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about node connections"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodelist.h b/ydb/core/viewer/json_nodelist.h deleted file mode 100644 index d1d3310f616e..000000000000 --- a/ydb/core/viewer/json_nodelist.h +++ /dev/null @@ -1,123 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonNodeList : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr NodesInfo; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonNodeList(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); - Become(&TThis::StateRequestedBrowse); - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, Handle); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { - NodesInfo = ev->Release(); - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - NJson::TJsonValue json; - json.SetType(NJson::EJsonValueType::JSON_ARRAY); - if (NodesInfo != nullptr) { - for (auto it = NodesInfo->Nodes.begin(); it != NodesInfo->Nodes.end(); ++it) { - const TEvInterconnect::TNodeInfo& nodeInfo = *it; - NJson::TJsonValue& jsonNodeInfo = json.AppendValue(NJson::TJsonValue()); - 
jsonNodeInfo["Id"] = nodeInfo.NodeId; - if (!nodeInfo.Host.empty()) { - jsonNodeInfo["Host"] = nodeInfo.Host; - } - if (!nodeInfo.ResolveHost.empty()) { - jsonNodeInfo["ResolveHost"] = nodeInfo.ResolveHost; - } - jsonNodeInfo["Address"] = nodeInfo.Address; - jsonNodeInfo["Port"] = nodeInfo.Port; - if (nodeInfo.Location != TNodeLocation()) { - NJson::TJsonValue& jsonPhysicalLocation = jsonNodeInfo["PhysicalLocation"]; - const auto& x = nodeInfo.Location.GetLegacyValue(); - jsonPhysicalLocation["DataCenter"] = x.DataCenter; - jsonPhysicalLocation["Room"] = x.Room; - jsonPhysicalLocation["Rack"] = x.Rack; - jsonPhysicalLocation["Body"] = x.Body; - jsonPhysicalLocation["DataCenterId"] = nodeInfo.Location.GetDataCenterId(); - jsonPhysicalLocation["Location"] = nodeInfo.Location.ToString(); - } - } - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void Timeout(const TActorContext &ctx) { - ReplyAndDie(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return YAML::Load(R"___( - type: array - title: TEvNodeListResponse - items: - type: object - title: TNodeInfo - properties: - Id: - type: integer - Host: - type: string - Address: - type: string - Port: - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Nodes list"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns list of nodes"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodes.h b/ydb/core/viewer/json_nodes.h deleted file mode 100644 index 987cdd369be2..000000000000 --- a/ydb/core/viewer/json_nodes.h +++ /dev/null @@ -1,1020 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "viewer_helper.h" -#include "json_pipe_req.h" 
-#include "json_sysinfo.h" -#include "json_pdiskinfo.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; -using ::google::protobuf::FieldDescriptor; - -class TJsonNodes : public TViewerPipeClient { - using TThis = TJsonNodes; - using TBase = TViewerPipeClient; - using TNodeId = ui32; - using TPDiskId = std::pair; - IViewer* Viewer; - TActorId Initiator; - NMon::TEvHttpInfo::TPtr Event; - std::unique_ptr NodesInfo; - std::unordered_map PDiskInfo; - std::unordered_map VDiskInfo; - std::unordered_map> TabletInfo; - std::unordered_map SysInfo; - std::unordered_map NavigateResult; - std::unique_ptr BaseConfig; - std::unordered_map BaseConfigGroupIndex; - std::unordered_map DisconnectTime; - std::unordered_map NodeName; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString FilterTenant; - TSubDomainKey FilterSubDomainKey; - TString FilterPath; - TString FilterStoragePool; - std::unordered_set FilterNodeIds; - std::unordered_set FilterGroupIds; - std::unordered_set PassedNodeIds; - std::vector NodeIds; - std::optional Offset; - std::optional Limit; - ui32 UptimeSeconds = 0; - bool ProblemNodesOnly = false; - TString Filter; - - enum class EWith { - Everything, - MissingDisks, - SpaceProblems, - }; - EWith With = EWith::Everything; - - enum class EType { - Any, - Static, - Dynamic, - }; - EType Type = EType::Any; - - enum class ESort { - NodeId, - Host, - DC, - Rack, - Version, - Uptime, - Memory, - CPU, - LoadAverage, - Missing, - }; - ESort Sort = ESort::NodeId; - bool ReverseSort = false; - bool SortedNodeList = false; - bool LimitApplied = false; - - bool Storage = false; - bool Tablets = false; - TPathId FilterPathId; - bool ResolveGroupsToNodes = false; - TNodeId MinAllowedNodeId = std::numeric_limits::min(); - TNodeId MaxAllowedNodeId = std::numeric_limits::max(); - ui32 RequestsBeforeNodeList = 0; - ui64 HiveId = 0; - std::optional MaximumDisksPerNode; - -public: - static constexpr 
NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TString GetLogPrefix() { - static TString prefix = "json/nodes "; - return prefix; - } - - TJsonNodes(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Initiator(ev->Sender) - , Event(std::move(ev)) - { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - UptimeSeconds = FromStringWithDefault(params.Get("uptime"), 0); - ProblemNodesOnly = FromStringWithDefault(params.Get("problems_only"), ProblemNodesOnly); - Filter = params.Get("filter"); - FilterPath = params.Get("path"); - FilterTenant = params.Get("tenant"); - FilterStoragePool = params.Get("pool"); - SplitIds(params.Get("node_id"), ',', FilterNodeIds); - auto itZero = FilterNodeIds.find(0); - if (itZero != FilterNodeIds.end()) { - FilterNodeIds.erase(itZero); - FilterNodeIds.insert(TlsActivationContext->ActorSystem()->NodeId); - } - if (params.Get("with") == "missing") { - With = EWith::MissingDisks; - } else if (params.Get("with") == "space") { - With = EWith::SpaceProblems; - } - if (params.Has("offset")) { - Offset = FromStringWithDefault(params.Get("offset"), 0); - } - if (params.Has("limit")) { - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - } - if (params.Get("type") == "static") { - Type = EType::Static; - } else if (params.Get("type") == "dynamic") { - Type = EType::Dynamic; - } else if (params.Get("type") == "any") { - Type = EType::Any; - } - Storage = FromStringWithDefault(params.Get("storage"), Storage); - Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - ResolveGroupsToNodes = FromStringWithDefault(params.Get("resolve_groups"), ResolveGroupsToNodes); - 
TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "NodeId") { - Sort = ESort::NodeId; - } else if (sort == "Host") { - Sort = ESort::Host; - } else if (sort == "DC") { - Sort = ESort::DC; - } else if (sort == "Rack") { - Sort = ESort::Rack; - } else if (sort == "Version") { - Sort = ESort::Version; - } else if (sort == "Uptime") { - Sort = ESort::Uptime; - } else if (sort == "Memory") { - Sort = ESort::Memory; - } else if (sort == "CPU") { - Sort = ESort::CPU; - } else if (sort == "LoadAverage") { - Sort = ESort::LoadAverage; - } else if (sort == "Missing") { - Sort = ESort::Missing; - } - } - } - - void Bootstrap() { - BLOG_TRACE("Bootstrap()"); - if (Type != EType::Any) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - if (dynamicNameserviceConfig) { - if (Type == EType::Static) { - MaxAllowedNodeId = dynamicNameserviceConfig->MaxStaticNodeId; - } - if (Type == EType::Dynamic) { - MinAllowedNodeId = dynamicNameserviceConfig->MaxStaticNodeId + 1; - } - } - } - - if (Storage) { - BLOG_TRACE("RequestBSControllerConfig()"); - RequestBSControllerConfig(); - ++RequestsBeforeNodeList; - } - - if (!FilterTenant.empty()) { - RequestForTenant(FilterTenant); - } - - if (!FilterPath.empty()) { - BLOG_TRACE("Requesting navigate for " << FilterPath); - RequestSchemeCacheNavigate(FilterPath); - ++RequestsBeforeNodeList; - } - - BLOG_TRACE("Request TEvListNodes"); - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - ++RequestsBeforeNodeList; - if (Requests == 0) { - ReplyAndPassAway(); - return; - } - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - BLOG_TRACE("PassAway()"); - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - 
TBase::PassAway(); - } - - void RequestForTenant(const TString& filterTenant) { - BLOG_TRACE("RequestForTenant " << filterTenant); - FilterTenant = filterTenant; - if (Type == EType::Static || Type == EType::Any) { - if (ResolveGroupsToNodes) { - if (!Storage) { - BLOG_TRACE("RequestBSControllerConfig()"); - RequestBSControllerConfig(); - ++RequestsBeforeNodeList; - } - } - } - if (Type == EType::Dynamic || Type == EType::Any) { - BLOG_TRACE("RequestStateStorageEndpointsLookup for " << FilterTenant); - RequestStateStorageEndpointsLookup(FilterTenant); // to get dynamic nodes - ++RequestsBeforeNodeList; - } - } - - bool CheckNodeFilters(TNodeId nodeId) { - if (Storage && With == EWith::MissingDisks) { - auto itPDiskState = PDiskInfo.find(nodeId); - if (itPDiskState != PDiskInfo.end()) { - int disksNormal = 0; - for (const auto& protoPDiskInfo : itPDiskState->second.GetPDiskStateInfo()) { - if (protoPDiskInfo.state() == NKikimrBlobStorage::TPDiskState::Normal) { - ++disksNormal; - } - } - if (itPDiskState->second.pdiskstateinfo_size() == disksNormal) { - return false; - } - } - } - auto itSysInfo = SysInfo.find(nodeId); - if (itSysInfo != SysInfo.end() && itSysInfo->second.SystemStateInfoSize() > 0) { - const auto& sysState(itSysInfo->second.GetSystemStateInfo(0)); - if (Storage && With == EWith::SpaceProblems) { - if (!sysState.HasMaxDiskUsage() || sysState.GetMaxDiskUsage() < 0.85) { - return false; - } - } - if (UptimeSeconds > 0 && sysState.HasStartTime() && itSysInfo->second.HasResponseTime() - && itSysInfo->second.GetResponseTime() - sysState.GetStartTime() > UptimeSeconds * 1000) { - return false; - } - if (ProblemNodesOnly && sysState.HasSystemState() - && GetViewerFlag(sysState.GetSystemState()) == NKikimrViewer::EFlag::Green) { - return false; - } - if (Filter) { - if (sysState.HasHost() && sysState.GetHost().Contains(Filter)) { - return true; - } - if (std::to_string(nodeId).contains(Filter)) { - return true; - } - return false; - } - } - - return true; - 
} - - bool HasNodeFilter() { - return With != EWith::Everything || UptimeSeconds != 0 || ProblemNodesOnly || !Filter.empty(); - } - - void SendNodeRequest(TNodeId nodeId) { - if (PassedNodeIds.insert(nodeId).second) { - if (SortedNodeList) { - // optimization for early paging with default sort - LimitApplied = true; - if (Offset.has_value()) { - if (PassedNodeIds.size() <= Offset.value()) { - return; - } - } - if (Limit.has_value()) { - if (NodeIds.size() >= Limit.value()) { - return; - } - } - } - NodeIds.push_back(nodeId); // order is important - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - BLOG_TRACE("SendSystemStateRequest to " << nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - if (Storage) { - BLOG_TRACE("SendV/PDiskStateRequest to " << nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - if (Tablets && FilterPathId == TPathId()) { - BLOG_TRACE("SendTabletStateRequest to " << nodeId); - auto request = std::make_unique(); - request->Record.SetGroupBy("Type,State"); - SendRequest(whiteboardServiceId, request.release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - } - } - - void ProcessNodeIds() { - BLOG_TRACE("ProcessNodeIds()"); - - if (!HasNodeFilter()) { - switch (Sort) { - case ESort::NodeId: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.NodeId;}, ReverseSort); - SortedNodeList = true; - break; - } - case ESort::Host: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.Host;}, ReverseSort); - SortedNodeList = 
true; - break; - } - case ESort::DC: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.Location.GetDataCenterId();}, ReverseSort); - SortedNodeList = true; - break; - } - default: - break; - } - } - - for (const auto& ni : NodesInfo->Nodes) { - if ((FilterNodeIds.empty() || FilterNodeIds.count(ni.NodeId) > 0) && ni.NodeId >= MinAllowedNodeId && ni.NodeId <= MaxAllowedNodeId) { - SendNodeRequest(ni.NodeId); - } - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - BLOG_TRACE("Received TEvControllerConfigResponse"); - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(ev->Get()->Record); - if (pbRecord.HasResponse() && pbRecord.GetResponse().StatusSize() > 0) { - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - if (pbStatus.HasBaseConfig()) { - BaseConfig.reset(ev->Release().Release()); - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(BaseConfig->Record); - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - const NKikimrBlobStorage::TBaseConfig& pbConfig(pbStatus.GetBaseConfig()); - for (const NKikimrBlobStorage::TBaseConfig::TGroup& group : pbConfig.GetGroup()) { - BaseConfigGroupIndex[group.GetGroupId()] = &group; - } - std::unordered_map disksPerNode; - disksPerNode.reserve(pbConfig.NodeSize()); - for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pdisk : pbConfig.GetPDisk()) { - disksPerNode[pdisk.GetNodeId()]++; - } - int maximumDisksPerNode = 0; - for (const auto& [nodeId, disks] : disksPerNode) { - if (disks > maximumDisksPerNode) { - maximumDisksPerNode = disks; - } - } - MaximumDisksPerNode = maximumDisksPerNode; - } - } - if (ResolveGroupsToNodes) { - BLOG_TRACE("Requesting navigate for " << FilterTenant); - RequestSchemeCacheNavigate(FilterTenant); // to get storage pools and then groups and then pdisks - ++RequestsBeforeNodeList; - } - if 
(--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - - RequestDone(); - } - - bool IsSubDomainPath(const TSchemeCacheNavigate::TEntry& entry) { - switch (entry.Kind) { - case TSchemeCacheNavigate::EKind::KindSubdomain: - case TSchemeCacheNavigate::EKind::KindExtSubdomain: - return true; - case TSchemeCacheNavigate::EKind::KindPath: - return entry.Self->Info.GetPathId() == NSchemeShard::RootPathId; - default: - return false; - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - TSchemeCacheNavigate::TEntry& entry(ev->Get()->Request->ResultSet.front()); - TString path = CanonizePath(entry.Path); - BLOG_TRACE("Received navigate for " << path); - if (IsSubDomainPath(entry)) { - if (HiveId == 0) { - HiveId = entry.DomainInfo->Params.GetHive(); - } - if (!FilterSubDomainKey) { - const auto ownerId = entry.DomainInfo->DomainKey.OwnerId; - const auto localPathId = entry.DomainInfo->DomainKey.LocalPathId; - FilterSubDomainKey = TSubDomainKey(ownerId, localPathId); - } - - if (FilterTenant.empty()) { - RequestForTenant(path); - } - - if (entry.DomainInfo->ResourcesDomainKey && entry.DomainInfo->DomainKey != entry.DomainInfo->ResourcesDomainKey) { - TPathId resourceDomainKey(entry.DomainInfo->ResourcesDomainKey); - BLOG_TRACE("Requesting navigate for resource domain " << resourceDomainKey); - RequestSchemeCacheNavigate(resourceDomainKey); - ++RequestsBeforeNodeList; - } else if (Storage && entry.DomainDescription) { - for (const auto& storagePool : entry.DomainDescription->Description.GetStoragePools()) { - TString storagePoolName = storagePool.GetName(); - THolder request = MakeHolder(); - request->Record.SetReturnAllMatchingGroups(true); - request->Record.AddGroupParameters()->MutableStoragePoolSpecifier()->SetName(storagePoolName); - BLOG_TRACE("Requesting BSControllerSelectGroups for " 
<< storagePoolName); - RequestBSControllerSelectGroups(std::move(request)); - ++RequestsBeforeNodeList; - } - } - } else { - if (entry.DomainInfo) { - TPathId domainKey(entry.DomainInfo->DomainKey); - BLOG_TRACE("Requesting navigate for parent domain " << domainKey); - RequestSchemeCacheNavigate(domainKey); - ++RequestsBeforeNodeList; - - if (!FilterPath.empty() && Tablets && FilterPathId == TPathId()) { - FilterPathId = TPathId(entry.Self->Info.GetSchemeshardId(), entry.Self->Info.GetPathId()); - } - } - } - NavigateResult.emplace(path, std::move(entry)); - - if (HiveId != 0) { - BLOG_TRACE("Requesting hive " << HiveId << " for path id " << FilterPathId); - RequestHiveNodeStats(HiveId, FilterPathId); - ++RequestsBeforeNodeList; - } - } else { - BLOG_TRACE("Error receiving Navigate response"); - FilterNodeIds = { 0 }; - } - - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { - BLOG_TRACE("ResponseHiveNodeStats()"); - for (const NKikimrHive::THiveNodeStats& nodeStats : ev->Get()->Record.GetNodeStats()) { - const TSubDomainKey nodeSubDomainKey = TSubDomainKey(nodeStats.GetNodeDomain()); - if (FilterSubDomainKey && FilterSubDomainKey != nodeSubDomainKey) { - continue; - } - ui32 nodeId = nodeStats.GetNodeId(); - auto& tabletInfo(TabletInfo[nodeId]); - for (const NKikimrHive::THiveDomainStatsStateCount& stateStats : nodeStats.GetStateStats()) { - tabletInfo.emplace_back(); - NKikimrViewer::TTabletStateInfo& viewerTablet(tabletInfo.back()); - viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(stateStats.GetTabletType())); - viewerTablet.SetCount(stateStats.GetCount()); - viewerTablet.SetState(GetFlagFromTabletState(stateStats.GetVolatileState())); - } - BLOG_TRACE("HiveNodeStats filter node by " << nodeId); - FilterNodeIds.insert(nodeId); - DisconnectTime[nodeId] = nodeStats.GetLastAliveTimestamp(); - if (nodeStats.HasNodeName()) { - NodeName[nodeId] = 
nodeStats.GetNodeName(); - } - } - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr& ev) { - BLOG_TRACE("Received TEvControllerSelectGroupsResult"); - for (const auto& matchingGroups : ev->Get()->Record.GetMatchingGroups()) { - for (const auto& group : matchingGroups.GetGroups()) { - TString storagePoolName = group.GetStoragePoolName(); - if (FilterStoragePool.empty() || FilterStoragePool == storagePoolName) { - if (FilterGroupIds.emplace(group.GetGroupID()).second && BaseConfig) { - auto itBaseConfigGroupIndex = BaseConfigGroupIndex.find(group.GetGroupID()); - if (itBaseConfigGroupIndex != BaseConfigGroupIndex.end()) { - for (const NKikimrBlobStorage::TVSlotId& vslot : itBaseConfigGroupIndex->second->GetVSlotId()) { - BLOG_TRACE("SelectGroups filter by node " << vslot.GetNodeId()); - FilterNodeIds.insert(vslot.GetNodeId()); - } - } - } - } - } - } - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - BLOG_TRACE("Received TEvNodesInfo " << ev->Get()->Nodes.size()); - NodesInfo.reset(ev->Release().Release()); - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - BLOG_TRACE("Received TEvBoardInfo"); - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - auto nodeId(actorId.NodeId()); - BLOG_TRACE("BoardInfo filter node by " << nodeId); - FilterNodeIds.insert(nodeId); - } - } else { - BLOG_TRACE("Error receiving TEvBoardInfo response"); - FilterNodeIds = { 0 }; - } - - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered type " << ev->Get()->SourceType << " 
from node " << nodeId); - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvSystemStateRequest: - if (SysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - RequestDone(); - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { - ui32 nodeId = ev->Get()->NodeId; - BLOG_TRACE("Disconnected from node " << nodeId); - if (SysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - if (Storage) { - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - } - if (Tablets) { - if (TabletInfo.emplace(nodeId, std::vector()).second) { - RequestDone(); - } - } - } - - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("SystemStateResponse from node " << nodeId); - SysInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("PDiskStateResponse from node " << nodeId); - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("VDiskStateResponse from node " << nodeId); - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui64 nodeId = 
ev.Get()->Cookie; - BLOG_TRACE("TabletStateResponse from node " << nodeId); - NKikimrWhiteboard::TEvTabletStateResponse response = std::move(ev->Get()->Record); - bool needToGroup = response.TabletStateInfoSize() > 0 && !response.GetTabletStateInfo(0).HasCount(); - if (needToGroup) { // for compatibility with older versions - GroupWhiteboardResponses(response, "Type,Overall", false); - } - auto& vecTablets(TabletInfo[nodeId]); - for (const NKikimrWhiteboard::TTabletStateInfo& tablet : response.GetTabletStateInfo()) { - if (tablet.GetState() == NKikimrWhiteboard::TTabletStateInfo::Dead - || tablet.GetState() == NKikimrWhiteboard::TTabletStateInfo::Deleted) { - continue; - } - vecTablets.emplace_back(); - NKikimrViewer::TTabletStateInfo& viewerTablet(vecTablets.back()); - viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(tablet.GetType())); - viewerTablet.SetCount(tablet.GetCount()); - viewerTablet.SetState(GetFlagFromTabletState(tablet.GetState())); - } - RequestDone(); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvHive::TEvResponseHiveNodeStats, Handle); - hFunc(TEvBlobStorage::TEvControllerSelectGroupsResult, Handle); - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - NKikimrWhiteboard::TPDiskStateInfo& GetPDisk(TPDiskId pDiskId) { - auto itPDiskInfo = PDiskInfo.find(pDiskId.first); - if (itPDiskInfo == 
PDiskInfo.end()) { - itPDiskInfo = PDiskInfo.insert({pDiskId.first, NKikimrWhiteboard::TEvPDiskStateResponse{}}).first; - } - - for (auto& pDiskInfo : *itPDiskInfo->second.mutable_pdiskstateinfo()) { - if (pDiskInfo.pdiskid() == pDiskId.second) { - return pDiskInfo; - } - } - - NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = *itPDiskInfo->second.add_pdiskstateinfo(); - pDiskInfo.SetPDiskId(pDiskId.second); - return pDiskInfo; - } - - static double GetCPU(const NKikimrWhiteboard::TSystemStateInfo& sysInfo) { - double cpu = 0; - if (sysInfo.PoolStatsSize() > 0) { - for (const auto& ps : sysInfo.GetPoolStats()) { - cpu = std::max(cpu, ps.GetUsage()); - } - } - return cpu; - } - - static double GetLoadAverage(const NKikimrWhiteboard::TSystemStateInfo& sysInfo) { - if (sysInfo.LoadAverageSize() > 0 && sysInfo.GetNumberOfCpus() > 0) { - return sysInfo.GetLoadAverage(0) * 100 / sysInfo.GetNumberOfCpus(); - } - return 0; - } - - static uint32 GetMissing(const NKikimrViewer::TNodeInfo& nodeInfo) { - uint32 missing = 0; - for (const auto& pDisk : nodeInfo.GetPDisks()) { - if (pDisk.state() != NKikimrBlobStorage::TPDiskState::Normal) { - missing++; - } - } - return missing; - } - - void ReplyAndPassAway() { - NKikimrViewer::TNodesInfo result; - - if (Storage && BaseConfig) { - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(BaseConfig->Record); - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - const NKikimrBlobStorage::TBaseConfig& pbConfig(pbStatus.GetBaseConfig()); - for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pDisk : pbConfig.GetPDisk()) { - if (!FilterNodeIds.empty() && FilterNodeIds.count(pDisk.GetNodeId()) == 0) { - continue; - } - if (pDisk.GetNodeId() < MinAllowedNodeId || pDisk.GetNodeId() > MaxAllowedNodeId) { - continue; - } - TPDiskId pDiskId(pDisk.GetNodeId(), pDisk.GetPDiskId()); - NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = GetPDisk(pDiskId); - pDiskInfo.SetPath(pDisk.GetPath()); - 
pDiskInfo.SetGuid(pDisk.GetGuid()); - pDiskInfo.SetCategory(static_cast(pDisk.GetType())); - if (pDiskInfo.GetTotalSize() == 0) { - pDiskInfo.SetTotalSize(pDisk.GetPDiskMetrics().GetTotalSize()); - } - if (pDiskInfo.GetAvailableSize() == 0) { - pDiskInfo.SetAvailableSize(pDisk.GetPDiskMetrics().GetAvailableSize()); - } - } - for (const NKikimrBlobStorage::TBaseConfig::TNode& node : pbConfig.GetNode()) { - if (!FilterNodeIds.empty() && FilterNodeIds.count(node.GetNodeId()) == 0) { - continue; - } - if (node.GetNodeId() < MinAllowedNodeId || node.GetNodeId() > MaxAllowedNodeId) { - continue; - } - if (node.GetLastDisconnectTimestamp() > node.GetLastConnectTimestamp()) { - DisconnectTime[node.GetNodeId()] = node.GetLastDisconnectTimestamp() / 1000; // us -> ms - } - } - } - - bool noDC = true; - bool noRack = true; - - for (TNodeId nodeId : NodeIds) { - if (!CheckNodeFilters(nodeId)) { - continue; - } - - NKikimrViewer::TNodeInfo& nodeInfo = *result.add_nodes(); - nodeInfo.set_nodeid(nodeId); - BLOG_TRACE("AddingNode " << nodeId); - auto itSystemState = SysInfo.find(nodeId); - if (itSystemState != SysInfo.end() && itSystemState->second.SystemStateInfoSize() > 0) { - *nodeInfo.MutableSystemState() = itSystemState->second.GetSystemStateInfo(0); - } else if (NodesInfo != nullptr) { - auto* icNodeInfo = NodesInfo->GetNodeInfo(nodeId); - if (icNodeInfo != nullptr) { - nodeInfo.MutableSystemState()->SetHost(icNodeInfo->Host); - } - auto itDisconnectTime = DisconnectTime.find(nodeId); - if (itDisconnectTime != DisconnectTime.end()) { - nodeInfo.MutableSystemState()->SetDisconnectTime(itDisconnectTime->second); - } - auto itNodeName = NodeName.find(nodeId); - if (itNodeName != NodeName.end()) { - nodeInfo.MutableSystemState()->SetNodeName(itNodeName->second); - } - } - if (Storage) { - auto itPDiskState = PDiskInfo.find(nodeId); - if (itPDiskState != PDiskInfo.end()) { - for (auto& protoPDiskInfo : *itPDiskState->second.MutablePDiskStateInfo()) { - 
NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = *nodeInfo.AddPDisks(); - pDiskInfo = std::move(protoPDiskInfo); - } - } - auto itVDiskState = VDiskInfo.find(nodeId); - if (itVDiskState != VDiskInfo.end()) { - for (auto& protoVDiskInfo : *itVDiskState->second.MutableVDiskStateInfo()) { - NKikimrWhiteboard::TVDiskStateInfo& vDiskInfo = *nodeInfo.AddVDisks(); - vDiskInfo = std::move(protoVDiskInfo); - } - } - } - if (Tablets) { - auto itTabletState = TabletInfo.find(nodeId); - if (itTabletState != TabletInfo.end()) { - for (auto& viewerTabletInfo : itTabletState->second) { - NKikimrViewer::TTabletStateInfo& tabletInfo = *nodeInfo.AddTablets(); - tabletInfo = std::move(viewerTabletInfo); - } - } - } - - if (!nodeInfo.GetSystemState().GetLocation().GetDataCenter().empty()) { - noDC = false; - } - if (nodeInfo.GetSystemState().GetSystemLocation().GetDataCenter() != 0) { - noDC = false; - } - if (!nodeInfo.GetSystemState().GetLocation().GetRack().empty()) { - noRack = false; - } - if (nodeInfo.GetSystemState().GetSystemLocation().GetRack() != 0) { - noRack = false; - } - } - - if (!SortedNodeList) { - switch (Sort) { - case ESort::NodeId: - case ESort::Host: - case ESort::DC: - // already sorted - break; - case ESort::Rack: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetRack();}, ReverseSort); - break; - case ESort::Version: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetVersion();}, ReverseSort); - break; - case ESort::Uptime: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetStartTime();}, ReverseSort); - break; - case ESort::Memory: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetMemoryUsed();}, ReverseSort); - break; - case ESort::CPU: - SortCollection(*result.MutableNodes(), [](const 
NKikimrViewer::TNodeInfo& node) { return GetCPU(node.GetSystemState());}, ReverseSort); - break; - case ESort::LoadAverage: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return GetLoadAverage(node.GetSystemState());}, ReverseSort); - break; - case ESort::Missing: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return GetMissing(node);}, ReverseSort); - break; - } - } - - result.SetTotalNodes(PassedNodeIds.size()); - result.SetFoundNodes(LimitApplied ? PassedNodeIds.size() : result.NodesSize()); - - BLOG_TRACE("Total/Found " << result.GetTotalNodes() << "/" << result.GetFoundNodes()); - - if (!LimitApplied) { - auto& nodes = *result.MutableNodes(); - if (Offset.has_value()) { - BLOG_TRACE("ErasingFromBegining " << Offset.value()); - if (size_t(nodes.size()) > Offset.value()) { - nodes.erase(nodes.begin(), std::next(nodes.begin(), Offset.value())); - } else { - nodes.Clear(); - } - } - if (Limit.has_value()) { - BLOG_TRACE("LimitingWith " << Limit.value()); - if (size_t(nodes.size()) > Limit.value()) { - nodes.erase(std::next(nodes.begin(), Limit.value()), nodes.end()); - } - } - } - - for (NKikimrViewer::TNodeInfo& nodeInfo : *result.MutableNodes()) { - if (Storage) { - { - auto& cont(*nodeInfo.MutablePDisks()); - std::sort(cont.begin(), cont.end(), [](const NKikimrWhiteboard::TPDiskStateInfo& a, const NKikimrWhiteboard::TPDiskStateInfo& b) -> bool { - return a.GetPath() < b.GetPath(); - }); - } - { - auto& cont(*nodeInfo.MutableVDisks()); - std::sort(cont.begin(), cont.end(), [](const NKikimrWhiteboard::TVDiskStateInfo& a, const NKikimrWhiteboard::TVDiskStateInfo& b) -> bool { - return VDiskIDFromVDiskID(a.GetVDiskId()) < VDiskIDFromVDiskID(b.GetVDiskId()); - }); - } - } - if (Tablets) { - { - auto& cont(*nodeInfo.MutableTablets()); - std::sort(cont.begin(), cont.end(), [](const NKikimrViewer::TTabletStateInfo& a, const NKikimrViewer::TTabletStateInfo& b) -> bool { - return a.GetType() < 
b.GetType(); - }); - } - } - } - - if (MaximumDisksPerNode.has_value()) { - result.SetMaximumDisksPerNode(MaximumDisksPerNode.value()); - } - if (noDC) { - result.SetNoDC(true); - } - if (noRack) { - result.SetNoRack(true); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, result, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as numbers - required: false - type: boolean - - name: path - in: query - description: path to schema object - required: false - type: string - - name: with - in: query - description: filter nodes by missing disks or space - required: false - type: string - - name: type - in: query - description: nodes type to get (static,dynamic,any) - required: false - type: string - - name: storage - in: query - description: return storage info - required: false - type: boolean - - name: tablets - in: query - description: return tablets info - required: false - type: boolean - - name: sort - in: query - description: sort by (NodeId,Host,DC,Rack,Version,Uptime,Memory,CPU,LoadAverage,Missing) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: uptime - in: query - description: return only nodes with 
less uptime in sec. - required: false - type: integer - - name: problems_only - in: query - description: return only problem nodes - required: false - type: boolean - - name: filter - in: query - description: filter nodes by id or host - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Nodes info"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Information about nodes"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_pdisk_restart.h b/ydb/core/viewer/json_pdisk_restart.h deleted file mode 100644 index 98d499466eb2..000000000000 --- a/ydb/core/viewer/json_pdisk_restart.h +++ /dev/null @@ -1,225 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonPDiskRestart : public TViewerPipeClient { - enum EEv { - EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); - - struct TEvRetryNodeRequest : NActors::TEventLocal { - TEvRetryNodeRequest() - {} - }; - -protected: - using TThis = TJsonPDiskRestart; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Timeout = 0; - ui32 ActualRetries = 0; - ui32 Retries = 0; - TDuration RetryPeriod = TDuration::MilliSeconds(500); - - std::unique_ptr Response; - - ui32 NodeId = 0; - ui32 PDiskId = 0; - bool Force = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonPDiskRestart(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& 
params(Event->Get()->Request.GetParams()); - NodeId = FromStringWithDefault(params.Get("node_id"), 0); - PDiskId = FromStringWithDefault(params.Get("pdisk_id"), Max()); - Force = FromStringWithDefault(params.Get("force"), false); - - if (PDiskId == Max()) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'pdisk_id' is required"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_POST) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only POST method is allowed"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Force && !Viewer->CheckAccessAdministration(Event->Get())) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - - if (!NodeId) { - NodeId = TlsActivationContext->ActorSystem()->NodeId; - } - TBase::InitConfig(params); - - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Retries = FromStringWithDefault(params.Get("retries"), 0); - RetryPeriod = TDuration::MilliSeconds(FromStringWithDefault(params.Get("retry_period"), RetryPeriod.MilliSeconds())); - - SendRequest(); - - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - cFunc(TEvRetryNodeRequest::EventType, HandleRetry); - cFunc(TEvents::TEvUndelivered::EventType, Undelivered); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendRequest() { - RequestBSControllerPDiskRestart(NodeId, PDiskId, Force); - } - - bool RetryRequest() { - if (Retries) { - if (++ActualRetries <= Retries) { - TBase::Schedule(RetryPeriod, new TEvRetryNodeRequest()); - return 
true; - } - } - return false; - } - - void Undelivered() { - if (!RetryRequest()) { - TBase::RequestDone(); - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - Response.reset(ev->Release().Release()); - ReplyAndPassAway(); - } - - void HandleRetry() { - SendRequest(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving response from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void PassAway() override { - TBase::PassAway(); - } - - void ReplyAndPassAway() { - NJson::TJsonValue json; - if (Response != nullptr) { - if (Response->Record.GetResponse().GetSuccess()) { - json["result"] = true; - } else { - json["result"] = false; - TString error; - bool forceRetryPossible = false; - Viewer->TranslateFromBSC2Human(Response->Record.GetResponse(), error, forceRetryPossible); - json["error"] = error; - if (forceRetryPossible && Viewer->CheckAccessAdministration(Event->Get())) { - json["forceRetryPossible"] = true; - } - } - json["debugMessage"] = Response->Record.ShortDebugString(); - TBase::Send(Event->Sender, - new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json)), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } else { - TBase::Send(Event->Sender, - new NMon::TEvHttpInfoRes(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "No response was received from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - return YAML::Load(R"___( - post: - tags: - - pdisk - summary: Restart PDisk - description: Restart PDisk on the specified node - parameters: - - name: node_id - in: query - description: node identifier - type: integer - - name: pdisk_id - in: query - description: pdisk identifier - required: true - type: integer - - name: timeout - in: query - description: timeout 
in ms - required: false - type: integer - - name: force - in: query - description: attempt forced operation, ignore warnings - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - result: - type: boolean - description: was operation successful or not - error: - type: string - description: details about failed operation - forceRetryPossible: - type: boolean - description: if true, operation can be retried with force flag - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); -} - -} -} diff --git a/ydb/core/viewer/json_pdiskinfo.h b/ydb/core/viewer/json_pdiskinfo.h deleted file mode 100644 index 5a2ff46edc2f..000000000000 --- a/ydb/core/viewer/json_pdiskinfo.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvPDiskStateResponse; - using TResponseEventType = TEvWhiteboard::TEvPDiskStateResponse; - using TElementType = NKikimrWhiteboard::TPDiskStateInfo; - using TElementKeyType = std::pair; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutablePDiskStateInfo(); - } - - static std::pair GetElementKey(const TElementType& type) { - return std::make_pair(type.GetNodeId(), type.GetPDiskId()); - } - - static TString GetDefaultMergeField() { - return "NodeId,PDiskId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -using TJsonPDiskInfo = 
TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "PDisk information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns PDisk information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_pipe_req.cpp b/ydb/core/viewer/json_pipe_req.cpp new file mode 100644 index 000000000000..52777ea8492c --- /dev/null +++ b/ydb/core/viewer/json_pipe_req.cpp @@ -0,0 +1,847 @@ +#include "json_pipe_req.h" +#include +#include + +namespace NKikimr::NViewer { + +NTabletPipe::TClientConfig TViewerPipeClient::GetPipeClientConfig() { + NTabletPipe::TClientConfig clientConfig; + if (WithRetry) { + clientConfig.RetryPolicy = {.RetryLimitCount = 3}; + } + return clientConfig; +} + +TViewerPipeClient::~TViewerPipeClient() = default; + +TViewerPipeClient::TViewerPipeClient() = default; + +TViewerPipeClient::TViewerPipeClient(NWilson::TTraceId traceId) { + if (traceId) { + Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), "viewer", NWilson::EFlags::AUTO_END}; + } +} + +TViewerPipeClient::TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev, const TString& handlerName) + : Viewer(viewer) + , Event(ev) +{ + TCgiParameters params = Event->Get()->Request.GetParams(); + if (Event->Get()->Request.GetHeader("Content-Type") == "application/json") { + NJson::TJsonValue jsonData; + if (NJson::ReadJsonTree(Event->Get()->Request.GetPostContent(), &jsonData)) { + if (jsonData.IsMap()) { + for (const auto& [key, value] : jsonData.GetMap()) { + switch (value.GetType()) { + case NJson::EJsonValueType::JSON_STRING: + case NJson::EJsonValueType::JSON_INTEGER: + case NJson::EJsonValueType::JSON_UINTEGER: + case NJson::EJsonValueType::JSON_DOUBLE: + case NJson::EJsonValueType::JSON_BOOLEAN: + params.InsertUnescaped(key, value.GetStringRobust()); + break; + default: + break; + } + } + } + } + } + InitConfig(params); + NWilson::TTraceId traceId; + TStringBuf 
traceparent = Event->Get()->Request.GetHeader("traceparent"); + if (traceparent) { + traceId = NWilson::TTraceId::FromTraceparentHeader(traceparent, TComponentTracingLevels::ProductionVerbose); + } + TStringBuf wantTrace = Event->Get()->Request.GetHeader("X-Want-Trace"); + TStringBuf traceVerbosity = Event->Get()->Request.GetHeader("X-Trace-Verbosity"); + TStringBuf traceTTL = Event->Get()->Request.GetHeader("X-Trace-TTL"); + if (!traceId && (FromStringWithDefault(wantTrace) || !traceVerbosity.empty() || !traceTTL.empty())) { + ui8 verbosity = TComponentTracingLevels::ProductionVerbose; + if (traceVerbosity) { + verbosity = FromStringWithDefault(traceVerbosity, verbosity); + verbosity = std::min(verbosity, NWilson::TTraceId::MAX_VERBOSITY); + } + ui32 ttl = Max(); + if (traceTTL) { + ttl = FromStringWithDefault(traceTTL, ttl); + ttl = std::min(ttl, NWilson::TTraceId::MAX_TIME_TO_LIVE); + } + traceId = NWilson::TTraceId::NewTraceId(verbosity, ttl); + } + if (traceId) { + Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), handlerName ? 
"http " + handlerName : "http viewer", NWilson::EFlags::AUTO_END}; + Span.Attribute("request_type", TString(Event->Get()->Request.GetUri().Before('?'))); + Span.Attribute("request_params", TString(Event->Get()->Request.GetUri().After('?'))); + } +} + +TActorId TViewerPipeClient::ConnectTabletPipe(NNodeWhiteboard::TTabletId tabletId) { + TPipeInfo& pipeInfo = PipeInfo[tabletId]; + if (!pipeInfo.PipeClient) { + auto pipe = NTabletPipe::CreateClient(SelfId(), tabletId, GetPipeClientConfig()); + pipeInfo.PipeClient = RegisterWithSameMailbox(pipe); + } + pipeInfo.Requests++; + return pipeInfo.PipeClient; +} + +void TViewerPipeClient::SendEvent(std::unique_ptr event) { + if (DelayedRequests.empty() && Requests < MaxRequestsInFlight) { + TActivationContext::Send(event.release()); + ++Requests; + } else { + DelayedRequests.push_back({ + .Event = std::move(event), + }); + } +} + +void TViewerPipeClient::SendRequest(TActorId recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) { + SendEvent(std::make_unique(recipient, SelfId(), ev, flags, cookie, nullptr /*forwardOnNondelivery*/, std::move(traceId))); +} + +void TViewerPipeClient::SendRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie, NWilson::TTraceId traceId) { + std::unique_ptr event = std::make_unique(pipe, SelfId(), ev, 0 /*flags*/, cookie, nullptr /*forwardOnNondelivery*/, std::move(traceId)); + event->Rewrite(TEvTabletPipe::EvSend, pipe); + SendEvent(std::move(event)); +} + +void TViewerPipeClient::SendDelayedRequests() { + while (!DelayedRequests.empty() && Requests < MaxRequestsInFlight) { + auto& request(DelayedRequests.front()); + TActivationContext::Send(request.Event.release()); + ++Requests; + DelayedRequests.pop_front(); + } +} + +TPathId TViewerPipeClient::GetPathId(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev) { + if (ev.Request->ResultSet.size() == 1) { + if (ev.Request->ResultSet.begin()->Self) { + const auto& info = ev.Request->ResultSet.begin()->Self->Info; + 
return TPathId(info.GetSchemeshardId(), info.GetPathId()); + } + if (ev.Request->ResultSet.begin()->TableId) { + return ev.Request->ResultSet.begin()->TableId.PathId; + } + } + return {}; +} + +TString TViewerPipeClient::GetPath(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev) { + if (ev.Request->ResultSet.size() == 1) { + return CanonizePath(ev.Request->ResultSet.begin()->Path); + } + return {}; +} + +TPathId TViewerPipeClient::GetPathId(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + return GetPathId(*ev->Get()); +} + +TString TViewerPipeClient::GetPath(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + return GetPath(*ev->Get()); +} + +bool TViewerPipeClient::IsSuccess(const std::unique_ptr& ev) { + return (ev->Request->ResultSet.size() > 0) && (std::find_if(ev->Request->ResultSet.begin(), ev->Request->ResultSet.end(), + [](const auto& entry) { + return entry.Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok; + }) != ev->Request->ResultSet.end()); +} + +TString TViewerPipeClient::GetError(const std::unique_ptr& ev) { + if (ev->Request->ResultSet.size() == 0) { + return "empty response"; + } + for (const auto& entry : ev->Request->ResultSet) { + if (entry.Status != NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { + switch (entry.Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::Ok: + return "Ok"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::Unknown: + return "Unknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return "RootUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + return "PathErrorUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotTable: + return "PathNotTable"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotPath: + return "PathNotPath"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::TableCreationNotComplete: + return "TableCreationNotComplete"; + case 
NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError: + return "LookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError: + return "RedirectLookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::AccessDenied: + return "AccessDenied"; + default: + return ::ToString(static_cast(ev->Request->ResultSet.begin()->Status)); + } + } + } + return "no error"; +} + +bool TViewerPipeClient::IsSuccess(const std::unique_ptr& ev) { + return ev->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok; +} + +TString TViewerPipeClient::GetError(const std::unique_ptr& ev) { + switch (ev->Status) { + case TEvStateStorage::TEvBoardInfo::EStatus::Unknown: + return "Unknown"; + case TEvStateStorage::TEvBoardInfo::EStatus::Ok: + return "Ok"; + case TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable: + return "NotAvailable"; + default: + return ::ToString(static_cast(ev->Status)); + } +} + +void TViewerPipeClient::RequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnFollowers(Followers); + request->Record.SetReturnMetrics(Metrics); + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + +void TViewerPipeClient::RequestHiveNodeStats(NNodeWhiteboard::TTabletId hiveId, TPathId pathId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnMetrics(Metrics); + if (pathId != TPathId()) { + request->Record.SetReturnExtendedTabletInfo(true); + request->Record.SetFilterTabletsBySchemeShardId(pathId.OwnerId); + request->Record.SetFilterTabletsByPathId(pathId.LocalPathId); + } + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + +void TViewerPipeClient::RequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + 
+TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeViewerRequest(TNodeId nodeId, TEvViewer::TEvViewerRequest* ev, ui32 flags) { + TActorId viewerServiceId = MakeViewerID(nodeId); + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); + TStringBuilder askFor; + askFor << ev->Record.GetLocation().NodeIdSize() << " nodes ("; + for (size_t i = 0; i < std::min(ev->Record.GetLocation().NodeIdSize(), 16); ++i) { + if (i) { + askFor << ", "; + } + askFor << ev->Record.GetLocation().GetNodeId(i); + } + if (ev->Record.GetLocation().NodeIdSize() > 16) { + askFor << ", ..."; + } + askFor << ")"; + response.Span.Attribute("ask_for", askFor); + switch (ev->Record.Request_case()) { + case NKikimrViewer::TEvViewerRequest::kTabletRequest: + response.Span.Attribute("request_type", "TabletRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kSystemRequest: + response.Span.Attribute("request_type", "SystemRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kQueryRequest: + response.Span.Attribute("request_type", "QueryRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kRenderRequest: + response.Span.Attribute("request_type", "RenderRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kAutocompleteRequest: + response.Span.Attribute("request_type", "AutocompleteRequest"); + break; + default: + response.Span.Attribute("request_type", ::ToString(static_cast(ev->Record.Request_case()))); + break; + } + } + SendRequest(viewerServiceId, ev, flags, nodeId, response.Span.GetTraceId()); + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnFollowers(Followers); + request->Record.SetReturnMetrics(Metrics); + auto response = 
MakeRequestToPipe(pipeClient, request.Release(), hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + auto response = MakeRequestToPipe(pipeClient, request.Release(), hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveNodeStats(TTabletId hiveId, TEvHive::TEvRequestHiveNodeStats* request) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + auto response = MakeRequestToPipe(pipeClient, request, hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestViewer(TNodeId nodeId, TEvViewer::TEvViewerRequest* request, ui32 flags) { + auto requestType = request->Record.GetRequestCase(); + auto response = MakeRequest(MakeViewerID(nodeId), request, flags, nodeId); + if (response.Span) { + TString requestTypeString; + switch (requestType) { + case NKikimrViewer::TEvViewerRequest::kTabletRequest: + requestTypeString = "TabletRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kSystemRequest: + requestTypeString = "SystemRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kQueryRequest: + requestTypeString = "QueryRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kRenderRequest: + requestTypeString = "RenderRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kAutocompleteRequest: + requestTypeString = "AutocompleteRequest"; + break; + default: + requestTypeString = ::ToString(static_cast(requestType)); + break; + } + 
response.Span.Attribute("request_type", requestTypeString); + } + return response; +} + +void TViewerPipeClient::RequestConsoleListTenants() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleListTenants() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + return MakeRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleNodeConfigByTenant(TString tenant, ui64 cookie) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + auto request = MakeHolder(); + request->Record.MutableNode()->SetTenant(tenant); + request->Record.AddItemKinds(static_cast(NKikimrConsole::TConfigItem::FeatureFlagsItem)); + return MakeRequestToPipe(pipeClient, request.Release(), cookie); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleGetAllConfigs() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + return MakeRequestToPipe(pipeClient, new NConsole::TEvConsole::TEvGetAllConfigsRequest()); +} + +void TViewerPipeClient::RequestConsoleGetTenantStatus(const TString& path) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->set_path(path); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleGetTenantStatus(const TString& path) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->set_path(path); + auto response = MakeRequestToPipe(pipeClient, request.Release()); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; +} + +void TViewerPipeClient::RequestBSControllerConfig() { + TActorId pipeClient = 
ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerConfigWithStoragePools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestBSControllerConfigWithStoragePools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); + return MakeRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerInfo() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerSelectGroups(THolder request) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestBSControllerSelectGroups(THolder request, ui64 cookie) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + return MakeRequestToPipe(pipeClient, request.Release(), cookie); +} + +void TViewerPipeClient::RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* restartPDisk = request->Record.MutableRequest()->AddCommand()->MutableRestartPDisk(); + 
restartPDisk->MutableTargetPDiskId()->SetNodeId(nodeId); + restartPDisk->MutableTargetPDiskId()->SetPDiskId(pdiskId); + if (force) { + request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* evictVDisk = request->Record.MutableRequest()->AddCommand()->MutableReassignGroupDisk(); + evictVDisk->SetGroupId(groupId); + evictVDisk->SetGroupGeneration(groupGeneration); + evictVDisk->SetFailRealmIdx(failRealmIdx); + evictVDisk->SetFailDomainIdx(failDomainIdx); + evictVDisk->SetVDiskIdx(vdiskIdx); + if (force) { + request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPDiskInfo(ui32 nodeId, ui32 pdiskId) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + request->Record.SetInclusiveFrom(true); + request->Record.SetInclusiveTo(true); + request->Record.MutableFrom()->SetNodeId(nodeId); + request->Record.MutableFrom()->SetPDiskId(pdiskId); + request->Record.MutableTo()->SetNodeId(nodeId); + request->Record.MutableTo()->SetPDiskId(pdiskId); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + request->Record.SetInclusiveFrom(true); + request->Record.SetInclusiveTo(true); + request->Record.MutableFrom()->SetNodeId(nodeId); + request->Record.MutableFrom()->SetPDiskId(pdiskId); + 
request->Record.MutableFrom()->SetVSlotId(0); + request->Record.MutableTo()->SetNodeId(nodeId); + request->Record.MutableTo()->SetPDiskId(pdiskId); + request->Record.MutableTo()->SetVSlotId(std::numeric_limits::max()); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerGroups() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerVSlots() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPDisks() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerStorageStats() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + return MakeRequestToPipe(pipeClient, new NSysView::TEvSysView::TEvGetStorageStatsRequest()); +} + +void TViewerPipeClient::RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* updateDriveStatus = request->Record.MutableRequest()->AddCommand()->MutableUpdateDriveStatus(); + updateDriveStatus->CopyFrom(driveStatus); + if (force) { + 
request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestSchemeCacheNavigate(const TString& path) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = SplitPath(path); + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); +} + +void TViewerPipeClient::RequestSchemeCacheNavigate(const TPathId& pathId) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.TableId.PathId = pathId; + entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestSchemeCacheNavigate(const TString& path, ui64 cookie) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = SplitPath(path); + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + auto response = MakeRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0 /*flags*/, cookie); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestSchemeCacheNavigate(TPathId pathId, ui64 cookie) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.TableId.PathId = pathId; + entry.RequestType = 
NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + auto response = MakeRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0 /*flags*/, cookie); + if (response.Span) { + response.Span.Attribute("path_id", pathId.ToString()); + } + return response; +} + +void TViewerPipeClient::RequestTxProxyDescribe(const TString& path) { + THolder request(new TEvTxUserProxy::TEvNavigate()); + request->Record.MutableDescribePath()->SetPath(path); + SendRequest(MakeTxProxyID(), request.Release()); +} + +void TViewerPipeClient::RequestStateStorageEndpointsLookup(const TString& path) { + RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), + SelfId(), + EBoardLookupMode::Second)); + ++Requests; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestStateStorageEndpointsLookup(const TString& path, ui64 cookie) { + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, "BoardLookupActor")); + RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), + SelfId(), + EBoardLookupMode::Second, {}, cookie)); + if (response.Span) { + response.Span.Attribute("path", path); + } + ++Requests; + return response; +} + +void TViewerPipeClient::RequestStateStorageMetadataCacheEndpointsLookup(const TString& path) { + if (!AppData()->DomainsInfo->Domain) { + return; + } + RegisterWithSameMailbox(CreateBoardLookupActor(MakeDatabaseMetadataCacheBoardPath(path), + SelfId(), + EBoardLookupMode::Second)); + ++Requests; +} + +std::vector TViewerPipeClient::GetNodesFromBoardReply(const TEvStateStorage::TEvBoardInfo& ev) { + std::vector databaseNodes; + if (ev.Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { + for (const auto& [actorId, infoEntry] : ev.InfoEntries) { + 
databaseNodes.emplace_back(actorId.NodeId()); + } + } + std::sort(databaseNodes.begin(), databaseNodes.end()); + databaseNodes.erase(std::unique(databaseNodes.begin(), databaseNodes.end()), databaseNodes.end()); + return databaseNodes; +} + +std::vector TViewerPipeClient::GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { + return GetNodesFromBoardReply(*ev->Get()); +} + +void TViewerPipeClient::InitConfig(const TCgiParameters& params) { + Followers = FromStringWithDefault(params.Get("followers"), Followers); + Metrics = FromStringWithDefault(params.Get("metrics"), Metrics); + WithRetry = FromStringWithDefault(params.Get("with_retry"), WithRetry); + MaxRequestsInFlight = FromStringWithDefault(params.Get("max_requests_in_flight"), MaxRequestsInFlight); + Database = params.Get("database"); + if (!Database) { + Database = params.Get("tenant"); + } + Direct = FromStringWithDefault(params.Get("direct"), Direct); + JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); + JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); + if (FromStringWithDefault(params.Get("enums"), true)) { + Proto2JsonConfig.EnumMode = TProto2JsonConfig::EnumValueMode::EnumName; + } + if (!FromStringWithDefault(params.Get("ui64"), false)) { + Proto2JsonConfig.StringifyNumbers = TProto2JsonConfig::EStringifyNumbersMode::StringifyInt64Always; + } + Proto2JsonConfig.MapAsObject = true; + Proto2JsonConfig.ConvertAny = true; + Proto2JsonConfig.WriteNanAsString = true; + Timeout = TDuration::MilliSeconds(FromStringWithDefault(params.Get("timeout"), Timeout.MilliSeconds())); +} + +void TViewerPipeClient::InitConfig(const TRequestSettings& settings) { + Followers = settings.Followers; + Metrics = settings.Metrics; + WithRetry = settings.WithRetry; +} + +void TViewerPipeClient::ClosePipes() { + for (const auto& [tabletId, pipeInfo] : PipeInfo) { + if (pipeInfo.PipeClient) { + NTabletPipe::CloseClient(SelfId(), pipeInfo.PipeClient); + } + } + 
PipeInfo.clear(); +} + +ui32 TViewerPipeClient::FailPipeConnect(NNodeWhiteboard::TTabletId tabletId) { + auto itPipeInfo = PipeInfo.find(tabletId); + if (itPipeInfo != PipeInfo.end()) { + ui32 requests = itPipeInfo->second.Requests; + NTabletPipe::CloseClient(SelfId(), itPipeInfo->second.PipeClient); + PipeInfo.erase(itPipeInfo); + return requests; + } + return 0; +} + +TRequestState TViewerPipeClient::GetRequest() const { + return {Event->Get(), Span.GetTraceId()}; +} + +void TViewerPipeClient::ReplyAndPassAway(TString data, const TString& error) { + TString message = error; + Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + if (message.empty()) { + TStringBuf dataParser(data); + if (dataParser.NextTok(' ') == "HTTP/1.1") { + TStringBuf code = dataParser.NextTok(' '); + if (code.size() == 3 && code[0] != '2') { + message = dataParser.NextTok('\n'); + } + } + } + if (Span) { + if (message) { + Span.EndError(message); + } else { + Span.EndOk(); + } + } + PassAway(); +} + +TString TViewerPipeClient::GetHTTPOK(TString contentType, TString response, TInstant lastModified) { + return Viewer->GetHTTPOK(GetRequest(), std::move(contentType), std::move(response), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(TString response, TInstant lastModified) { + return Viewer->GetHTTPOKJSON(GetRequest(), std::move(response), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(const NJson::TJsonValue& response, TInstant lastModified) { + return GetHTTPOKJSON(NJson::WriteJson(response, false), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(const google::protobuf::Message& response, TInstant lastModified) { + TStringStream json; + NProtobufJson::Proto2Json(response, json, Proto2JsonConfig); + return GetHTTPOKJSON(json.Str(), lastModified); +} + +TString TViewerPipeClient::GetHTTPGATEWAYTIMEOUT(TString contentType, TString response) { + return Viewer->GetHTTPGATEWAYTIMEOUT(GetRequest(), 
std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPBADREQUEST(TString contentType, TString response) { + return Viewer->GetHTTPBADREQUEST(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPINTERNALERROR(TString contentType, TString response) { + return Viewer->GetHTTPINTERNALERROR(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPFORBIDDEN(TString contentType, TString response) { + return Viewer->GetHTTPFORBIDDEN(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::MakeForward(const std::vector& nodes) { + return Viewer->MakeForward(GetRequest(), nodes); +} + +void TViewerPipeClient::RequestDone(ui32 requests) { + if (requests == 0) { + return; + } + Requests -= requests; + if (!DelayedRequests.empty()) { + SendDelayedRequests(); + } + if (Requests == 0) { + ReplyAndPassAway(); + } +} + +void TViewerPipeClient::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + if (ev->Get()->Status != NKikimrProto::OK) { + ui32 requests = FailPipeConnect(ev->Get()->TabletId); + RequestDone(requests); + } +} + +void TViewerPipeClient::HandleResolveResource(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + if (ResourceNavigateResponse) { + ResourceNavigateResponse->Set(std::move(ev)); + if (ResourceNavigateResponse->IsOk()) { + TSchemeCacheNavigate::TEntry& entry(ResourceNavigateResponse->Get()->Request->ResultSet.front()); + SharedDatabase = CanonizePath(entry.Path); + if (SharedDatabase == AppData()->TenantName) { + Direct = true; + return Bootstrap(); // retry bootstrap without redirect this time + } + DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(SharedDatabase); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - shared database not found")); + } + } +} + +void 
TViewerPipeClient::HandleResolveDatabase(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + if (DatabaseNavigateResponse) { + DatabaseNavigateResponse->Set(std::move(ev)); + if (DatabaseNavigateResponse->IsOk()) { + TSchemeCacheNavigate::TEntry& entry(DatabaseNavigateResponse->Get()->Request->ResultSet.front()); + if (entry.DomainInfo && entry.DomainInfo->ResourcesDomainKey && entry.DomainInfo->DomainKey != entry.DomainInfo->ResourcesDomainKey) { + ResourceNavigateResponse = MakeRequestSchemeCacheNavigate(TPathId(entry.DomainInfo->ResourcesDomainKey)); + Become(&TViewerPipeClient::StateResolveResource); + return; + } + DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(CanonizePath(entry.Path)); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - not found")); + } + } +} + +void TViewerPipeClient::HandleResolve(TEvStateStorage::TEvBoardInfo::TPtr& ev) { + if (DatabaseBoardInfoResponse) { + DatabaseBoardInfoResponse->Set(std::move(ev)); + if (DatabaseBoardInfoResponse->IsOk()) { + ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(DatabaseBoardInfoResponse->GetRef()))); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - no nodes found")); + } + } +} + +void TViewerPipeClient::HandleTimeout() { + ReplyAndPassAway(GetHTTPGATEWAYTIMEOUT()); +} + +STATEFN(TViewerPipeClient::StateResolveDatabase) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvStateStorage::TEvBoardInfo, HandleResolve); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleResolveDatabase); + cFunc(TEvents::TEvWakeup::EventType, HandleTimeout); + } +} + +STATEFN(TViewerPipeClient::StateResolveResource) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvStateStorage::TEvBoardInfo, HandleResolve); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleResolveResource); + cFunc(TEvents::TEvWakeup::EventType, HandleTimeout); + } +} + +void TViewerPipeClient::RedirectToDatabase(const TString& 
database) { + DatabaseNavigateResponse = MakeRequestSchemeCacheNavigate(database); + Become(&TViewerPipeClient::StateResolveDatabase); +} + +bool TViewerPipeClient::NeedToRedirect() { + if (Event) { + Direct |= !Event->Get()->Request.GetHeader("X-Forwarded-From-Node").empty(); // we're already forwarding + Direct |= (Database == AppData()->TenantName) || Database.empty(); // we're already on the right node or don't use database filter + if (Database && !Direct) { + RedirectToDatabase(Database); // to find some dynamic node and redirect query there + return true; + } + } + return false; +} + +void TViewerPipeClient::PassAway() { + std::sort(SubscriptionNodeIds.begin(), SubscriptionNodeIds.end()); + SubscriptionNodeIds.erase(std::unique(SubscriptionNodeIds.begin(), SubscriptionNodeIds.end()), SubscriptionNodeIds.end()); + for (TNodeId nodeId : SubscriptionNodeIds) { + Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); + } + ClosePipes(); + TBase::PassAway(); +} + +void TViewerPipeClient::AddEvent(const TString& name) { + if (Span) { + Span.Event(name); + } +} + +} diff --git a/ydb/core/viewer/json_pipe_req.h b/ydb/core/viewer/json_pipe_req.h index b37026c1826d..d3d9a7a4e6c9 100644 --- a/ydb/core/viewer/json_pipe_req.h +++ b/ydb/core/viewer/json_pipe_req.h @@ -1,54 +1,69 @@ #pragma once - -#include -#include -#include -#include -#include -#include +#include "viewer.h" #include #include +#include #include +#include #include #include +#include #include #include #include -#include -#include "viewer.h" +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NKikimr; using namespace NSchemeCache; +using namespace NProtobufJson; using NNodeWhiteboard::TNodeId; +using NNodeWhiteboard::TTabletId; + +class TViewerPipeClient : public TActorBootstrapped { + using TBase = TActorBootstrapped; + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { 
+ return NKikimrServices::TActivity::VIEWER_HANDLER; + } + + virtual void Bootstrap() = 0; + virtual void ReplyAndPassAway() = 0; -template -class TViewerPipeClient : public TActorBootstrapped { protected: - using TBase = TActorBootstrapped; bool Followers = true; bool Metrics = true; - bool WithRetry = true; + bool WithRetry = false; + TString Database; + TString SharedDatabase; + bool Direct = false; ui32 Requests = 0; - static constexpr ui32 MaxRequestsInFlight = 50; + ui32 MaxRequestsInFlight = 200; NWilson::TSpan Span; IViewer* Viewer = nullptr; NMon::TEvHttpInfo::TPtr Event; + TJsonSettings JsonSettings; + TProto2JsonConfig Proto2JsonConfig; + TDuration Timeout = TDuration::Seconds(10); struct TPipeInfo { TActorId PipeClient; ui32 Requests = 0; }; - std::unordered_map PipeInfo; + std::unordered_map PipeInfo; struct TDelayedRequest { std::unique_ptr Event; }; std::deque DelayedRequests; + std::vector SubscriptionNodeIds; template struct TRequestResponse { @@ -66,23 +81,30 @@ class TViewerPipeClient : public TActorBootstrapped { TRequestResponse& operator =(TRequestResponse&&) = default; void Set(std::unique_ptr&& response) { + constexpr bool hasErrorCheck = requires(const std::unique_ptr& r) {TViewerPipeClient::IsSuccess(r);}; + if constexpr (hasErrorCheck) { + if (!TViewerPipeClient::IsSuccess(response)) { + Error(TViewerPipeClient::GetError(response)); + return; + } + } if (!IsDone()) { Span.EndOk(); + Response = std::move(response); } - Response = std::move(response); } void Set(TAutoPtr>&& response) { Set(std::unique_ptr(response->Release().Release())); } - void Error(const TString& error) { + bool Error(const TString& error) { if (!IsDone()) { Span.EndError(error); - } - if (!IsOk()) { Response = error; + return true; } + return false; } bool IsOk() const { @@ -105,386 +127,192 @@ class TViewerPipeClient : public TActorBootstrapped { return std::get>(Response).get(); } - T* operator ->() { + const T* Get() const { return std::get>(Response).get(); } - 
TString GetError() const { - return std::get(Response); + T& GetRef() { + return *Get(); } - }; - NTabletPipe::TClientConfig GetPipeClientConfig() { - NTabletPipe::TClientConfig clientConfig; - if (WithRetry) { - clientConfig.RetryPolicy = {.RetryLimitCount = 3}; + const T& GetRef() const { + return *Get(); } - return clientConfig; - } - TViewerPipeClient() = default; - - TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - InitConfig(Event->Get()->Request.GetParams()); - NWilson::TTraceId traceId; - TStringBuf traceparent = Event->Get()->Request.GetHeader("traceparent"); - if (traceparent) { - traceId = NWilson::TTraceId::FromTraceparentHeader(traceparent, TComponentTracingLevels::ProductionVerbose); + T* operator ->() { + return Get(); } - TStringBuf wantTrace = Event->Get()->Request.GetHeader("X-Want-Trace"); - if (!traceId && FromStringWithDefault(wantTrace)) { - traceId = NWilson::TTraceId::NewTraceId(TComponentTracingLevels::ProductionVerbose, Max()); + + const T* operator ->() const { + return Get(); } - if (traceId) { - Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), "http", NWilson::EFlags::AUTO_END}; - Span.Attribute("request_type", TString(Event->Get()->Request.GetUri().Before('?'))); + + T& operator *() { + return GetRef(); } - } - TActorId ConnectTabletPipe(NNodeWhiteboard::TTabletId tabletId) { - TPipeInfo& pipeInfo = PipeInfo[tabletId]; - if (!pipeInfo.PipeClient) { - auto pipe = NTabletPipe::CreateClient(TBase::SelfId(), tabletId, GetPipeClientConfig()); - pipeInfo.PipeClient = TBase::RegisterWithSameMailbox(pipe); + const T& operator *() const { + return GetRef(); } - pipeInfo.Requests++; - return pipeInfo.PipeClient; - } - void SendEvent(std::unique_ptr event) { - if (DelayedRequests.empty() && Requests < MaxRequestsInFlight) { - TActivationContext::Send(event.release()); - ++Requests; - } else { - DelayedRequests.push_back({ - .Event = std::move(event), - }); + TString 
GetError() const { + return std::get(Response); } - } - void SendRequest(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { - SendEvent(std::make_unique(recipient, TBase::SelfId(), ev, flags, cookie, nullptr/*forwardOnNondelivery*/, std::move(traceId))); - } + void Event(const TString& name) { + if (Span) { + Span.Event(name); + } + } + }; - void SendRequestToPipe(const TActorId& pipe, IEventBase* ev, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { - std::unique_ptr event = std::make_unique(pipe, TBase::SelfId(), ev, 0/*flags*/, cookie, nullptr/*forwardOnNondelivery*/, std::move(traceId)); - event->Rewrite(TEvTabletPipe::EvSend, pipe); - SendEvent(std::move(event)); - } + std::optional> DatabaseNavigateResponse; + std::optional> ResourceNavigateResponse; + std::optional> DatabaseBoardInfoResponse; + + NTabletPipe::TClientConfig GetPipeClientConfig(); + + ~TViewerPipeClient(); + TViewerPipeClient(); + TViewerPipeClient(NWilson::TTraceId traceId); + TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev, const TString& handlerName = {}); + TActorId ConnectTabletPipe(TTabletId tabletId); + void SendEvent(std::unique_ptr event); + void SendRequest(TActorId recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}); + void SendRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie = 0, NWilson::TTraceId traceId = {}); template - TRequestResponse MakeRequest(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0) { + TRequestResponse MakeRequest(TActorId recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0) { TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); SendRequest(recipient, ev, flags, cookie, response.Span.GetTraceId()); + if (flags & IEventHandle::FlagSubscribeOnSession) { + SubscriptionNodeIds.push_back(recipient.NodeId()); + } return response; } template - TRequestResponse 
MakeRequestToPipe(const TActorId& pipe, IEventBase* ev, ui64 cookie = 0) { + TRequestResponse MakeRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie = 0) { TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); SendRequestToPipe(pipe, ev, cookie, response.Span.GetTraceId()); return response; } - void SendDelayedRequests() { - while (!DelayedRequests.empty() && Requests < MaxRequestsInFlight) { - auto& request(DelayedRequests.front()); - TActivationContext::Send(request.Event.release()); - ++Requests; - DelayedRequests.pop_front(); - } - } - - void RequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - request->Record.SetReturnFollowers(Followers); - request->Record.SetReturnMetrics(Metrics); - SendRequestToPipe(pipeClient, request.Release(), hiveId); - } - - void RequestHiveNodeStats(NNodeWhiteboard::TTabletId hiveId, TPathId pathId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - request->Record.SetReturnMetrics(Metrics); - if (pathId != TPathId()) { - request->Record.SetReturnExtendedTabletInfo(true); - request->Record.SetFilterTabletsBySchemeShardId(pathId.OwnerId); - request->Record.SetFilterTabletsByPathId(pathId.LocalPathId); + template + TRequestResponse::Type> MakeWhiteboardRequest(TNodeId nodeId, TRequest* ev, ui32 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession) { + TActorId whiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId); + TRequestResponse::Type> response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); } - SendRequestToPipe(pipeClient, request.Release(), hiveId); + SendRequest(whiteboardServiceId, ev, flags, nodeId, response.Span.GetTraceId()); + return response; } - void 
RequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release(), hiveId); - } + TRequestResponse MakeViewerRequest(TNodeId nodeId, TEvViewer::TEvViewerRequest* ev, ui32 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession); + void SendDelayedRequests(); + void RequestHiveDomainStats(TTabletId hiveId); + void RequestHiveNodeStats(TTabletId hiveId, TPathId pathId); + void RequestHiveStorageStats(TTabletId hiveId); - NNodeWhiteboard::TTabletId GetConsoleId() { + TTabletId GetConsoleId() { return MakeConsoleID(); } - void RequestConsoleListTenants() { - TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); - THolder request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestConsoleGetTenantStatus(const TString& path) { - TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->set_path(path); - SendRequestToPipe(pipeClient, request.Release()); - } - - NNodeWhiteboard::TTabletId GetBSControllerId() { + TTabletId GetBSControllerId() { return MakeBSControllerID(); } - void RequestBSControllerConfig() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerConfigWithStoragePools() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); - request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerInfo() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder 
request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerSelectGroups(THolder request) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* restartPDisk = request->Record.MutableRequest()->AddCommand()->MutableRestartPDisk(); - restartPDisk->MutableTargetPDiskId()->SetNodeId(nodeId); - restartPDisk->MutableTargetPDiskId()->SetPDiskId(pdiskId); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* evictVDisk = request->Record.MutableRequest()->AddCommand()->MutableReassignGroupDisk(); - evictVDisk->SetGroupId(groupId); - evictVDisk->SetGroupGeneration(groupGeneration); - evictVDisk->SetFailRealmIdx(failRealmIdx); - evictVDisk->SetFailDomainIdx(failDomainIdx); - evictVDisk->SetVDiskIdx(vdiskIdx); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - TRequestResponse RequestBSControllerPDiskInfo(ui32 nodeId, ui32 pdiskId) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - auto request = std::make_unique(); - request->Record.SetInclusiveFrom(true); - request->Record.SetInclusiveTo(true); - request->Record.MutableFrom()->SetNodeId(nodeId); - request->Record.MutableFrom()->SetPDiskId(pdiskId); - request->Record.MutableTo()->SetNodeId(nodeId); - request->Record.MutableTo()->SetPDiskId(pdiskId); - return 
MakeRequestToPipe(pipeClient, request.release(), 0/*cookie*/); - } - - TRequestResponse RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - auto request = std::make_unique(); - request->Record.SetInclusiveFrom(true); - request->Record.SetInclusiveTo(true); - request->Record.MutableFrom()->SetNodeId(nodeId); - request->Record.MutableFrom()->SetPDiskId(pdiskId); - request->Record.MutableFrom()->SetVSlotId(0); - request->Record.MutableTo()->SetNodeId(nodeId); - request->Record.MutableTo()->SetPDiskId(pdiskId); - request->Record.MutableTo()->SetVSlotId(std::numeric_limits::max()); - return MakeRequestToPipe(pipeClient, request.release(), 0/*cookie*/); - } - - void RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* updateDriveStatus = request->Record.MutableRequest()->AddCommand()->MutableUpdateDriveStatus(); - updateDriveStatus->CopyFrom(driveStatus); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestSchemeCacheNavigate(const TString& path) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Path = SplitPath(path); - entry.RedirectRequired = false; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); - } - - void RequestSchemeCacheNavigate(const TPathId& pathId) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.TableId.PathId = pathId; - entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; - entry.RedirectRequired = false; - 
entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); - } - - void RequestTxProxyDescribe(const TString& path) { - THolder request(new TEvTxUserProxy::TEvNavigate()); - request->Record.MutableDescribePath()->SetPath(path); - SendRequest(MakeTxProxyID(), request.Release()); - } - - void RequestStateStorageEndpointsLookup(const TString& path) { - TBase::RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), - TBase::SelfId(), - EBoardLookupMode::Second)); - ++Requests; - } - - void RequestStateStorageMetadataCacheEndpointsLookup(const TString& path) { - if (!AppData()->DomainsInfo->Domain) { - return; - } - TBase::RegisterWithSameMailbox(CreateBoardLookupActor(MakeDatabaseMetadataCacheBoardPath(path), - TBase::SelfId(), - EBoardLookupMode::Second)); - ++Requests; - } - - std::vector GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - std::vector databaseNodes; - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - databaseNodes.emplace_back(actorId.NodeId()); - } - } - std::sort(databaseNodes.begin(), databaseNodes.end()); - databaseNodes.erase(std::unique(databaseNodes.begin(), databaseNodes.end()), databaseNodes.end()); - return databaseNodes; - } - - void InitConfig(const TCgiParameters& params) { - Followers = FromStringWithDefault(params.Get("followers"), Followers); - Metrics = FromStringWithDefault(params.Get("metrics"), Metrics); - WithRetry = FromStringWithDefault(params.Get("with_retry"), WithRetry); - } - - void InitConfig(const TRequestSettings& settings) { - Followers = settings.Followers; - Metrics = settings.Metrics; - WithRetry = settings.WithRetry; - } - - void ClosePipes() { - for (const auto& [tabletId, pipeInfo] : PipeInfo) { - if (pipeInfo.PipeClient) { - 
NTabletPipe::CloseClient(TBase::SelfId(), pipeInfo.PipeClient); - } - } - PipeInfo.clear(); - } - - ui32 FailPipeConnect(NNodeWhiteboard::TTabletId tabletId) { - auto itPipeInfo = PipeInfo.find(tabletId); - if (itPipeInfo != PipeInfo.end()) { - ui32 requests = itPipeInfo->second.Requests; - NTabletPipe::CloseClient(TBase::SelfId(), itPipeInfo->second.PipeClient); - PipeInfo.erase(itPipeInfo); - return requests; - } - return 0; - } - - void RequestDone(ui32 requests = 1) { - Requests -= requests; - if (!DelayedRequests.empty()) { - SendDelayedRequests(); - } - if (Requests == 0) { - static_cast(this)->ReplyAndPassAway(); - } - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { - if (ev->Get()->Status != NKikimrProto::OK) { - ui32 requests = FailPipeConnect(ev->Get()->TabletId); - RequestDone(requests); - } - } - - void PassAway() override { - ClosePipes(); - TBase::PassAway(); - } - - TRequestState GetRequest() const { - return {Event->Get(), Span.GetTraceId()}; - } - - void ReplyAndPassAway(TString data, const TString& error = {}) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - if (Span) { - if (error) { - Span.EndError(error); - } else { - Span.EndOk(); - } - } - PassAway(); - } - - TString GetHTTPOK(TString contentType = {}, TString response = {}, TInstant lastModified = {}) { - return Viewer->GetHTTPOK(GetRequest(), contentType, response, lastModified); - } - - TString GetHTTPOKJSON(TString response = {}, TInstant lastModified = {}) { - return Viewer->GetHTTPOKJSON(GetRequest(), response, lastModified); - } - - TString GetHTTPGATEWAYTIMEOUT(TString contentType = {}, TString response = {}) { - return Viewer->GetHTTPGATEWAYTIMEOUT(GetRequest(), contentType, response); - } - - TString GetHTTPBADREQUEST(TString contentType = {}, TString response = {}) { - return Viewer->GetHTTPBADREQUEST(GetRequest(), contentType, response); - } - - TString GetHTTPINTERNALERROR(TString contentType = {}, 
TString response = {}) { - return Viewer->GetHTTPINTERNALERROR(GetRequest(), contentType, response); - } - - TString MakeForward(const std::vector& nodes) { - return Viewer->MakeForward(GetRequest(), nodes); - } + static TPathId GetPathId(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev); + static TString GetPath(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev); + + static TPathId GetPathId(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + static TString GetPath(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + + static bool IsSuccess(const std::unique_ptr& ev); + static TString GetError(const std::unique_ptr& ev); + + static bool IsSuccess(const std::unique_ptr& ev); + static TString GetError(const std::unique_ptr& ev); + + TRequestResponse MakeRequestHiveDomainStats(TTabletId hiveId); + TRequestResponse MakeRequestHiveStorageStats(TTabletId hiveId); + TRequestResponse MakeRequestHiveNodeStats(TTabletId hiveId, TEvHive::TEvRequestHiveNodeStats* request); + void RequestConsoleListTenants(); + TRequestResponse MakeRequestConsoleListTenants(); + TRequestResponse MakeRequestConsoleNodeConfigByTenant(TString tenant, ui64 cookie = 0); + TRequestResponse MakeRequestConsoleGetAllConfigs(); + void RequestConsoleGetTenantStatus(const TString& path); + TRequestResponse MakeRequestConsoleGetTenantStatus(const TString& path); + void RequestBSControllerConfig(); + void RequestBSControllerConfigWithStoragePools(); + TRequestResponse MakeRequestBSControllerConfigWithStoragePools(); + void RequestBSControllerInfo(); + void RequestBSControllerSelectGroups(THolder request); + TRequestResponse MakeRequestBSControllerSelectGroups(THolder request, ui64 cookie = 0); + void RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force = false); + void RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force = false); + TRequestResponse RequestBSControllerPDiskInfo(ui32 
nodeId, ui32 pdiskId); + TRequestResponse RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId); + TRequestResponse RequestBSControllerGroups(); + TRequestResponse RequestBSControllerPools(); + TRequestResponse RequestBSControllerVSlots(); + TRequestResponse RequestBSControllerPDisks(); + TRequestResponse RequestBSControllerStorageStats(); + void RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force = false); + void RequestSchemeCacheNavigate(const TString& path); + void RequestSchemeCacheNavigate(const TPathId& pathId); + TRequestResponse MakeRequestSchemeCacheNavigate(const TString& path, ui64 cookie = 0); + TRequestResponse MakeRequestSchemeCacheNavigate(TPathId pathId, ui64 cookie = 0); + TRequestResponse MakeRequestViewer(TNodeId nodeId, TEvViewer::TEvViewerRequest* request, ui32 flags = 0); + void RequestTxProxyDescribe(const TString& path); + void RequestStateStorageEndpointsLookup(const TString& path); + void RequestStateStorageMetadataCacheEndpointsLookup(const TString& path); + TRequestResponse MakeRequestStateStorageEndpointsLookup(const TString& path, ui64 cookie = 0); + std::vector GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev); + std::vector GetNodesFromBoardReply(const TEvStateStorage::TEvBoardInfo& ev); + void InitConfig(const TCgiParameters& params); + void InitConfig(const TRequestSettings& settings); + void ClosePipes(); + ui32 FailPipeConnect(TTabletId tabletId); + + bool IsLastRequest() const { + return Requests == 1; + } + + bool WaitingForResponse() const { + return Requests != 0; + } + + bool NoMoreRequests(ui32 requestsDone = 0) const { + return Requests == requestsDone; + } + + TRequestState GetRequest() const; + void ReplyAndPassAway(TString data, const TString& error = {}); + + TString GetHTTPOK(TString contentType = {}, TString response = {}, TInstant lastModified = {}); + TString GetHTTPOKJSON(TString response = {}, TInstant lastModified = {}); + TString 
GetHTTPOKJSON(const NJson::TJsonValue& response, TInstant lastModified = {}); + TString GetHTTPOKJSON(const google::protobuf::Message& response, TInstant lastModified = {}); + TString GetHTTPGATEWAYTIMEOUT(TString contentType = {}, TString response = {}); + TString GetHTTPBADREQUEST(TString contentType = {}, TString response = {}); + TString GetHTTPINTERNALERROR(TString contentType = {}, TString response = {}); + TString GetHTTPFORBIDDEN(TString contentType = {}, TString response = {}); + TString MakeForward(const std::vector& nodes); + + void RequestDone(ui32 requests = 1); + void AddEvent(const TString& name); + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev); + void HandleResolveDatabase(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + void HandleResolveResource(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + void HandleResolve(TEvStateStorage::TEvBoardInfo::TPtr& ev); + STATEFN(StateResolveDatabase); + STATEFN(StateResolveResource); + void RedirectToDatabase(const TString& database); + bool NeedToRedirect(); + void HandleTimeout(); + void PassAway() override; }; } -} diff --git a/ydb/core/viewer/json_pqconsumerinfo.h b/ydb/core/viewer/json_pqconsumerinfo.h deleted file mode 100644 index 0071ec99e8bd..000000000000 --- a/ydb/core/viewer/json_pqconsumerinfo.h +++ /dev/null @@ -1,190 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - - -class TJsonPQConsumerInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrClient::TResponse Result; - TJsonSettings JsonSettings; - TString Topic; - TString Client; - TString DC; - ui32 Version = 0; - ui32 Timeout = 0; - ui32 Requests = 0; - ui32 Responses = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return 
NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonPQConsumerInfo( - IViewer* viewer, - NMon::TEvHttpInfo::TPtr& ev - ) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Topic = params.Get("topic"); - Version = FromStringWithDefault(params.Get("version"), 0); - DC = params.Get("dc"); - //TODO: make here list of topics - Client = params.Get("client"); - if (Version >= 3) { - Topic = "rt3." + DC + "--" + NPersQueue::ConvertNewTopicName(Topic); - Client = NPersQueue::ConvertNewConsumerName(Client, ctx); - } else { - size_t pos = Topic.rfind('/'); - if (pos != TString::npos) { - Topic = Topic.substr(pos + 1); - } - } - { - NKikimrClient::TPersQueueRequest request; - request.MutableMetaRequest()->MutableCmdGetPartitionStatus()->SetClientId(Client); - request.MutableMetaRequest()->MutableCmdGetPartitionStatus()->AddTopicRequest()->SetTopic(Topic); - ctx.Register(NMsgBusProxy::CreateActorServerPersQueue( - ctx.SelfID, - request, - NMsgBusProxy::CreatePersQueueMetaCacheV2Id() - )); - ++Requests; - } - { - NKikimrClient::TPersQueueRequest request; - request.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->SetClientId(Client); - request.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->AddTopic(Topic); - ctx.Register(NMsgBusProxy::CreateActorServerPersQueue( - ctx.SelfID, - request, - NMsgBusProxy::CreatePersQueueMetaCacheV2Id() - )); - ++Requests; - } - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvResponse, 
Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvPersQueue::TEvResponse::TPtr &ev, const TActorContext &ctx) { - Result.MergeFrom(ev->Get()->Record); - if (++Responses == Requests) { - ReplyAndDie(ctx); - } - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result.GetMetaResponse(), JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: topic - in: query - description: topic name - required: true - type: string - - name: dc - in: query - description: dc name (required with version >= 3) - required: false - type: string - default: "" - - name: version - in: query - description: query version - required: false - type: integer - default: 0 - - name: client - in: query - description: client name - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - default: false - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Consumer-topic metrics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return 
"Returns consumer-topic metrics"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_query.h b/ydb/core/viewer/json_query.h deleted file mode 100644 index 0378f12d516e..000000000000 --- a/ydb/core/viewer/json_query.h +++ /dev/null @@ -1,744 +0,0 @@ -#pragma once -#include "viewer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; -using ::google::protobuf::FieldDescriptor; -using namespace NNodeWhiteboard; - -class TJsonQuery : public TViewerPipeClient { - using TThis = TJsonQuery; - using TBase = TViewerPipeClient; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TVector ResultSets; - TString Query; - TString Database; - TString Action; - TString Stats; - TString Syntax; - TString QueryId; - TString TransactionMode; - bool Direct = false; - bool IsBase64Encode = true; - - enum ESchemaType { - Classic, - Modern, - Multi, - Ydb, - }; - ESchemaType Schema = ESchemaType::Classic; - TRequestResponse CreateSessionResponse; - TRequestResponse QueryResponse; - TString SessionId; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - ESchemaType StringToSchemaType(const TString& schemaStr) { - if (schemaStr == "classic") { - return ESchemaType::Classic; - } else if (schemaStr == "modern") { - return ESchemaType::Modern; - } else if (schemaStr == "multi") { - return ESchemaType::Multi; - } else if (schemaStr == "ydb") { - return ESchemaType::Ydb; - } else { - return ESchemaType::Classic; - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = 
FromStringWithDefault(params.Get("timeout"), 60000); - Query = params.Get("query"); - Database = params.Get("database"); - Stats = params.Get("stats"); - Action = params.Get("action"); - TString schemaStr = params.Get("schema"); - Schema = StringToSchemaType(schemaStr); - Syntax = params.Get("syntax"); - QueryId = params.Get("query_id"); - TransactionMode = params.Get("transaction_mode"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), true); - } - - bool ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Query = Query.empty() ? requestData["query"].GetStringSafe({}) : Query; - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - Stats = Stats.empty() ? requestData["stats"].GetStringSafe({}) : Stats; - Action = Action.empty() ? requestData["action"].GetStringSafe({}) : Action; - Syntax = Syntax.empty() ? requestData["syntax"].GetStringSafe({}) : Syntax; - QueryId = QueryId.empty() ? requestData["query_id"].GetStringSafe({}) : QueryId; - TransactionMode = TransactionMode.empty() ? 
requestData["transaction_mode"].GetStringSafe({}) : TransactionMode; - } - return success; - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TJsonQuery(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - if (!ParsePostContent(content)) { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Bad content received"), "BadRequest"); - } - } - if (Query.empty() && Action != "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Query is empty"), "EmptyQuery"); - } - - Direct |= Event->Get()->Request.GetUri().StartsWith("/node/"); // we're already forwarding - Direct |= (Database == AppData()->TenantName); // we're already on the right node - - if (Database && !Direct) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << Database); - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect query there - } else { - SendKpqProxyRequest(); - } - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void HandleReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - TBase::ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(ev))); - } - - void PassAway() override { - if (QueryId) { - Viewer->EndRunningQuery(QueryId, SelfId()); - } - if (SessionId) { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - BLOG_TRACE("Closing session " << SessionId); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, HandleReply); - 
hFunc(NKqp::TEvKqp::TEvCreateSessionResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvQueryResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleReply); - hFunc(NKqp::TEvKqp::TEvPingSessionResponse, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamData, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamProfile, HandleReply); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendKpqProxyRequest() { - if (QueryId) { - TActorId actorId = Viewer->FindRunningQuery(QueryId); - if (actorId) { - auto event = std::make_unique(); - Ydb::Issue::IssueMessage* issue = event->Record.AddIssues(); - issue->set_message("Query was cancelled"); - issue->set_severity(NYql::TSeverityIds::S_ERROR); - Send(actorId, event.release()); - - if (Action == "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPOK("text/plain", "Query was cancelled")); - } - } else { - if (Action == "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Query not found"), "BadRequest"); - } - } - Viewer->AddRunningQuery(QueryId, SelfId()); - } - - auto event = std::make_unique(); - if (Database) { - event->Record.MutableRequest()->SetDatabase(Database); - if (Span) { - Span.Attribute("database", Database); - } - } - BLOG_TRACE("Creating session"); - CreateSessionResponse = MakeRequest(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - - void SetTransactionMode(NKikimrKqp::TQueryRequest& request) { - if (TransactionMode == "serializable-read-write") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "online-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_online_read_only(); - request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "stale-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_stale_read_only(); - 
request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "snapshot-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_snapshot_read_only(); - request.mutable_txcontrol()->set_commit_tx(true); - } - } - - void HandleReply(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev) { - if (ev->Get()->Record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - CreateSessionResponse.Set(std::move(ev)); - } else { - CreateSessionResponse.Error("FailedToCreateSession"); - return TBase::ReplyAndPassAway( - GetHTTPINTERNALERROR("text/plain", - TStringBuilder() << "Failed to create session, error " << ev->Get()->Record.GetYdbStatus()), "InternalError"); - } - SessionId = CreateSessionResponse->Record.GetResponse().GetSessionId(); - BLOG_TRACE("Session created " << SessionId); - - { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - ActorIdToProto(SelfId(), event->Record.MutableRequest()->MutableExtSessionCtrlActorId()); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - - auto event = MakeHolder(); - NKikimrKqp::TQueryRequest& request = *event->Record.MutableRequest(); - request.SetQuery(Query); - request.SetSessionId(SessionId); - if (Database) { - request.SetDatabase(Database); - } - if (Event->Get()->UserToken) { - event->Record.SetUserToken(Event->Get()->UserToken); - } - if (Action.empty() || Action == "execute-script" || Action == "execute") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - request.SetKeepSession(false); - } else if (Action == "execute-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.SetKeepSession(false); - SetTransactionMode(request); - } else if (Action == "explain-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - 
request.SetKeepSession(false); - } else if (Action == "execute-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - request.SetKeepSession(false); - } else if (Action == "execute-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - request.SetKeepSession(false); - SetTransactionMode(request); - } else if (Action == "explain" || Action == "explain-ast" || Action == "explain-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - } else if (Action == "explain-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - } else if (Action == "explain-script") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - } - if (Stats == "profile") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_PROFILE); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_PROFILE); - } else if (Stats == "full") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_FULL); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL); - } - if (Syntax == "yql_v1") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_YQL_V1); - } else if (Syntax == "pg") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_PG); - } - ActorIdToProto(SelfId(), event->Record.MutableRequestActorId()); - QueryResponse = MakeRequest(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.Release()); - BLOG_TRACE("Query sent"); - } - -private: - NJson::TJsonValue ColumnPrimitiveValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (const auto primitive = valueParser.GetPrimitiveType()) { - case NYdb::EPrimitiveType::Bool: - return valueParser.GetBool(); - case NYdb::EPrimitiveType::Int8: - return valueParser.GetInt8(); - case NYdb::EPrimitiveType::Uint8: - return 
valueParser.GetUint8(); - case NYdb::EPrimitiveType::Int16: - return valueParser.GetInt16(); - case NYdb::EPrimitiveType::Uint16: - return valueParser.GetUint16(); - case NYdb::EPrimitiveType::Int32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Uint32: - return valueParser.GetUint32(); - case NYdb::EPrimitiveType::Int64: - return TStringBuilder() << valueParser.GetInt64(); - case NYdb::EPrimitiveType::Uint64: - return TStringBuilder() << valueParser.GetUint64(); - case NYdb::EPrimitiveType::Float: - return valueParser.GetFloat(); - case NYdb::EPrimitiveType::Double: - return valueParser.GetDouble(); - case NYdb::EPrimitiveType::Utf8: - return valueParser.GetUtf8(); - case NYdb::EPrimitiveType::Date: - return valueParser.GetDate().ToString(); - case NYdb::EPrimitiveType::Datetime: - return valueParser.GetDatetime().ToString(); - case NYdb::EPrimitiveType::Timestamp: - return valueParser.GetTimestamp().ToString(); - case NYdb::EPrimitiveType::Interval: - return TStringBuilder() << valueParser.GetInterval(); - case NYdb::EPrimitiveType::Date32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Datetime64: - return valueParser.GetDatetime64(); - case NYdb::EPrimitiveType::Timestamp64: - return valueParser.GetTimestamp64(); - case NYdb::EPrimitiveType::Interval64: - return valueParser.GetInterval64(); - case NYdb::EPrimitiveType::TzDate: - return valueParser.GetTzDate(); - case NYdb::EPrimitiveType::TzDatetime: - return valueParser.GetTzDatetime(); - case NYdb::EPrimitiveType::TzTimestamp: - return valueParser.GetTzTimestamp(); - case NYdb::EPrimitiveType::String: - return IsBase64Encode ? 
Base64Encode(valueParser.GetString()) : valueParser.GetString(); - case NYdb::EPrimitiveType::Yson: - return valueParser.GetYson(); - case NYdb::EPrimitiveType::Json: - return valueParser.GetJson(); - case NYdb::EPrimitiveType::JsonDocument: - return valueParser.GetJsonDocument(); - case NYdb::EPrimitiveType::DyNumber: - return valueParser.GetDyNumber(); - case NYdb::EPrimitiveType::Uuid: - return valueParser.GetUuid().ToString(); - default: - Y_ENSURE(false, TStringBuilder() << "Unsupported type: " << primitive); } - } - - NJson::TJsonValue ColumnValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Optional: - valueParser.OpenOptional(); - if (valueParser.IsNull()) { - return NJson::JSON_NULL; - } - switch(valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - case NYdb::TTypeParser::ETypeKind::Decimal: - return valueParser.GetDecimal().ToString(); - default: - return NJson::JSON_UNDEFINED; - } - - case NYdb::TTypeParser::ETypeKind::Tagged: - valueParser.OpenTagged(); - return ColumnValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Pg: - return valueParser.GetPg().Content_; - - default: - return NJson::JSON_UNDEFINED; - } - } - - void HandleReply(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev) { - BLOG_TRACE("Query response received"); - NJson::TJsonValue jsonResponse; - if (ev->Get()->Record.GetRef().GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - QueryResponse.Set(std::move(ev)); - MakeOkReply(jsonResponse, QueryResponse->Record.GetRef()); - if (Schema == ESchemaType::Classic && Stats.empty() && (Action.empty() || Action == "execute")) { - jsonResponse = std::move(jsonResponse["result"]); - } - } else { - QueryResponse.Error("QueryError"); - NYql::TIssues issues; - 
NYql::IssuesFromMessage(ev->Get()->Record.GetRef().GetResponse().GetQueryIssues(), issues); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus(ev->Get()->Record.GetRef().GetYdbStatus()), std::move(issues))); - } - - TStringStream stream; - NJson::WriteJson(&stream, &jsonResponse, { - .ValidateUtf8 = false, - .WriteNanAsString = true, - }); - - TBase::ReplyAndPassAway(GetHTTPOKJSON(stream.Str())); - } - - void HandleReply(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) { - QueryResponse.Error("Aborted"); - auto& record(ev->Get()->Record); - NJson::TJsonValue jsonResponse; - if (record.IssuesSize() > 0) { - NYql::TIssues issues; - NYql::IssuesFromMessage(record.GetIssues(), issues); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus(record.GetStatusCode()), std::move(issues))); - } - - TStringStream stream; - NJson::WriteJson(&stream, &jsonResponse, { - .ValidateUtf8 = false, - .WriteNanAsString = true, - }); - - TBase::ReplyAndPassAway(GetHTTPOKJSON(stream.Str())); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamProfile::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqp::TEvPingSessionResponse::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamData::TPtr& ev) { - const NKikimrKqp::TEvExecuterStreamData& data(ev->Get()->Record); - - ResultSets.emplace_back(); - ResultSets.back() = std::move(data.GetResultSet()); - - THolder ack = MakeHolder(); - ack->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - Send(ev->Sender, ack.Release()); - } - - void HandleTimeout() { - TStringBuilder error; - error << "Timeout executing query"; - if (SessionId) { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - BLOG_TRACE("Cancelling query in session " << SessionId); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - error << ", query was cancelled"; - } - NJson::TJsonValue json; - json["error"]["severity"] = NYql::TSeverityIds::S_ERROR; - 
json["error"]["message"] = error; - NJson::TJsonValue& issue = json["issues"].AppendValue({}); - issue["severity"] = NYql::TSeverityIds::S_ERROR; - issue["message"] = error; - TBase::ReplyAndPassAway(GetHTTPOKJSON(NJson::WriteJson(json, false))); - } - -private: - void MakeErrorReply(NJson::TJsonValue& jsonResponse, const NYdb::TStatus& status) { - TString message; - - NViewer::MakeErrorReply(jsonResponse, message, status); - - if (Span) { - Span.EndError("Error"); - } - } - - void MakeOkReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - const auto& response = record.GetResponse(); - - if (response.ResultsSize() > 0 || response.YdbResultsSize() > 0) { - try { - for (const auto& result : response.GetResults()) { - Ydb::ResultSet resultSet; - NKqp::ConvertKqpQueryResultToDbResult(result, &resultSet); - ResultSets.emplace_back(std::move(resultSet)); - } - - for (const auto& result : response.GetYdbResults()) { - ResultSets.emplace_back(result); - } - } - catch (const std::exception& ex) { - NYql::TIssues issues; - issues.AddIssue(TStringBuilder() << "Convert error: " << ex.what()); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus::BAD_REQUEST, std::move(issues))); - return; - } - } - - if (ResultSets.size() > 0) { - if (Schema == ESchemaType::Classic) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonRow[columnMeta.Name] = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Modern) { - { - 
NJson::TJsonValue& jsonColumns = jsonResponse["columns"]; - NYdb::TResultSet resultSet(ResultSets.front()); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - } - - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Multi) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NJson::TJsonValue& jsonResult = jsonResults.AppendValue({}); - - NJson::TJsonValue& jsonColumns = jsonResult["columns"]; - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - - NJson::TJsonValue& jsonRows = jsonResult["rows"]; - 
NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonRows.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Ydb) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - TString row = NYdb::FormatResultRowJson(rsParser, columnsMeta, IsBase64Encode ? NYdb::EBinaryStringEncoding::Base64 : NYdb::EBinaryStringEncoding::Unicode); - NJson::ReadJsonTree(row, &jsonRow); - } - } - } - } - if (response.HasQueryAst()) { - jsonResponse["ast"] = response.GetQueryAst(); - } - if (response.HasQueryPlan()) { - NJson::ReadJsonTree(response.GetQueryPlan(), &(jsonResponse["plan"])); - } - if (response.HasQueryStats()) { - NProtobufJson::Proto2Json(response.GetQueryStats(), jsonResponse["stats"]); - } - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - post: - tags: - - viewer - summary: Executes SQL query - description: Executes SQL query - parameters: - - name: action - in: query - type: string - enum: [execute-scan, execute-script, execute-query, execute-data, explain-ast, explain-scan, explain-script, explain-query, explain-data, cancel-query] - required: true - description: > - execute method: - * `execute-query` - execute query (QueryService) - * `execute-data` - execute data query (DataQuery) - * `execute-scan` - execute scan query (ScanQuery) - * `execute-script` 
- execute script query (ScriptingService) - * `explain-query` - explain query (QueryService) - * `explain-data` - explain data query (DataQuery) - * `explain-scan` - explain scan query (ScanQuery) - * `explain-script` - explain script query (ScriptingService) - * `cancel-query` - cancel query (using query_id) - - name: database - in: query - description: database name - type: string - required: false - - name: query - in: query - description: SQL query text - type: string - required: false - - name: query_id - in: query - description: unique query identifier (uuid) - use the same id to cancel query - required: false - - name: syntax - in: query - description: > - query syntax: - * `yql_v1` - YQL v1 (default) - * `pg` - PostgreSQL compatible - type: string - enum: [yql_v1, pg] - required: false - - name: schema - in: query - description: > - result format schema: - * `classic` - * `modern` - * `multi` - * `ydb` - type: string - enum: [classic, modern, ydb, multi] - required: false - - name: stats - in: query - description: > - return stats: - * `profile` - * `full` - type: string - enum: [profile, full] - required: false - - name: transaction_mode - in: query - description: > - transaction mode: - * `serializable-read-write` - * `online-read-only` - * `stale-read-only` - * `snapshot-read-only` - type: string - enum: [serializable-read-write, online-read-only, stale-read-only, snapshot-read-only] - required: false - - name: direct - in: query - description: force processing query on current node - type: boolean - required: false - - name: base64 - in: query - description: return strings using base64 encoding - type: string - required: false - - name: timeout - in: query - description: timeout in ms - type: integer - required: false - - name: ui64 - in: query - description: return ui64 as number to avoid 56-bit js rounding - type: boolean - required: false - requestBody: - description: Executes SQL query - required: false - content: - application/json: - schema: - 
type: object - description: the same properties as in query parameters - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - description: format depends on schema parameter - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - return node; -} - - -} -} diff --git a/ydb/core/viewer/json_query_old.h b/ydb/core/viewer/json_query_old.h deleted file mode 100644 index 8a532385ce65..000000000000 --- a/ydb/core/viewer/json_query_old.h +++ /dev/null @@ -1,636 +0,0 @@ -#pragma once -#include "viewer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; -using ::google::protobuf::FieldDescriptor; - -// we only keep this class for compatibility with viewer requests -// DO NOT EDIT THIS FILE - it should be deleted after 2025-01-01 -class TJsonQueryOld : public TViewerPipeClient { - using TThis = TJsonQueryOld; - using TBase = TViewerPipeClient; - IViewer* Viewer; - TJsonSettings JsonSettings; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - ui32 Timeout = 0; - TVector ResultSets; - TString Query; - TString Database; - TString Action; - TString Stats; - TString Syntax; - TString UserToken; - bool IsBase64Encode; - - enum ESchemaType { - Classic, - Modern, - Multi, - Ydb, - }; - ESchemaType Schema = ESchemaType::Classic; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; - bool MadeKqpProxyRequest = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - ESchemaType StringToSchemaType(const TString& schemaStr) { - if (schemaStr == "classic") { - return 
ESchemaType::Classic; - } else if (schemaStr == "modern") { - return ESchemaType::Modern; - } else if (schemaStr == "multi") { - return ESchemaType::Multi; - } else if (schemaStr == "ydb") { - return ESchemaType::Ydb; - } else { - return ESchemaType::Classic; - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 60000); - Query = params.Get("query"); - Database = params.Get("database"); - Stats = params.Get("stats"); - Action = params.Get("action"); - TString schemaStr = params.Get("schema"); - Schema = StringToSchemaType(schemaStr); - Syntax = params.Get("syntax"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), true); - } - - void ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Query = Query.empty() ? requestData["query"].GetStringSafe({}) : Query; - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - Stats = Stats.empty() ? requestData["stats"].GetStringSafe({}) : Stats; - Action = Action.empty() ? requestData["action"].GetStringSafe({}) : Action; - Syntax = Syntax.empty() ? 
requestData["syntax"].GetStringSafe({}) : Syntax; - } - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TJsonQueryOld(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - ParsePostContent(content); - } - UserToken = Event->Get()->UserToken; - } - - TJsonQueryOld(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetQueryRequest(); - - TCgiParameters params(request.GetUri()); - InitConfig(params); - ParseCgiParameters(params); - - TStringBuf content = request.GetContent(); - if (content) { - ParsePostContent(content); - } - - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - UserToken = request.GetUserToken(); - Direct = true; - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, HandleReply); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvQueryResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamData, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamProfile, HandleReply); - - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendKpqProxyRequest() { - if (MadeKqpProxyRequest) { - return; - } - MadeKqpProxyRequest = true; - auto event = MakeHolder(); - NKikimrKqp::TQueryRequest& request = 
*event->Record.MutableRequest(); - request.SetQuery(Query); - if (Action.empty() || Action == "execute-script" || Action == "execute") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - request.SetKeepSession(false); - } else if (Action == "execute-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - request.SetKeepSession(false); - } else if (Action == "explain-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.SetKeepSession(false); - } else if (Action == "execute-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - request.SetKeepSession(false); - } else if (Action == "execute-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - request.SetKeepSession(false); - } else if (Action == "explain" || Action == "explain-ast" || Action == "explain-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - } else if (Action == "explain-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - } else if (Action == "explain-script") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - } - if (Stats == "profile") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_PROFILE); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_PROFILE); - } else if (Stats == 
"full") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_FULL); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL); - } - if (Database) { - request.SetDatabase(Database); - } - if (UserToken) { - event->Record.SetUserToken(UserToken); - } - if (Syntax == "yql_v1") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_YQL_V1); - } else if (Syntax == "pg") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_PG); - } - ActorIdToProto(SelfId(), event->Record.MutableRequestActorId()); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.Release()); - } - - void Bootstrap() { - if (Query.empty()) { - if (Event) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request")); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->SetYdbStatus(Ydb::StatusIds::BAD_REQUEST); - ReplyAndPassAway(response); - } - return; - } - - if (Database && !Direct) { - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect query there - } - - if (Requests == 0) { - SendKpqProxyRequest(); - } - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - -private: - NJson::TJsonValue ColumnPrimitiveValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (const auto primitive = valueParser.GetPrimitiveType()) { - case NYdb::EPrimitiveType::Bool: - return valueParser.GetBool(); - case NYdb::EPrimitiveType::Int8: - return valueParser.GetInt8(); - case NYdb::EPrimitiveType::Uint8: - return valueParser.GetUint8(); - case NYdb::EPrimitiveType::Int16: - return valueParser.GetInt16(); - case NYdb::EPrimitiveType::Uint16: - return valueParser.GetUint16(); - case NYdb::EPrimitiveType::Int32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Uint32: - return valueParser.GetUint32(); - case NYdb::EPrimitiveType::Int64: - return TStringBuilder() << valueParser.GetInt64(); - case NYdb::EPrimitiveType::Uint64: - return 
TStringBuilder() << valueParser.GetUint64(); - case NYdb::EPrimitiveType::Float: - return valueParser.GetFloat(); - case NYdb::EPrimitiveType::Double: - return valueParser.GetDouble(); - case NYdb::EPrimitiveType::Utf8: - return valueParser.GetUtf8(); - case NYdb::EPrimitiveType::Date: - return valueParser.GetDate().ToString(); - case NYdb::EPrimitiveType::Datetime: - return valueParser.GetDatetime().ToString(); - case NYdb::EPrimitiveType::Timestamp: - return valueParser.GetTimestamp().ToString(); - case NYdb::EPrimitiveType::Interval: - return TStringBuilder() << valueParser.GetInterval(); - case NYdb::EPrimitiveType::Date32: - return TStringBuilder() << valueParser.GetDate32(); - case NYdb::EPrimitiveType::Datetime64: - return TStringBuilder() << valueParser.GetDatetime64(); - case NYdb::EPrimitiveType::Timestamp64: - return TStringBuilder() << valueParser.GetTimestamp64(); - case NYdb::EPrimitiveType::Interval64: - return TStringBuilder() << valueParser.GetInterval64(); - case NYdb::EPrimitiveType::TzDate: - return valueParser.GetTzDate(); - case NYdb::EPrimitiveType::TzDatetime: - return valueParser.GetTzDatetime(); - case NYdb::EPrimitiveType::TzTimestamp: - return valueParser.GetTzTimestamp(); - case NYdb::EPrimitiveType::String: - return IsBase64Encode ? 
Base64Encode(valueParser.GetString()) : valueParser.GetString(); - case NYdb::EPrimitiveType::Yson: - return valueParser.GetYson(); - case NYdb::EPrimitiveType::Json: - return valueParser.GetJson(); - case NYdb::EPrimitiveType::JsonDocument: - return valueParser.GetJsonDocument(); - case NYdb::EPrimitiveType::DyNumber: - return valueParser.GetDyNumber(); - case NYdb::EPrimitiveType::Uuid: - return valueParser.GetUuid().ToString(); - default: - Y_ENSURE(false, TStringBuilder() << "Unsupported type: " << primitive); } - } - - NJson::TJsonValue ColumnValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Optional: - valueParser.OpenOptional(); - if (valueParser.IsNull()) { - return NJson::JSON_NULL; - } - switch(valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - case NYdb::TTypeParser::ETypeKind::Decimal: - return valueParser.GetDecimal().ToString(); - default: - return NJson::JSON_UNDEFINED; - } - - case NYdb::TTypeParser::ETypeKind::Tagged: - valueParser.OpenTagged(); - return ColumnValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Pg: - return valueParser.GetPg().Content_; - - default: - return NJson::JSON_UNDEFINED; - } - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - SendKpqProxyRequest(); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - SendKpqProxyRequest(); - } - - void SendDynamicNodeQueryRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, 
TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto queryRequest = request->Record.MutableQueryRequest(); - queryRequest->SetUri(TString(Event->Get()->Request.GetUri())); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - queryRequest->SetContent(TString(content)); - } - if (UserToken) { - queryRequest->SetUserToken(UserToken); - } - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kQueryRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void HandleReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendKpqProxyRequest(); - } else { - SendDynamicNodeQueryRequest(); - } - } - - void Handle(NKikimrKqp::TEvQueryResponse& record) { - if (Event) { - NJson::TJsonValue jsonResponse; - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - MakeOkReply(jsonResponse, record); - } else { - MakeErrorReply(jsonResponse, record); - } - - if (Schema == ESchemaType::Classic && Stats.empty() && (Action.empty() || Action == "execute")) { - jsonResponse = std::move(jsonResponse["result"]); - } - - TStringStream stream; - NJson::TJsonWriterConfig config; - config.ValidateUtf8 = false; - config.WriteNanAsString = true; - NJson::WriteJson(&stream, &jsonResponse, config); - - ReplyAndPassAway(stream.Str()); - } else { - TEvViewer::TEvViewerResponse* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->CopyFrom(record); - 
response->Record.MutableQueryResponse()->MutableResponse()->MutableYdbResults()->Add(ResultSets.begin(), ResultSets.end()); - ReplyAndPassAway(response); - } - } - - void HandleReply(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev) { - Handle(ev->Get()->Record.GetRef()); - } - - void HandleReply(TEvViewer::TEvViewerResponse::TPtr& ev) { - auto& record = ev.Get()->Get()->Record; - if (record.HasQueryResponse()) { - Handle(*(ev.Get()->Get()->Record.MutableQueryResponse())); - } else { - SendKpqProxyRequest(); // fallback - } - } - - void HandleReply(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamProfile::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamData::TPtr& ev) { - const NKikimrKqp::TEvExecuterStreamData& data(ev->Get()->Record); - - ResultSets.emplace_back(); - ResultSets.back() = std::move(data.GetResultSet()); - - THolder ack = MakeHolder(); - ack->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - Send(ev->Sender, ack.Release()); - } - - void HandleTimeout() { - if (Event) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->SetYdbStatus(Ydb::StatusIds::TIMEOUT); - ReplyAndPassAway(response); - } - } - - void ReplyAndPassAway(TEvViewer::TEvViewerResponse* response) { - Send(ViewerRequest->Sender, response); - PassAway(); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(data)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - -private: - void MakeErrorReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - NJson::TJsonValue& jsonIssues = jsonResponse["issues"]; - - // find first deepest error - 
google::protobuf::RepeatedPtrField* protoIssues = record.MutableResponse()->MutableQueryIssues(); - std::stable_sort(protoIssues->begin(), protoIssues->end(), [](const Ydb::Issue::IssueMessage& a, const Ydb::Issue::IssueMessage& b) -> bool { - return a.severity() < b.severity(); - }); - while (protoIssues->size() > 0 && (*protoIssues)[0].issuesSize() > 0) { - protoIssues = (*protoIssues)[0].mutable_issues(); - } - if (protoIssues->size() > 0) { - const Ydb::Issue::IssueMessage& issue = (*protoIssues)[0]; - NProtobufJson::Proto2Json(issue, jsonResponse["error"]); - } - for (const auto& queryIssue : record.GetResponse().GetQueryIssues()) { - NJson::TJsonValue& issue = jsonIssues.AppendValue({}); - NProtobufJson::Proto2Json(queryIssue, issue); - } - } - - void MakeOkReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - const auto& response = record.GetResponse(); - - if (response.ResultsSize() > 0 || response.YdbResultsSize() > 0) { - try { - for (const auto& result : response.GetResults()) { - Ydb::ResultSet resultSet; - NKqp::ConvertKqpQueryResultToDbResult(result, &resultSet); - ResultSets.emplace_back(std::move(resultSet)); - } - - for (const auto& result : response.GetYdbResults()) { - ResultSets.emplace_back(result); - } - } - catch (const std::exception& ex) { - Ydb::Issue::IssueMessage* issue = record.MutableResponse()->AddQueryIssues(); - issue->set_message(Sprintf("Convert error: %s", ex.what())); - issue->set_severity(NYql::TSeverityIds::S_ERROR); - MakeErrorReply(jsonResponse, record); - return; - } - } - - if (ResultSets.size() > 0) { - if (Schema == ESchemaType::Classic) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& 
jsonRow = jsonResults.AppendValue({}); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonRow[columnMeta.Name] = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Modern) { - { - NJson::TJsonValue& jsonColumns = jsonResponse["columns"]; - NYdb::TResultSet resultSet(ResultSets.front()); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - } - - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Multi) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NJson::TJsonValue& jsonResult = jsonResults.AppendValue({}); - - NJson::TJsonValue& jsonColumns = jsonResult["columns"]; - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum 
< columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - - NJson::TJsonValue& jsonRows = jsonResult["rows"]; - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonRows.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Ydb) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - TString row = NYdb::FormatResultRowJson(rsParser, columnsMeta, IsBase64Encode ? 
NYdb::EBinaryStringEncoding::Base64 : NYdb::EBinaryStringEncoding::Unicode); - NJson::ReadJsonTree(row, &jsonRow); - } - } - } - } - if (response.HasQueryAst()) { - jsonResponse["ast"] = response.GetQueryAst(); - } - if (response.HasQueryPlan()) { - NJson::ReadJsonTree(response.GetQueryPlan(), &(jsonResponse["plan"])); - } - if (response.HasQueryStats()) { - NProtobufJson::Proto2Json(response.GetQueryStats(), jsonResponse["stats"]); - } - } - -}; - - -} -} diff --git a/ydb/core/viewer/json_render.h b/ydb/core/viewer/json_render.h deleted file mode 100644 index a0788879a19c..000000000000 --- a/ydb/core/viewer/json_render.h +++ /dev/null @@ -1,344 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" -#include "viewer.h" -#include "log.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; - -class TJsonRender : public TViewerPipeClient { - using TThis = TJsonRender; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - ui32 Timeout = 0; - std::vector Metrics; - TString Database; - TCgiParameters Params; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; - bool MadeProxyRequest = false; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonRender(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - - InitConfig(params); - Database = params.Get("database"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - Timeout = FromStringWithDefault(params.Get("timeout"), 30000); - } - - TJsonRender(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetRenderRequest(); - - 
TCgiParameters params(request.GetUri()); - InitConfig(params); - Direct = true; - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - } - - void Bootstrap() { - auto postData = Event - ? Event->Get()->Request.GetPostContent() - : ViewerRequest->Get()->Record.GetRenderRequest().GetContent(); - BLOG_D("PostData=" << postData); - NKikimrGraph::TEvGetMetrics getRequest; - if (postData) { - Params = TCgiParameters(postData); - if (Params.Has("target")) { - TString metric; - size_t num = 0; - for (;;) { - metric = Params.Get("target", num); - if (metric.empty()) { - break; - } - Metrics.push_back(metric); - ++num; - } - } - //StringSplitter(Params.Get("target")).Split(',').SkipEmpty().Collect(&Metrics); - - if (Database && !Direct) { - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect there - } - if (Requests == 0) { - SendGraphRequest(); - } - } else { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request")); - return; - } - - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, Handle); - hFunc(NGraph::TEvGraph::TEvMetricsResult, Handle); - - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - SendGraphRequest(); - } - } - - void 
Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - SendGraphRequest(); - } - - void SendDynamicNodeRenderRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto renderRequest = request->Record.MutableRenderRequest(); - renderRequest->SetUri(TString(Event->Get()->Request.GetUri())); - - TStringBuf content = Event->Get()->Request.GetPostContent(); - renderRequest->SetContent(TString(content)); - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kRenderRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendGraphRequest(); - } else { - SendDynamicNodeRenderRequest(); - } - } - - void SendGraphRequest() { - if (MadeProxyRequest) { - return; - } - MadeProxyRequest = true; - NKikimrGraph::TEvGetMetrics getRequest; - if (Metrics.size() > 0) { - for (const auto& metric : Metrics) { - getRequest.AddMetrics(metric); - } - } else { - static const TString png1x1 = "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52\x00\x00\x00\x01\x00\x00\x00\x01\x01" - "\x03\x00\x00\x00\x25\xdb\x56\xca\x00\x00\x00\x03\x50\x4c\x54\x45\x00\x00\x00\xa7\x7a\x3d\xda\x00\x00" - 
"\x00\x01\x74\x52\x4e\x53\x00\x40\xe6\xd8\x66\x00\x00\x00\x0a\x49\x44\x41\x54\x08\xd7\x63\x60\x00\x00" - "\x00\x02\x00\x01\xe2\x21\xbc\x33\x00\x00\x00\x00\x49\x45\x4e\x44\xae\x42\x60\x82"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOK(Event->Get(), "image/png", png1x1), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Params.Has("from")) { - getRequest.SetTimeFrom(FromStringWithDefault(Params.Get("from"))); - } - if (Params.Has("until")) { - getRequest.SetTimeTo(FromStringWithDefault(Params.Get("until"))); - } - if (Params.Has("maxDataPoints")) { - getRequest.SetMaxPoints(FromStringWithDefault(Params.Get("maxDataPoints"), 1000)); - } - Send(NGraph::MakeGraphServiceId(), new NGraph::TEvGraph::TEvGetMetrics(std::move(getRequest))); - } - - void HandleRenderResponse(NKikimrGraph::TEvMetricsResult& response) { - if (Event) { - NJson::TJsonValue json; - - if (response.GetError()) { - json["status"] = "error"; - json["error"] = response.GetError(); - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - if (response.DataSize() != Metrics.size()) { - json["status"] = "error"; - json["error"] = "Invalid data size received"; - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - if (response.TimeSize() != protoMetric.ValuesSize()) { - json["status"] = "error"; - json["error"] = "Invalid value size received"; - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - } - { // graphite - json.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonMetric(json.AppendValue({})); - jsonMetric["target"] = Metrics[nMetric]; - jsonMetric["title"] = 
Metrics[nMetric]; - jsonMetric["tags"]["name"] = Metrics[nMetric]; - NJson::TJsonValue& jsonDataPoints(jsonMetric["datapoints"]); - jsonDataPoints.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonDataPoints.AppendValue({})); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - jsonDataPoint.AppendValue(response.GetTime(nTime)); - } - } - } - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - } else { - TEvViewer::TEvViewerResponse* viewerResponse = new TEvViewer::TEvViewerResponse(); - viewerResponse->Record.MutableRenderResponse()->CopyFrom(response); - ReplyAndPassAway(viewerResponse); - } - } - - void Handle(NGraph::TEvGraph::TEvMetricsResult::TPtr& ev) { - HandleRenderResponse(ev->Get()->Record); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - auto& record = ev.Get()->Get()->Record; - if (record.HasRenderResponse()) { - HandleRenderResponse(*(record.MutableRenderResponse())); - } else { - SendGraphRequest(); // fallback - } - } - - void HandleTimeout() { - if (Event) { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableRenderResponse()->SetError("Request timed out"); - ReplyAndPassAway(response); - } - } - - void ReplyAndPassAway(TEvViewer::TEvViewerResponse* response) { - Send(ViewerRequest->Sender, response); - PassAway(); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(std::move(data), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: target - in: query - description: metrics comma delimited - 
required: true - type: string - - name: from - in: query - description: time in seconds - required: false - type: integer - - name: database - in: query - description: database name - required: false - type: string - - name: direct - in: query - description: force processing query on current node - required: false - type: boolean - - name: until - in: query - description: time in seconds - required: false - type: integer - - name: maxDataPoints - in: query - description: maximum number of data points - required: false - type: integer - - name: format - in: query - description: response format - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Graph data"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns graph data in graphite format"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_storage.h b/ydb/core/viewer/json_storage.h deleted file mode 100644 index fba5047a69e0..000000000000 --- a/ydb/core/viewer/json_storage.h +++ /dev/null @@ -1,601 +0,0 @@ -#pragma once -#include "json_storage_base.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; - -using ::google::protobuf::FieldDescriptor; - -class TJsonStorage : public TJsonStorageBase { - using TBase = TJsonStorageBase; - using TThis = TJsonStorage; - - bool NeedGroups = true; - bool NeedDisks = true; - bool NeedDonors = true; - - enum class EGroupSort { - PoolName, - Kind, - MediaType, - Erasure, - Degraded, - Usage, - GroupId, - Used, - Limit, - Read, - Write - }; - enum class EVersion { - v1, - v2 // only this works with sorting, limiting and filtering with usage buckets - }; - EVersion Version = EVersion::v1; - EGroupSort GroupSort = EGroupSort::PoolName; - bool ReverseSort = false; - std::optional Offset; - std::optional Limit; - - ui32 UsagePace = 5; - TVector UsageBuckets; - - struct TGroupRow { - TString 
PoolName; - TString GroupId; - TString Kind; - TString MediaType; - TString Erasure; - ui32 Degraded; - float Usage; - uint64 Used; - uint64 Limit; - uint64 Read; - uint64 Write; - - TGroupRow() - : Degraded(0) - , Usage(0) - , Used(0) - , Limit(0) - , Read(0) - , Write(0) - {} - }; - THashMap GroupRowsByGroupId; - -public: - TJsonStorage(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - const auto& params(Event->Get()->Request.GetParams()); - NeedGroups = FromStringWithDefault(params.Get("need_groups"), true); - NeedDisks = FromStringWithDefault(params.Get("need_disks"), NeedGroups); - NeedDonors = FromStringWithDefault(params.Get("need_donors"), NeedDonors); - NeedGroups = Max(NeedGroups, NeedDisks); - UsagePace = FromStringWithDefault(params.Get("usage_pace"), UsagePace); - if (UsagePace == 0) { - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - SplitIds(params.Get("usage_buckets"), ',', UsageBuckets); - Sort(UsageBuckets); - - TString version = params.Get("version"); - if (version == "v1") { - Version = EVersion::v1; - } else if (version == "v2") { - Version = EVersion::v2; - } - Offset = FromStringWithDefault(params.Get("offset"), 0); - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "PoolName") { - GroupSort = EGroupSort::PoolName; - } else if (sort == "Kind") { - GroupSort = EGroupSort::Kind; - } else if (sort == "MediaType") { - GroupSort = EGroupSort::MediaType; - } else if (sort == "Erasure") { - GroupSort = EGroupSort::Erasure; - } else if (sort == "Degraded") { - GroupSort = EGroupSort::Degraded; - } else if (sort == "Usage") { - GroupSort = EGroupSort::Usage; - } else if (sort == "GroupId") { - GroupSort = 
EGroupSort::GroupId; - } else if (sort == "Used") { - GroupSort = EGroupSort::Used; - } else if (sort == "Limit") { - GroupSort = EGroupSort::Limit; - } else if (sort == "Read") { - GroupSort = EGroupSort::Read; - } else if (sort == "Write") { - GroupSort = EGroupSort::Write; - } - } - } - - void Bootstrap() override { - TIntrusivePtr domains = AppData()->DomainsInfo; - ui64 hiveId = domains->GetHive(); - if (hiveId != TDomainsInfo::BadTabletId) { - RequestHiveStorageStats(hiveId); - } - TBase::Bootstrap(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - TBase::Handle(ev, true); - } - - void RemapGroup(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - const auto& info = static_cast(protoFrom); - TString groupId = info.GetGroupId(); - if (Version == EVersion::v2) { - const auto& groupRow = GroupRowsByGroupId[groupId]; - json << "\"PoolName\":\"" << groupRow.PoolName << "\","; - json << "\"Kind\":\"" << groupRow.Kind << "\","; - json << "\"MediaType\":\"" << groupRow.MediaType << "\","; - json << "\"Erasure\":\"" << groupRow.Erasure << "\","; - json << "\"Degraded\":\"" << groupRow.Degraded << "\","; - json << "\"Usage\":\"" << groupRow.Usage << "\","; - json << "\"Used\":\"" << groupRow.Used << "\","; - json << "\"Limit\":\"" << groupRow.Limit << "\","; - json << "\"Read\":\"" << groupRow.Read << "\","; - json << "\"Write\":\"" << groupRow.Write << "\","; - } - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - TProtoToJson::ProtoToJsonInline(json, ib->second, jsonSettings); - if (auto ih = BSGroupHiveIndex.find(groupId); ih != BSGroupHiveIndex.end()) { - json << ','; - TProtoToJson::ProtoToJsonInline(json, ih->second, jsonSettings); - } - if (auto io = BSGroupOverall.find(groupId); io != BSGroupOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - } - } - - void RemapVDisks(IOutputStream& json, - const ::google::protobuf::Message& 
protoFrom, - const TJsonSettings& jsonSettings) { - NKikimrWhiteboard::EFlag diskSpace = NKikimrWhiteboard::Grey; - json << "\"VDisks\":["; - const auto& info = static_cast(protoFrom); - const auto& vDiskIds = info.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - if (iv != vDiskIds.begin()) { - json << ','; - } - const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - if (ie != VDisksIndex.end()) { - json << '{'; - TProtoToJson::ProtoToJsonInline(json, ie->second, jsonSettings); - if (auto io = VDisksOverall.find(vDiskId); io != VDisksOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - json << '}'; - diskSpace = std::max(diskSpace, ie->second.GetDiskSpace()); - } else { - json << "{\"VDiskId\":"; - TProtoToJson::ProtoToJson(json, vDiskId, jsonSettings); - json << "}"; - } - } - json << ']'; - if (diskSpace != NKikimrWhiteboard::Grey) { - json << ",\"DiskSpace\":\""; - json << NKikimrWhiteboard::EFlag_Name(diskSpace); - json << "\""; - } - } - - void RemapDonors(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - const auto& info = static_cast(protoFrom); - const auto& donors = info.GetDonors(); - if (donors.empty()) { - return; - } - json << "\"Donors\":["; - for (auto id = donors.begin(); id != donors.end(); ++id) { - if (id != donors.begin()) { - json << ','; - } - const NKikimrBlobStorage::TVSlotId& vSlotId = *id; - auto ie = VSlotsIndex.find(vSlotId); - if (ie != VSlotsIndex.end()) { - json << '{'; - TProtoToJson::ProtoToJsonInline(json, ie->second, jsonSettings); - json << '}'; - } else { - json << "{"; - TProtoToJson::ProtoToJsonInline(json, vSlotId, jsonSettings); - json << "}"; - } - } - json << ']'; - } - - void RemapPDisk(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - json << "\"PDisk\":"; - const auto& info = static_cast(protoFrom); - ui32 nodeId 
= info.GetNodeId(); - ui32 pDiskId = info.GetPDiskId(); - auto ie = PDisksIndex.find(std::make_pair(nodeId, pDiskId)); - if (ie != PDisksIndex.end()) { - TProtoToJson::ProtoToJson(json, ie->second, jsonSettings); - if (auto io = PDisksOverall.find(std::make_pair(nodeId, pDiskId)); io != PDisksOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - } else { - json << "{\"PDiskId\":" << pDiskId << ",\"NodeId\":" << nodeId << "}"; - } - } - - bool CheckGroupFilters(const TString& groupId, const TString& poolName, const TGroupRow& groupRow) { - if (!EffectiveGroupFilter.contains(groupId)) { - return false; - } - switch (With) { - case EWith::MissingDisks: - if (BSGroupWithMissingDisks.count(groupId) == 0) { - return false; - } - break; - case EWith::SpaceProblems: - if (BSGroupWithSpaceProblems.count(groupId) == 0 && groupRow.Usage < 0.8) { - return false; - } - break; - case EWith::Everything: - break; - } - if (Filter) { - if (poolName.Contains(Filter)) { - return true; - } - if (groupId.Contains(Filter)) { - return true; - } - return false; - } - return true; - } - - void ReplyAndPassAway() override { - if (CheckAdditionalNodesInfoNeeded()) { - return; - } - CollectDiskInfo(true); - ParsePDisksFromBaseConfig(); - ParseVDisksFromBaseConfig(); - - for (const auto& [hiveId, hiveStats] : HiveStorageStats) { - for (auto& pbPool : *hiveStats->Record.MutablePools()) { - for (auto& pbGroup : *pbPool.MutableGroups()) { - TString groupId = ToString(pbGroup.GetGroupID()); - NKikimrHive::THiveStorageGroupStats& stats = BSGroupHiveIndex[groupId]; - stats.SetAcquiredUnits(stats.GetAcquiredUnits() + pbGroup.GetAcquiredUnits()); - stats.SetAcquiredIOPS(stats.GetAcquiredIOPS() + pbGroup.GetAcquiredIOPS()); - stats.SetAcquiredThroughput(stats.GetAcquiredThroughput() + pbGroup.GetAcquiredThroughput()); - stats.SetAcquiredSize(stats.GetAcquiredSize() + pbGroup.GetAcquiredSize()); - stats.SetMaximumIOPS(stats.GetMaximumIOPS() + pbGroup.GetMaximumIOPS()); - 
stats.SetMaximumThroughput(stats.GetMaximumThroughput() + pbGroup.GetMaximumThroughput()); - stats.SetMaximumSize(stats.GetMaximumSize() + pbGroup.GetMaximumSize()); - } - } - } - ui64 foundGroups = 0; - ui64 totalGroups = 0; - TVector GroupRows; - for (const auto& [poolName, poolInfo] : StoragePoolInfo) { - if ((!FilterTenant.empty() || !FilterStoragePools.empty()) && FilterStoragePools.count(poolName) == 0) { - continue; - } - NKikimrViewer::TStoragePoolInfo* pool = StorageInfo.AddStoragePools(); - for (TString groupId : poolInfo.Groups) { - TGroupRow row; - row.PoolName = poolName; - row.GroupId = groupId; - row.Kind = poolInfo.Kind; - row.MediaType = poolInfo.MediaType; - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - row.Erasure = ib->second.GetErasureSpecies(); - const auto& vDiskIds = ib->second.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - bool degraded = false; - if (ie != VDisksIndex.end()) { - ui32 nodeId = ie->second.GetNodeId(); - ui32 pDiskId = ie->second.GetPDiskId(); - degraded |= !ie->second.GetReplicated() || ie->second.GetVDiskState() != NKikimrWhiteboard::EVDiskState::OK; - row.Used += ie->second.GetAllocatedSize(); - row.Limit += ie->second.GetAllocatedSize() + ie->second.GetAvailableSize(); - row.Read += ie->second.GetReadThroughput(); - row.Write += ie->second.GetWriteThroughput(); - - auto ip = PDisksIndex.find(std::make_pair(nodeId, pDiskId)); - if (ip != PDisksIndex.end()) { - degraded |= ip->second.GetState() != NKikimrBlobStorage::TPDiskState::Normal; - if (!ie->second.HasAvailableSize()) { - row.Limit += ip->second.GetAvailableSize(); - } - } - } - if (degraded) { - row.Degraded++; - } - } - } - row.Usage = row.Limit == 0 ? 
100 : (float)row.Used / row.Limit; - - ++totalGroups; - if (!CheckGroupFilters(groupId, poolName, row)) { - continue; - } - ++foundGroups; - if (Version == EVersion::v1) { - pool->AddGroups()->SetGroupId(groupId); - pool->SetMediaType(poolInfo.MediaType); - } else if (Version == EVersion::v2) { - if (!UsageBuckets.empty() && !BinarySearch(UsageBuckets.begin(), UsageBuckets.end(), (ui32)(row.Usage * 100) / UsagePace)) { - continue; - } - GroupRows.emplace_back(row); - GroupRowsByGroupId[groupId] = row; - } - auto itHiveGroup = BSGroupHiveIndex.find(groupId); - if (itHiveGroup != BSGroupHiveIndex.end()) { - pool->SetAcquiredUnits(pool->GetAcquiredUnits() + itHiveGroup->second.GetAcquiredUnits()); - pool->SetAcquiredIOPS(pool->GetAcquiredIOPS() + itHiveGroup->second.GetAcquiredIOPS()); - pool->SetAcquiredThroughput(pool->GetAcquiredThroughput() + itHiveGroup->second.GetAcquiredThroughput()); - pool->SetAcquiredSize(pool->GetAcquiredSize() + itHiveGroup->second.GetAcquiredSize()); - pool->SetMaximumIOPS(pool->GetMaximumIOPS() + itHiveGroup->second.GetMaximumIOPS()); - pool->SetMaximumThroughput(pool->GetMaximumThroughput() + itHiveGroup->second.GetMaximumThroughput()); - pool->SetMaximumSize(pool->GetMaximumSize() + itHiveGroup->second.GetMaximumSize()); - } - } - if (pool->GroupsSize() == 0) { - StorageInfo.MutableStoragePools()->RemoveLast(); - continue; - } - if (!poolName.empty()) { - pool->SetName(poolName); - } - if (!poolInfo.Kind.empty()) { - pool->SetKind(poolInfo.Kind); - } - pool->SetOverall(poolInfo.Overall); - } - - if (Version == EVersion::v2) { - switch (GroupSort) { - case EGroupSort::PoolName: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.PoolName;}, ReverseSort); - break; - case EGroupSort::GroupId: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.GroupId;}, ReverseSort); - break; - case EGroupSort::Kind: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Kind;}, ReverseSort); - break; - case 
EGroupSort::MediaType: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.MediaType;}, ReverseSort); - break; - case EGroupSort::Erasure: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Erasure;}, ReverseSort); - break; - case EGroupSort::Degraded: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Degraded;}, ReverseSort); - break; - case EGroupSort::Usage: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Usage;}, ReverseSort); - break; - case EGroupSort::Used: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Used;}, ReverseSort); - break; - case EGroupSort::Limit: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Limit;}, ReverseSort); - break; - case EGroupSort::Read: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Read;}, ReverseSort); - break; - case EGroupSort::Write: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Write;}, ReverseSort); - break; - } - - ui32 start = Offset.has_value() ? 
Offset.value() : 0; - ui32 end = GroupRows.size(); - if (Limit.has_value()) { - end = Min(end, start + Limit.value()); - } - for (ui32 i = start; i < end; ++i) { - NKikimrViewer::TStorageGroupInfo* group = StorageInfo.AddStorageGroups(); - group->SetGroupId(GroupRows[i].GroupId); - } - } - - const FieldDescriptor* field; - if (NeedGroups) { - field = NKikimrViewer::TStorageGroupInfo::descriptor()->FindFieldByName("GroupId"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapGroup(json, protoFrom, jsonSettings); - }; - } - if (NeedDisks) { - field = NKikimrWhiteboard::TBSGroupStateInfo::descriptor()->FindFieldByName("VDiskIds"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapVDisks(json, protoFrom, jsonSettings); - }; - field = NKikimrWhiteboard::TVDiskStateInfo::descriptor()->FindFieldByName("PDiskId"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapPDisk(json, protoFrom, jsonSettings); - }; - if (NeedDonors) { - field = NKikimrWhiteboard::TVDiskStateInfo::descriptor()->FindFieldByName("Donors"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapDonors(json, protoFrom, jsonSettings); - }; - } - } - StorageInfo.SetTotalGroups(totalGroups); - StorageInfo.SetFoundGroups(foundGroups); - - TStringStream json; - TProtoToJson::ProtoToJson(json, StorageInfo, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - 
static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: tenant - in: query - description: tenant name - required: false - type: string - - name: pool - in: query - description: storage pool name - required: false - type: string - - name: node_id - in: query - description: node id - required: false - type: integer - - name: pdisk_id - in: query - description: pdisk id - required: false - type: integer - - name: group_id - in: query - description: group id - required: false - type: integer - - name: need_groups - in: query - description: return groups information - required: false - type: boolean - default: true - - name: need_disks - in: query - description: return disks information - required: false - type: boolean - default: true - - name: with - in: query - description: filter groups by missing or space - required: false - type: string - - name: version - in: query - description: query version (v1, v2) - required: false - type: string - - name: usage_pace - in: query - description: bucket size as a percentage - required: false - type: integer - default: 5 - - name: usage_buckets - in: query - description: filter groups by usage buckets - required: false - type: integer - - name: sort - in: query - description: sort by (PoolName,Kind,MediaType,Erasure,Degraded,Usage,GroupId,Used,Limit,Read,Write) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; 
- -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_storage_base.h b/ydb/core/viewer/json_storage_base.h index f4f728369d08..1f8616ce6bbc 100644 --- a/ydb/core/viewer/json_storage_base.h +++ b/ydb/core/viewer/json_storage_base.h @@ -1,17 +1,11 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include +#include "json_pipe_req.h" #include "viewer.h" +#include "viewer_bsgroupinfo.h" +#include "viewer_vdiskinfo.h" +#include "viewer_pdiskinfo.h" #include "viewer_helper.h" -#include "json_pipe_req.h" -#include "json_vdiskinfo.h" -#include "json_pdiskinfo.h" +#include "wb_merge.h" template<> struct std::hash { @@ -31,17 +25,16 @@ struct std::equal_to { } }; -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; using ::google::protobuf::FieldDescriptor; -class TJsonStorageBase : public TViewerPipeClient { +class TJsonStorageBase : public TViewerPipeClient { protected: - using TBase = TViewerPipeClient; + using TBase = TViewerPipeClient; using TThis = TJsonStorageBase; using TNodeId = ui32; @@ -104,10 +97,10 @@ class TJsonStorageBase : public TViewerPipeClient { TString Erasure; ui32 Degraded; float Usage; - uint64 Used; - uint64 Limit; - uint64 Read; - uint64 Write; + ui64 Used; + ui64 Limit; + ui64 Read; + ui64 Write; TGroupRow() : Used(0) @@ -146,11 +139,7 @@ class TJsonStorageBase : public TViewerPipeClient { } public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - virtual void Bootstrap() { + void Bootstrap() override { TIntrusivePtr domains = AppData()->DomainsInfo; if (FilterTenant.empty()) { @@ -550,7 +539,7 @@ class 
TJsonStorageBase : public TViewerPipeClient { } } - virtual void ReplyAndPassAway() {} + void ReplyAndPassAway() override {} void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) { switch (ev->Get()->Tag) { @@ -565,4 +554,3 @@ class TJsonStorageBase : public TViewerPipeClient { }; } -} diff --git a/ydb/core/viewer/json_storage_usage.h b/ydb/core/viewer/json_storage_usage.h deleted file mode 100644 index e759a72c3ecf..000000000000 --- a/ydb/core/viewer/json_storage_usage.h +++ /dev/null @@ -1,146 +0,0 @@ -#pragma once -#include "json_storage_base.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; - -using ::google::protobuf::FieldDescriptor; - -class TJsonStorageUsage : public TJsonStorageBase { - using TBase = TJsonStorageBase; - using TThis = TJsonStorageUsage; - ui32 Pace = 5; - -public: - TJsonStorageUsage(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - const auto& params(Event->Get()->Request.GetParams()); - Pace = FromStringWithDefault(params.Get("pace"), Pace); - if (Pace == 0) { - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - TBase::Handle(ev, false); - } - - void ReplyAndPassAway() override { - if (CheckAdditionalNodesInfoNeeded()) { - return; - } - CollectDiskInfo(true); - ParseVDisksFromBaseConfig(); - - TMap buckets; - for (const auto& [poolName, poolInfo] : StoragePoolInfo) { - if ((!FilterTenant.empty() || !FilterStoragePools.empty()) && FilterStoragePools.count(poolName) == 0) { - continue; - } - for (TString groupId : poolInfo.Groups) { - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - int64 used = 0; - int64 limit = 0; - const auto& vDiskIds = ib->second.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - const 
NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - if (ie != VDisksIndex.end()) { - used += ie->second.GetAllocatedSize(); - limit += ie->second.GetAllocatedSize() + ie->second.GetAvailableSize(); - } - } - int bucketNumber = limit == 0 ? 100 : used * 100 / limit / Pace; - if (!buckets.contains(bucketNumber)) { - buckets[bucketNumber] = 0; - } - buckets[bucketNumber]++; - } - } - } - NKikimrViewer::TStorageUsageStats StorageStats; - StorageStats.SetPace(Pace); - for (ui32 i = 0; i * Pace < 100; i++) { - StorageStats.AddBuckets(buckets[i]); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, StorageStats, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - type: boolean - required: false - - name: ui64 - in: query - description: return ui64 as number - type: boolean - required: false - - name: tenant - in: query - description: tenant name - type: string - required: false - - name: pool - in: query - description: storage pool name - type: string - required: false - - name: node_id - in: query - description: node id - type: integer - required: false - - name: pace - in: query - description: bucket size as a percentage - type: integer - required: false - default: 5 - - name: timeout - in: query - description: timeout in ms - type: integer - required: false - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage groups statistics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - 
return "Returns the distribution of groups by usage"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_sysinfo.h b/ydb/core/viewer/json_sysinfo.h deleted file mode 100644 index 922f9566b917..000000000000 --- a/ydb/core/viewer/json_sysinfo.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -class TWhiteboardMerger { -public: - static THolder MergeResponses(TMap>& responses, const TString&) { - THolder result = MakeHolder(); - ui64 minResponseTime = 0; - auto* field = result->Record.MutableSystemStateInfo(); - field->Reserve(responses.size()); - for (auto it = responses.begin(); it != responses.end(); ++it) { - if (it->second != nullptr && it->second->Record.SystemStateInfoSize() > 0) { - auto* element = field->Add(); - element->Swap(it->second->Record.MutableSystemStateInfo(0)); - element->SetNodeId(it->first); - if (minResponseTime == 0 || it->second->Record.GetResponseTime() < minResponseTime) { - minResponseTime = it->second->Record.GetResponseTime(); - } - } - } - result->Record.SetResponseTime(minResponseTime); - return result; - } -}; - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvSystemStateResponse; - using TResponseEventType = TEvWhiteboard::TEvSystemStateResponse; - using TElementType = NKikimrWhiteboard::TSystemStateInfo; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableSystemStateInfo(); - } - - static TString GetDefaultMergeField() { - return "NodeId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } -}; - -using TJsonSysInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() 
{ - return "System information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns system information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tabletcounters.h b/ydb/core/viewer/json_tabletcounters.h deleted file mode 100644 index 023552fdea3f..000000000000 --- a/ydb/core/viewer/json_tabletcounters.h +++ /dev/null @@ -1,230 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTabletCounters : public TActorBootstrapped { - static const bool WithRetry = false; - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TVector PipeClients; - TVector Tablets; - TMap> Results; - THolder DescribeResult; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Aggregate = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTabletCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - static NTabletPipe::TClientConfig InitPipeClientConfig() { - NTabletPipe::TClientConfig clientConfig; - if (WithRetry) { - clientConfig.RetryPolicy = NTabletPipe::TClientRetryPolicy::WithRetries(); - } - return clientConfig; - } - - static const NTabletPipe::TClientConfig& GetPipeClientConfig() { - static NTabletPipe::TClientConfig clientConfig = InitPipeClientConfig(); - return clientConfig; - } - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Aggregate = 
FromStringWithDefault(params.Get("aggregate"), true); - if (params.Has("path")) { - THolder request(new TEvTxUserProxy::TEvNavigate()); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - record->SetPath(params.Get("path")); - - TActorId txproxy = MakeTxProxyID(); - ctx.Send(txproxy, request.Release()); - Become(&TThis::StateRequestedDescribe, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else if (params.Has("tablet_id")) { - TTabletId tabletId = FromStringWithDefault(params.Get("tablet_id"), 0); - if (tabletId != 0) { - Tablets.emplace_back(tabletId); - TActorId PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, GetPipeClientConfig())); - NTabletPipe::SendData(ctx, PipeClient, new TEvTablet::TEvGetCounters(), tabletId); - PipeClients.emplace_back(PipeClient); - Become(&TThis::StateRequestedGetCounters, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - if (PipeClients.empty()) { - ReplyAndDie(ctx); - } - } - } - - void Die(const TActorContext& ctx) override { - for (const TActorId& pipeClient : PipeClients) { - NTabletPipe::CloseClient(ctx, pipeClient); - } - TBase::Die(ctx); - } - - STFUNC(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - STFUNC(StateRequestedGetCounters) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTablet::TEvGetCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr &ev, const TActorContext &ctx) { - DescribeResult = ev->Release(); - if (DescribeResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - 
Tablets.reserve(DescribeResult->GetRecord().GetPathDescription().TablePartitionsSize()); - for (const auto& partition : DescribeResult->GetRecord().GetPathDescription().GetTablePartitions()) { - Tablets.emplace_back(partition.GetDatashardId()); - } - Tablets.reserve(DescribeResult->GetRecord().GetPathDescription().GetPersQueueGroup().PartitionsSize()); - for (const auto& partition : DescribeResult->GetRecord().GetPathDescription().GetPersQueueGroup().GetPartitions()) { - Tablets.emplace_back(partition.GetTabletId()); - } - Sort(Tablets); - Tablets.erase(std::unique(Tablets.begin(), Tablets.end()), Tablets.end()); - } - for (auto tabletId : Tablets) { - TActorId PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, GetPipeClientConfig())); - NTabletPipe::SendData(ctx, PipeClient, new TEvTablet::TEvGetCounters(), tabletId); - PipeClients.emplace_back(PipeClient); - } - if (Tablets.empty()) { - ReplyAndDie(ctx); - } - Become(&TThis::StateRequestedGetCounters); - } - - void Handle(TEvTablet::TEvGetCountersResponse::TPtr &ev, const TActorContext &ctx) { - Results.emplace(ev->Cookie, ev->Release()); - if (Results.size() == Tablets.size()) { - ReplyAndDie(ctx); - } - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - if (!Results.empty()) { - if (Aggregate) { - THolder response = AggregateWhiteboardResponses(Results); - TProtoToJson::ProtoToJson(json, response->Record, JsonSettings); - } else { - json << '{'; - for (auto it = Results.begin(); it != Results.end(); ++it) { - if (it != Results.begin()) { - json << ','; - } - json << '"' << it->first << "\":"; - TProtoToJson::ProtoToJson(json, it->second->Record, JsonSettings); - } - json << '}'; - } - } else { - json << "null"; - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - 
ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: tablet_id - in: query - description: tablet identifier - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: aggregate - in: query - description: aggregate tablet counters - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tablet counters information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablet counters"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tabletinfo.h b/ydb/core/viewer/json_tabletinfo.h deleted file mode 100644 index aac8fabe01a5..000000000000 --- a/ydb/core/viewer/json_tabletinfo.h +++ /dev/null @@ -1,472 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "json_wb_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -template<> -struct TWhiteboardInfo { - using TResponseEventType = TEvWhiteboard::TEvTabletStateResponse; - using TResponseType = NKikimrWhiteboard::TEvTabletStateResponse; - using TElementType = NKikimrWhiteboard::TTabletStateInfo; - using TElementTypePacked5 = 
NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5; - using TElementKeyType = std::pair; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableTabletStateInfo(); - } - - static std::span GetElementsFieldPacked5(const TResponseType& response) { - const auto& packed5 = response.GetPacked5(); - return std::span{reinterpret_cast(packed5.data()), packed5.size() / sizeof(TElementTypePacked5)}; - } - - static size_t GetElementsCount(const TResponseType& response) { - return response.GetTabletStateInfo().size() + response.GetPacked5().size() / sizeof(TElementTypePacked5); - } - - static TElementKeyType GetElementKey(const TElementType& type) { - return TElementKeyType(type.GetTabletId(), type.GetFollowerId()); - } - - static TElementKeyType GetElementKey(const TElementTypePacked5& type) { - return TElementKeyType(type.TabletId, type.FollowerId); - } - - static TString GetDefaultMergeField() { - return "TabletId,FollowerId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TStaticMergeKey mergeKey; - TWhiteboardMerger::MergeResponsesBaseHybrid(result, responses, mergeKey); - } else { - TWhiteboardMerger::TDynamicMergeKey mergeKey(fields); - TWhiteboardMerger::MergeResponsesBase(result, responses, mergeKey); - } - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NKikimrWhiteboard::TTabletStateInfo& a, const NKikimrWhiteboard::TTabletStateInfo& b) const { - return std::make_tuple(a.GetGeneration(), a.GetChangeTime()) < std::make_tuple(b.GetGeneration(), b.GetChangeTime()); - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& a, const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& b) const 
{ - return a.Generation < b.Generation; - } -}; - -class TJsonTabletInfo : public TJsonWhiteboardRequest { - static const bool WithRetry = false; - bool ReplyWithDeadTabletsInfo; - using TBase = TJsonWhiteboardRequest; - using TThis = TJsonTabletInfo; - THashMap Tablets; - std::unordered_map EndOfRangeKeyPrefix; - TTabletId HiveId; - bool IsBase64Encode = true; -public: - TJsonTabletInfo(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : TJsonWhiteboardRequest(viewer, ev) - { - static TString prefix = "json/tabletinfo "; - LogPrefix = prefix; - } - - void Bootstrap() override { - BLOG_TRACE("Bootstrap()"); - const auto& params(Event->Get()->Request.GetParams()); - ReplyWithDeadTabletsInfo = params.Has("path"); - if (params.Has("path")) { - TBase::RequestSettings.Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), IsBase64Encode); - THolder request(new TEvTxUserProxy::TEvNavigate()); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - record->SetPath(params.Get("path")); - - TActorId txproxy = MakeTxProxyID(); - TBase::Send(txproxy, request.Release()); - UnsafeBecome(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(TBase::RequestSettings.Timeout), new TEvents::TEvWakeup()); - } else { - TBase::Bootstrap(); - if (!TBase::RequestSettings.FilterFields.empty()) { - if (IsMatchesWildcard(TBase::RequestSettings.FilterFields, "(TabletId=*)")) { - TString strTabletId(TBase::RequestSettings.FilterFields.substr(10, TBase::RequestSettings.FilterFields.size() - 11)); - TTabletId uiTabletId(FromStringWithDefault(strTabletId, {})); - if (uiTabletId) { - Tablets[uiTabletId] = NKikimrTabletBase::TTabletTypes::Unknown; - Request->Record.AddFilterTabletId(uiTabletId); - } - } - } - } - } - - TString GetColumnValue(const TCell& cell, const 
NKikimrSchemeOp::TColumnDescription& type) { - if (cell.IsNull()) { - return "NULL"; - } - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int32: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint32: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int64: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint64: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int8: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint8: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int16: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint16: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Bool: - return cell.AsValue() ? "true" : "false"; - case NScheme::NTypeIds::Date: return "Date"; - case NScheme::NTypeIds::Datetime: return "Datetime"; - case NScheme::NTypeIds::Timestamp: return "Timestamp"; - case NScheme::NTypeIds::Interval: return "Interval"; - case NScheme::NTypeIds::Date32: return "Date32"; - case NScheme::NTypeIds::Datetime64: return "Datetime64"; - case NScheme::NTypeIds::Timestamp64: return "Timestamp64"; - case NScheme::NTypeIds::Interval64: return "Interval64"; - case NScheme::NTypeIds::PairUi64Ui64: return "PairUi64Ui64"; - case NScheme::NTypeIds::String: - case NScheme::NTypeIds::String4k: - case NScheme::NTypeIds::String2m: - return IsBase64Encode ? 
Base64Encode(cell.AsBuf()) : (TStringBuilder() << '"' << cell.AsBuf() << '"'); - case NScheme::NTypeIds::Utf8: - return TStringBuilder() << '"' << cell.AsBuf() << '"'; - case NScheme::NTypeIds::Decimal: return "Decimal"; - case NScheme::NTypeIds::DyNumber: return "DyNumber"; - case NScheme::NTypeIds::Uuid: return "Uuid"; - default: - return "-"; - } - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr &ev) { - THolder describeResult = ev->Release(); - if (describeResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - const auto& pathDescription = describeResult->GetRecord().GetPathDescription(); - for (auto shard : pathDescription.GetColumnTableDescription().GetSharding().GetColumnShards()) { - Tablets[shard] = NKikimrTabletBase::TTabletTypes::ColumnShard; - } - for (auto shard : pathDescription.GetColumnStoreDescription().GetColumnShards()) { - Tablets[shard] = NKikimrTabletBase::TTabletTypes::ColumnShard; - } - if (pathDescription.HasTable()) { - std::vector keyColumns; - for (uint32 id : pathDescription.GetTable().GetKeyColumnIds()) { - for (const auto& column : pathDescription.GetTable().GetColumns()) { - if (column.GetId() == id) { - keyColumns.push_back(column); - break; - } - } - } - for (const auto& partition : pathDescription.GetTablePartitions()) { - Tablets[partition.GetDatashardId()] = NKikimrTabletBase::TTabletTypes::DataShard; - if (partition.HasEndOfRangeKeyPrefix()) { - TSerializedCellVec cellVec; - if (TSerializedCellVec::TryParse(partition.GetEndOfRangeKeyPrefix(), cellVec)) { - TStringBuilder key; - TConstArrayRef cells(cellVec.GetCells()); - if (cells.size() == keyColumns.size()) { - if (cells.size() > 1) { - key << "("; - } - for (size_t idx = 0; idx < cells.size(); ++idx) { - if (idx > 0) { - key << ","; - } - const NKikimrSchemeOp::TColumnDescription& type(keyColumns[idx]); - const TCell& cell(cells[idx]); - key << GetColumnValue(cell, type); - } - if (cells.size() > 1) { - key << ")"; - } - } 
- if (key) { - EndOfRangeKeyPrefix[partition.GetDatashardId()] = key; - } - } - } - } - } - for (const auto& partition : pathDescription.GetPersQueueGroup().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::PersQueue; - } - if (pathDescription.HasRtmrVolumeDescription()) { - for (const auto& partition : pathDescription.GetRtmrVolumeDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::RTMRPartition; - } - } - if (pathDescription.HasBlockStoreVolumeDescription()) { - for (const auto& partition : pathDescription.GetBlockStoreVolumeDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::BlockStorePartition; - } - if (pathDescription.GetBlockStoreVolumeDescription().HasVolumeTabletId()) { - Tablets[pathDescription.GetBlockStoreVolumeDescription().GetVolumeTabletId()] = NKikimrTabletBase::TTabletTypes::BlockStoreVolume; - } - } - if (pathDescription.GetKesus().HasKesusTabletId()) { - Tablets[pathDescription.GetKesus().GetKesusTabletId()] = NKikimrTabletBase::TTabletTypes::Kesus; - } - if (pathDescription.HasSolomonDescription()) { - for (const auto& partition : pathDescription.GetSolomonDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::KeyValue; - } - } - if (pathDescription.GetFileStoreDescription().HasIndexTabletId()) { - Tablets[pathDescription.GetFileStoreDescription().GetIndexTabletId()] = NKikimrTabletBase::TTabletTypes::FileStore; - } - if (pathDescription.GetSequenceDescription().HasSequenceShard()) { - Tablets[pathDescription.GetSequenceDescription().GetSequenceShard()] = NKikimrTabletBase::TTabletTypes::SequenceShard; - } - if (pathDescription.GetReplicationDescription().HasControllerId()) { - Tablets[pathDescription.GetReplicationDescription().GetControllerId()] = NKikimrTabletBase::TTabletTypes::ReplicationController; - } - if 
(pathDescription.GetBlobDepotDescription().HasTabletId()) { - Tablets[pathDescription.GetBlobDepotDescription().GetTabletId()] = NKikimrTabletBase::TTabletTypes::BlobDepot; - } - - if (pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeDir - || pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeSubDomain - || pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeExtSubDomain) { - if (pathDescription.HasDomainDescription()) { - const auto& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Coordinator; - } - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Mediator; - } - if (domainDescription.GetProcessingParams().HasSchemeShard()) { - Tablets[domainDescription.GetProcessingParams().GetSchemeShard()] = NKikimrTabletBase::TTabletTypes::SchemeShard; - } - if (domainDescription.GetProcessingParams().HasHive()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetHive()] = NKikimrTabletBase::TTabletTypes::Hive; - HiveId = domainDescription.GetProcessingParams().GetHive(); - } - if (domainDescription.GetProcessingParams().HasGraphShard()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetGraphShard()] = NKikimrTabletBase::TTabletTypes::GraphShard; - } - if (domainDescription.GetProcessingParams().HasSysViewProcessor()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetSysViewProcessor()] = NKikimrTabletBase::TTabletTypes::SysViewProcessor; - } - if (domainDescription.GetProcessingParams().HasStatisticsAggregator()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetStatisticsAggregator()] = NKikimrTabletBase::TTabletTypes::StatisticsAggregator; - } - 
if (domainDescription.GetProcessingParams().HasBackupController()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetBackupController()] = NKikimrTabletBase::TTabletTypes::BackupController; - } - TIntrusivePtr domains = AppData()->DomainsInfo; - auto* domain = domains->GetDomain(); - if (describeResult->GetRecord().GetPathOwnerId() == domain->SchemeRoot && describeResult->GetRecord().GetPathId() == 1) { - Tablets[domain->SchemeRoot] = NKikimrTabletBase::TTabletTypes::SchemeShard; - Tablets[domains->GetHive()] = NKikimrTabletBase::TTabletTypes::Hive; - HiveId = domains->GetHive(); - Tablets[MakeBSControllerID()] = NKikimrTabletBase::TTabletTypes::BSController; - Tablets[MakeConsoleID()] = NKikimrTabletBase::TTabletTypes::Console; - Tablets[MakeNodeBrokerID()] = NKikimrTabletBase::TTabletTypes::NodeBroker; - Tablets[MakeTenantSlotBrokerID()] = NKikimrTabletBase::TTabletTypes::TenantSlotBroker; - Tablets[MakeCmsID()] = NKikimrTabletBase::TTabletTypes::Cms; - for (TTabletId tabletId : domain->Coordinators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Coordinator; - } - for (TTabletId tabletId : domain->Mediators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Mediator; - } - for (TTabletId tabletId : domain->TxAllocators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::TxAllocator; - } - } - } - } - } - if (Tablets.empty()) { - ReplyAndPassAway(); - } else { - TBase::Bootstrap(); - for (auto tablet : Tablets) { - Request->Record.AddFilterTabletId(tablet.first); - } - } - } - - virtual void FilterResponse(NKikimrWhiteboard::TEvTabletStateResponse& response) override { - if (!Tablets.empty()) { - NKikimrWhiteboard::TEvTabletStateResponse result; - for (const NKikimrWhiteboard::TTabletStateInfo& info : response.GetTabletStateInfo()) { - auto tablet = Tablets.find(info.GetTabletId()); - if (tablet != Tablets.end()) { - auto tabletInfo = result.MutableTabletStateInfo()->Add(); - tabletInfo->CopyFrom(info); - auto 
itKey = EndOfRangeKeyPrefix.find(info.GetTabletId()); - if (itKey != EndOfRangeKeyPrefix.end()) { - tabletInfo->SetEndOfRangeKeyPrefix(itKey->second); - } - Tablets.erase(tablet->first); - } - } - if (ReplyWithDeadTabletsInfo) { - for (auto tablet : Tablets) { - auto deadTablet = result.MutableTabletStateInfo()->Add(); - deadTablet->SetTabletId(tablet.first); - deadTablet->SetState(NKikimrWhiteboard::TTabletStateInfo::Dead); - deadTablet->SetType(tablet.second); - deadTablet->SetHiveId(HiveId); - } - } - result.SetResponseTime(response.GetResponseTime()); - response = std::move(result); - } - for (NKikimrWhiteboard::TTabletStateInfo& info : *response.MutableTabletStateInfo()) { - info.SetOverall(GetWhiteboardFlag(GetFlagFromTabletState(info.GetState()))); - } - TBase::FilterResponse(response); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void PassAway() override { - TBase::PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: node_id - in: query - description: node identifier - required: false - type: integer - - name: path - in: query - description: schema path - required: false - type: string - - name: merge - in: query - description: merge information from nodes - required: false - type: boolean - - name: group - in: query - description: group information by field - required: false - type: string - - name: all - in: query - description: return all possible key combinations (for enums only) - required: false - type: boolean - - name: filter - in: query - description: filter information by field - required: false - type: string - - name: alive - in: query - description: request from alive (connected) nodes only - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - 
required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: retries - in: query - description: number of retries - required: false - type: integer - - name: retry_period - in: query - description: retry period in ms - required: false - type: integer - default: 500 - - name: static - in: query - description: request from static nodes only - required: false - type: boolean - - name: since - in: query - description: filter by update time - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tablet information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablets"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tenantinfo.h b/ydb/core/viewer/json_tenantinfo.h deleted file mode 100644 index 11d785086384..000000000000 --- a/ydb/core/viewer/json_tenantinfo.h +++ /dev/null @@ -1,937 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" -#include "log.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTenantInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - THashMap TenantByPath; - THashMap TenantBySubDomainKey; - THashMap HcOverallByTenantPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveStorageStats; - NMon::TEvHttpInfo::TPtr Event; - THashSet Subscribers; - THashSet 
WhiteboardNodesRequested; - THashSet OffloadTenantsRequested; - THashSet MetadataCacheRequested; - THashMap NodeIdsToTenant; // for tablet info - TMap WhiteboardSystemStateResponse; - THashMap> WhiteboardTabletStateResponse; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - TString DomainPath; - bool Tablets = false; - bool SystemTablets = false; - bool Storage = false; - bool Nodes = false; - bool Users = false; - bool OffloadMerge = false; - THashMap> TenantNodes; - THashMap OffloadMergedTabletStateResponse; - THashMap OffloadMergedSystemStateResponse; - TTabletId RootHiveId = 0; - TString RootId; // id of root domain (tenant) - NKikimrViewer::TTenantInfo Result; - - struct TStorageQuota { - uint64 SoftQuota = 0; - uint64 HardQuota = 0; - }; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTenantInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - TString GetLogPrefix() { - static TString prefix = "json/tenantinfo "; - return prefix; - } - - TString GetDomainId(TPathId pathId) { - return TStringBuilder() << pathId.OwnerId << '-' << pathId.LocalPathId; - } - - bool IsFilterByPath() { - return !Path.empty() && DomainPath != Path; - } - - bool IsValidTenant(const TString& path) { - return !IsFilterByPath() || Path == path; - } - - bool IsFilterByOwner() { - return !User.empty(); - } - - bool IsValidOwner(const std::unordered_set& users) { - return !IsFilterByOwner() || users.count(User) != 0; - } - - void Bootstrap() { - BLOG_TRACE("Bootstrap()"); - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Followers = false; - Metrics = true; - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - 
Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - SystemTablets = FromStringWithDefault(params.Get("system_tablets"), Tablets); // Tablets here is by design - Storage = FromStringWithDefault(params.Get("storage"), Storage); - Nodes = FromStringWithDefault(params.Get("nodes"), Nodes); - Users = FromStringWithDefault(params.Get("users"), Users); - User = params.Get("user"); - Path = params.Get("path"); - OffloadMerge = FromStringWithDefault(params.Get("offload_merge"), OffloadMerge); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - - RequestConsoleListTenants(); - - DomainPath = "/" + domain->Name; - if (!IsFilterByPath()) { - TPathId subDomainKey(domain->SchemeRoot, 1); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - tenant.SetId(GetDomainId(subDomainKey)); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::RUNNING); - tenant.SetType(NKikimrViewer::Domain); - RequestSchemeCacheNavigate(DomainPath); - } - RootId = GetDomainId({domain->SchemeRoot, 1}); - RootHiveId = domains->GetHive(); - RequestHiveDomainStats(RootHiveId); - if (Storage) { - RequestHiveStorageStats(RootHiveId); - } - - if (Requests == 0) { - ReplyAndPassAway(); - } - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : Subscribers) { - if (nodeId != SelfId().NodeId()) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NConsole::TEvConsole::TEvGetTenantStatusResponse, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvHive::TEvResponseHiveStorageStats, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - 
hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvViewer::TEvViewerResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(NHealthCheck::TEvSelfCheckResultProto, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - BLOG_TRACE("Received ListTenantsResponse"); - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (!IsValidTenant(path)) { - continue; - } - RequestConsoleGetTenantStatus(path); - RequestSchemeCacheNavigate(path); - - if (AppData()->FeatureFlags.GetEnableDbMetadataCache()) { - RequestStateStorageMetadataCacheEndpointsLookup(path); - } - } - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { - BLOG_TRACE("Received GetTenantStatusResponse"); - Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantStatusResult); - TString path = getTenantStatusResult.path(); - NKikimrViewer::TTenant& tenant = TenantByPath[path]; - tenant.SetName(path); - tenant.SetState(getTenantStatusResult.state()); - if (getTenantStatusResult.has_required_shared_resources()) { - tenant.SetType(NKikimrViewer::Shared); - RequestSchemeCacheNavigate(path); - } - for (const Ydb::Cms::StorageUnits& unit : getTenantStatusResult.allocated_resources().storage_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddAllocated(); - resource.SetType("storage"); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const 
Ydb::Cms::StorageUnits& unit : getTenantStatusResult.required_resources().storage_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddRequired(); - resource.SetType("storage"); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const Ydb::Cms::ComputationalUnits& unit : getTenantStatusResult.allocated_resources().computational_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddAllocated(); - resource.SetType("compute"); - resource.SetZone(unit.availability_zone()); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const Ydb::Cms::ComputationalUnits& unit : getTenantStatusResult.required_resources().computational_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddRequired(); - resource.SetType("compute"); - resource.SetZone(unit.availability_zone()); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - Ydb::Cms::DatabaseQuotas& quotas = *tenant.MutableDatabaseQuotas(); - quotas.MergeFrom(getTenantStatusResult.database_quotas()); - - RequestDone(); - } - - void SendWhiteboardSystemStateRequest(const TNodeId nodeId) { - Subscribers.insert(nodeId); - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - BLOG_TRACE("Tenant " << NodeIdsToTenant[nodeId] << " send to " << nodeId << " TEvSystemStateRequest: " << request->Record.ShortDebugString()); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - - void SendWhiteboardTabletStateRequest(const TNodeId nodeId) { - Subscribers.insert(nodeId); - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - request->Record.SetFormat("packed5"); - BLOG_TRACE("Tenant " << NodeIdsToTenant[nodeId] << " send to " << nodeId << " TEvTabletStateRequest: " << 
request->Record.ShortDebugString()); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - - void SendWhiteboardRequests(const TNodeId nodeId) { - if (WhiteboardNodesRequested.insert(nodeId).second) { - SendWhiteboardSystemStateRequest(nodeId); - if (Tablets) { - SendWhiteboardTabletStateRequest(nodeId); - } - } - } - - void SendOffloadRequests(const TString& tenantId) { - std::vector& nodesIds = TenantNodes[tenantId]; - if (!nodesIds.empty() && OffloadTenantsRequested.insert(tenantId).second) { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - auto itPos = std::next(nodesIds.begin(), hash % nodesIds.size()); - std::nth_element(nodesIds.begin(), itPos, nodesIds.end()); - TNodeId nodeId = *itPos; - - Subscribers.insert(nodeId); - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder sysRequest = MakeHolder(); - sysRequest->Record.MutableSystemRequest(); - sysRequest->Record.SetTimeout(Timeout / 3); - for (auto nodeId : nodesIds) { - sysRequest->Record.MutableLocation()->AddNodeId(nodeId); - } - BLOG_TRACE("Tenant " << tenantId << " send to " << nodeId << " TEvViewerRequest: " << sysRequest->Record.ShortDebugString()); - ViewerWhiteboardCookie cookie (NKikimrViewer::TEvViewerRequest::kSystemRequest, nodeId); - SendRequest(viewerServiceId, sysRequest.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - - if (Tablets) { - THolder tblRequest = MakeHolder(); - tblRequest->Record.MutableTabletRequest()->SetFormat("packed5"); - tblRequest->Record.SetTimeout(Timeout / 3); - for (auto nodeId : nodesIds) { - tblRequest->Record.MutableLocation()->AddNodeId(nodeId); - } - BLOG_TRACE("Tenant " << tenantId << " send to " << nodeId << " TEvViewerRequest: " << tblRequest->Record.ShortDebugString()); - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kTabletRequest, nodeId); - SendRequest(viewerServiceId, 
tblRequest.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - } - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { - TPathId subDomainKey({hiveStat.GetShardId(), hiveStat.GetPathId()}); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - TString tenantId = GetDomainId({hiveStat.GetShardId(), hiveStat.GetPathId()}); - tenant.SetId(tenantId); - if (ev->Cookie != RootHiveId || tenant.GetId() == RootId) { - if (!tenant.HasMetrics()) { - tenant.MutableMetrics()->CopyFrom(hiveStat.GetMetrics()); - } - if (tenant.StateStatsSize() == 0) { - tenant.MutableStateStats()->CopyFrom(hiveStat.GetStateStats()); - } - if (tenant.NodeIdsSize() == 0) { - tenant.MutableNodeIds()->CopyFrom(hiveStat.GetNodeIds()); - } - if (tenant.GetAliveNodes() == 0) { - tenant.SetAliveNodes(hiveStat.GetAliveNodes()); - } - } - - BLOG_TRACE("Received HiveDomainStats for " << tenant.GetId() << " from " << ev->Cookie); - std::vector nodesIds; - nodesIds.reserve(hiveStat.NodeIdsSize()); - for (auto nodeId : hiveStat.GetNodeIds()) { - nodesIds.push_back(nodeId); - NodeIdsToTenant.insert({nodeId, tenantId}); - } - TenantNodes[tenantId] = nodesIds; - - if (OffloadMerge) { - SendOffloadRequests(tenantId); - } else { - for (TNodeId nodeId : hiveStat.GetNodeIds()) { - SendWhiteboardRequests(nodeId); - } - } - } - HiveDomainStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveStorageStats::TPtr& ev) { - BLOG_TRACE("Received HiveStorageStats from " << ev->Cookie); - HiveStorageStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - auto 
domainInfo = ev->Get()->Request->ResultSet.begin()->DomainInfo; - TTabletId hiveId = domainInfo->Params.GetHive(); - if (hiveId) { - RequestHiveDomainStats(hiveId); - if (Storage) { - RequestHiveStorageStats(hiveId); - } - } - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[domainInfo->DomainKey]; - if (domainInfo->ResourcesDomainKey != domainInfo->DomainKey) { - NKikimrViewer::TTenant& sharedTenant = TenantBySubDomainKey[domainInfo->ResourcesDomainKey]; - if (sharedTenant.GetType() != NKikimrViewer::Shared) { - sharedTenant.SetType(NKikimrViewer::Shared); - RequestSchemeCacheNavigate(domainInfo->ResourcesDomainKey); - } - tenant.SetType(NKikimrViewer::Serverless); - tenant.SetResourceId(GetDomainId(domainInfo->ResourcesDomainKey)); - } - TString id = GetDomainId(domainInfo->DomainKey); - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); - BLOG_TRACE("Received Navigate for " << id << " " << path); - tenant.SetId(id); - tenant.SetName(path); - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.SetType(NKikimrViewer::Dedicated); - } - NavigateResult[id] = std::move(ev->Get()->Request); - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Received TEvSystemStateResponse from " << nodeId); - WhiteboardSystemStateResponse[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Received TEvTabletStateResponse from " << nodeId << " with " - << TWhiteboardInfo::GetElementsCount(ev->Get()->Record) << " tablets"); - auto tenantId = NodeIdsToTenant[nodeId]; - WhiteboardTabletStateResponse[tenantId][nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NHealthCheck::TEvSelfCheckResultProto::TPtr& ev) { - auto result = std::move(ev->Get()->Record); - if 
(result.database_status_size() == 1) { - HcOverallByTenantPath.emplace(result.database_status(0).name(), GetViewerFlag(result.database_status(0).overall())); - } - - RequestDone(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - auto activeNode = TDatabaseMetadataCache::PickActiveNode(ev->Get()->InfoEntries); - if (activeNode != 0) { - Subscribers.insert(activeNode); - std::optional cache = MakeDatabaseMetadataCacheId(activeNode); - auto request = MakeHolder(); - if (MetadataCacheRequested.insert(ev->Get()->Path).second) { - SendRequest(*cache, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, activeNode); - } - } - RequestDone(); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - auto tenantId = NodeIdsToTenant[nodeId]; - switch (ev->Get()->Record.GetResponseCase()) { - case NKikimrViewer::TEvViewerResponse::kTabletResponse: - BLOG_TRACE("Received TEvViewerResponse from " << nodeId << " with " - << TWhiteboardInfo::GetElementsCount(ev->Get()->Record.GetTabletResponse()) - << " tablets"); - OffloadMergedTabletStateResponse[tenantId] = std::move(ev->Get()->Record); - RequestDone(); - break; - case NKikimrViewer::TEvViewerResponse::kSystemResponse: - BLOG_TRACE("Received TEvViewerResponse from " << nodeId); - OffloadMergedSystemStateResponse[tenantId] = std::move(ev->Get()->Record); - RequestDone(); - break; - default: - break; - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NHealthCheck::EvSelfCheckRequestProto) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - auto tenantId = NodeIdsToTenant[nodeId]; - if (HcOverallByTenantPath.emplace(tenantId, NKikimrViewer::EFlag::Grey).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - ui32 nodeId = ev.Get()->Cookie; - 
BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - if (WhiteboardSystemStateResponse.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvTabletStateRequest) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - auto tenantId = NodeIdsToTenant[nodeId]; - if (WhiteboardTabletStateResponse[tenantId].emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - ViewerWhiteboardCookie cookie(ev.Get()->Cookie); - auto nodeId = cookie.GetNodeId(); - auto tenantId = NodeIdsToTenant[nodeId]; - BLOG_TRACE("Undelivered for node " << cookie.GetNodeId() << " event " << ev->Get()->SourceType); - switch (cookie.GetRequestCase()) { - case NKikimrViewer::TEvViewerRequest::kTabletRequest: - if (OffloadMergedTabletStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - // fallback - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardTabletStateRequest(nodeId); - } - RequestDone(); - }; - - break; - case NKikimrViewer::TEvViewerRequest::kSystemRequest: - if (OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - // fallback - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - break; - default: - break; - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - TNodeId nodeId = ev->Get()->NodeId; - auto tenantId = NodeIdsToTenant[nodeId]; - BLOG_TRACE("NodeDisconnected for nodeId " << nodeId); - - if (OffloadTenantsRequested.count(tenantId) > 0) { - // fallback - if (OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - for (TNodeId nodeId : 
TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - if (Tablets && OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - } - if (WhiteboardNodesRequested.count(nodeId) > 0) { - if (WhiteboardSystemStateResponse.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - if (Tablets && WhiteboardTabletStateResponse[tenantId].emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - if (MetadataCacheRequested.count(tenantId) > 0) { - if (HcOverallByTenantPath.emplace(tenantId, NKikimrViewer::EFlag::Grey).second) { - RequestDone(); - } - } - } - - NKikimrViewer::TStorageUsage::EType GetStorageType(const TString& poolKind) { - auto kind = to_lower(poolKind); - if (kind.StartsWith("ssd") || kind.StartsWith("nvme")) { - return NKikimrViewer::TStorageUsage::SSD; - } - if (kind.StartsWith("hdd") || kind.StartsWith("rot")) { - return NKikimrViewer::TStorageUsage::HDD; - } - return NKikimrViewer::TStorageUsage::None; - } - - void ReplyAndPassAway() { - BLOG_TRACE("ReplyAndPassAway() started"); - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - THashMap OverallByDomainId; - TMap NodeSystemStateInfo; - - for (auto& [tenantId, record] : OffloadMergedSystemStateResponse) { - for (auto& systemState : *(record.MutableSystemResponse()->MutableSystemStateInfo())) { - auto ni = systemState.GetNodeId(); - NodeSystemStateInfo[ni] = std::move(systemState); - } - } - for (auto& [nodeId, record] : WhiteboardSystemStateResponse) { - if (record.SystemStateInfoSize() == 1) { - NodeSystemStateInfo[nodeId] = std::move(record.GetSystemStateInfo(0)); - } - } - - for (const auto& [subDomainKey, tenantBySubDomainKey] : TenantBySubDomainKey) { - TString id(GetDomainId(subDomainKey)); 
- NKikimrWhiteboard::TEvTabletStateResponse tabletInfo; - THashMap tabletInfoIndex; - if (Tablets) { - if (WhiteboardTabletStateResponse[id].size() > 0) { - TWhiteboardInfo::MergeResponses(tabletInfo, WhiteboardTabletStateResponse[id]); - } else if (OffloadMerge) { - tabletInfo = std::move(*(OffloadMergedTabletStateResponse[id].MutableTabletResponse())); - } - if (SystemTablets) { - for (const auto& info : TWhiteboardInfo::GetElementsField(tabletInfo)) { - tabletInfoIndex[info.GetTabletId()] = &info; - } - } - } - - NKikimrViewer::EFlag overall = NKikimrViewer::EFlag::Grey; - auto itNavigate = NavigateResult.find(id); - if (itNavigate != NavigateResult.end()) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry = itNavigate->second->ResultSet.front(); - TString path = CanonizePath(entry.Path); - if (!IsValidTenant(path)) { - continue; - } - std::unordered_set users; - if(!User.empty() || Users) { - if (entry.SecurityObject) { - users.emplace(entry.SecurityObject->GetOwnerSID()); - for (const NACLibProto::TACE& ace : entry.SecurityObject->GetACL().GetACE()) { - if (ace.GetAccessType() == (ui32)NACLib::EAccessType::Allow) { - users.emplace(ace.GetSID()); - } - } - } - if (!IsValidOwner(users)) { - continue; - } - } - NKikimrViewer::TTenant& tenant = *Result.AddTenantInfo(); - auto itTenantByPath = TenantByPath.find(path); - if (itTenantByPath != TenantByPath.end()) { - tenant = std::move(itTenantByPath->second); - TenantByPath.erase(itTenantByPath); - } - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.MergeFrom(tenantBySubDomainKey); - } else { - auto oldType = tenant.GetType(); - tenant.MergeFrom(tenantBySubDomainKey); - tenant.SetType(oldType); - } - if (!tenant.GetId()) { - tenant.SetId(GetDomainId(subDomainKey)); - } - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.SetType(NKikimrViewer::Dedicated); - } - tenant.SetCreateTime(TInstant::MicroSeconds(entry.CreateStep).MilliSeconds()); - if (entry.SecurityObject) { - 
tenant.SetOwner(entry.SecurityObject->GetOwnerSID()); - } - for (const TString& user : users) { - tenant.AddUsers(user); - } - for (const auto& userAttribute : entry.Attributes) { - tenant.MutableUserAttributes()->insert({userAttribute.first, userAttribute.second}); - } - - TStackVec tablets; - for (TTabletId tabletId : entry.DomainInfo->Params.GetCoordinators()) { - tablets.emplace_back(tabletId); - } - for (TTabletId tabletId : entry.DomainInfo->Params.GetMediators()) { - tablets.emplace_back(tabletId); - } - if (entry.DomainInfo->Params.HasSchemeShard()) { - tablets.emplace_back(entry.DomainInfo->Params.GetSchemeShard()); - } else { - tablets.emplace_back(domain->SchemeRoot); - tablets.emplace_back(MakeBSControllerID()); - tablets.emplace_back(MakeConsoleID()); - } - TTabletId hiveId = domains->GetHive(); - if (entry.DomainInfo->Params.HasHive()) { - hiveId = entry.DomainInfo->Params.GetHive(); - } else { - if (tenant.GetType() == NKikimrViewer::Serverless) { - auto itResourceNavigate = NavigateResult.find(tenant.GetResourceId()); - if (itResourceNavigate != NavigateResult.end()) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry = itResourceNavigate->second->ResultSet.front(); - if (entry.DomainInfo->Params.HasHive()) { - hiveId = entry.DomainInfo->Params.GetHive(); - } - } - } - } - tablets.emplace_back(hiveId); - - if (SystemTablets) { - for (TTabletId tabletId : tablets) { - auto it = tabletInfoIndex.find(tabletId); - if (it != tabletInfoIndex.end()) { - NKikimrWhiteboard::TTabletStateInfo* tabletInfo = tenant.AddSystemTablets(); - tabletInfo->CopyFrom(*it->second); - NKikimrViewer::EFlag flag = GetFlagFromTabletState(tabletInfo->GetState()); - tabletInfo->SetOverall(GetWhiteboardFlag(flag)); - overall = Max(overall, flag); - } - } - } - - if (Storage) { - auto itHiveStorageStats = HiveStorageStats.find(hiveId); - if (itHiveStorageStats != HiveStorageStats.end()) { - const NKikimrHive::TEvResponseHiveStorageStats& record = 
itHiveStorageStats->second.Get()->Record; - uint64 storageAllocatedSize = 0; - uint64 storageAvailableSize = 0; - uint64 storageMinAvailableSize = std::numeric_limits::max(); - uint64 storageGroups = 0; - for (const NKikimrHive::THiveStoragePoolStats& poolStat : record.GetPools()) { - if (poolStat.GetName().StartsWith(tenantBySubDomainKey.GetName())) { - for (const NKikimrHive::THiveStorageGroupStats& groupStat : poolStat.GetGroups()) { - storageAllocatedSize += groupStat.GetAllocatedSize(); - storageAvailableSize += groupStat.GetAvailableSize(); - storageMinAvailableSize = std::min(storageMinAvailableSize, groupStat.GetAvailableSize()); - ++storageGroups; - } - } - } - uint64 storageAllocatedLimit = storageAllocatedSize + storageAvailableSize; - tenant.SetStorageAllocatedSize(storageAllocatedSize); - tenant.SetStorageAllocatedLimit(storageAllocatedLimit); - tenant.SetStorageMinAvailableSize(storageMinAvailableSize); - tenant.SetStorageGroups(storageGroups); - } - - THashMap storageUsageByType; - THashMap storageQuotasByType; - if (entry.DomainDescription) { - for (const auto& poolUsage : entry.DomainDescription->Description.GetDiskSpaceUsage().GetStoragePoolsUsage()) { - auto type = GetStorageType(poolUsage.GetPoolKind()); - storageUsageByType[type] += poolUsage.GetTotalSize(); - } - } - - for (const auto& quota : tenant.GetDatabaseQuotas().storage_quotas()) { - auto type = GetStorageType(quota.unit_kind()); - auto& usage = storageQuotasByType[type]; - usage.SoftQuota += quota.data_size_soft_quota(); - usage.HardQuota += quota.data_size_hard_quota(); - } - - for (const auto& [type, size] : storageUsageByType) { - auto& storageUsage = *tenant.AddStorageUsage(); - storageUsage.SetType(type); - storageUsage.SetSize(size); - auto it = storageQuotasByType.find(type); - if (it != storageQuotasByType.end()) { - storageUsage.SetLimit(it->second.HardQuota); - storageUsage.SetSoftQuota(it->second.SoftQuota); - storageUsage.SetHardQuota(it->second.HardQuota); - } - } - } - - 
THashSet tenantNodes; - - for (TNodeId nodeId : tenant.GetNodeIds()) { - auto itNodeInfo = NodeSystemStateInfo.find(nodeId); - if (itNodeInfo != NodeSystemStateInfo.end()) { - if (Nodes) { - tenant.AddNodes()->CopyFrom(itNodeInfo->second); - } - for (const auto& poolStat : itNodeInfo->second.GetPoolStats()) { - TString poolName = poolStat.GetName(); - NKikimrWhiteboard::TSystemStateInfo_TPoolStats* targetPoolStat = nullptr; - for (NKikimrWhiteboard::TSystemStateInfo_TPoolStats& ps : *tenant.MutablePoolStats()) { - if (ps.GetName() == poolName) { - targetPoolStat = &ps; - break; - } - } - if (targetPoolStat == nullptr) { - targetPoolStat = tenant.AddPoolStats(); - targetPoolStat->SetName(poolName); - } - double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads(); - poolUsage += poolStat.GetUsage() * poolStat.GetThreads(); - ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads(); - if (poolThreads != 0) { - double threadUsage = poolUsage / poolThreads; - targetPoolStat->SetUsage(threadUsage); - targetPoolStat->SetThreads(poolThreads); - } - tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads()); - } - if (itNodeInfo->second.HasMemoryUsed()) { - tenant.SetMemoryUsed(tenant.GetMemoryUsed() + itNodeInfo->second.GetMemoryUsed()); - } - if (itNodeInfo->second.HasMemoryLimit()) { - tenant.SetMemoryLimit(tenant.GetMemoryLimit() + itNodeInfo->second.GetMemoryLimit()); - } - overall = Max(overall, GetViewerFlag(itNodeInfo->second.GetSystemState())); - } - tenantNodes.emplace(nodeId); - } - - if (tenant.GetType() == NKikimrViewer::Serverless) { - tenant.SetStorageAllocatedSize(tenant.GetMetrics().GetStorage()); - const bool noExclusiveNodes = tenantNodes.empty(); - if (noExclusiveNodes) { - tenant.SetMemoryUsed(tenant.GetMetrics().GetMemory()); - tenant.ClearMemoryLimit(); - tenant.SetCoresUsed(static_cast(tenant.GetMetrics().GetCPU()) / 1000000); - } - } - - if (Tablets) { - THashMap, ui32> tablets; - for 
(const auto& pbTablet : tabletInfo.GetTabletStateInfo()) { - if (tenantNodes.count(pbTablet.GetNodeId()) > 0) { - NKikimrViewer::EFlag state = GetFlagFromTabletState(pbTablet.GetState()); - tablets[std::make_pair(pbTablet.GetType(), state)]++; - } - } - - for (const auto& [prTypeState, prTabletCount] : tablets) { - NKikimrViewer::TTabletStateInfo& tablet = *tenant.AddTablets(); - tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first)); - tablet.SetState(prTypeState.second); - tablet.SetCount(prTabletCount); - } - } - if (HcOverallByTenantPath.count(path) > 0 && HcOverallByTenantPath[path] != NKikimrViewer::EFlag::Grey) { - tenant.SetOverall(HcOverallByTenantPath[path]); - OverallByDomainId[tenant.GetId()] = HcOverallByTenantPath[path]; - } else { - tenant.SetOverall(overall); - OverallByDomainId[tenant.GetId()] = overall; - } - } - } - for (const std::pair& prTenant : TenantByPath) { - const TString& path(prTenant.first); - if (!IsValidTenant(path)) { - continue; - } - if (IsFilterByOwner()) { - continue; - } - const NKikimrViewer::TTenant& tenantByPath(prTenant.second); - NKikimrViewer::EFlag overall = NKikimrViewer::EFlag::Red; - NKikimrViewer::TTenant& tenant = *Result.AddTenantInfo(); - tenant.MergeFrom(tenantByPath); - tenant.SetName(path); - tenant.SetOverall(overall); - if (tenant.GetId()) { - OverallByDomainId[tenant.GetId()] = overall; - } - } - for (NKikimrViewer::TTenant& tenant: *Result.MutableTenantInfo()) { - if (tenant.GetType() != NKikimrViewer::Serverless) { - continue; - } - auto it = OverallByDomainId.find(tenant.GetResourceId()); - if (it != OverallByDomainId.end()) { - tenant.SetOverall(it->second); - } - } - std::sort(Result.MutableTenantInfo()->begin(), Result.MutableTenantInfo()->end(), - [](const NKikimrViewer::TTenant& a, const NKikimrViewer::TTenant& b) { - return a.name() < b.name(); - }); - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new 
NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - BLOG_TRACE("Timeout occurred"); - Result.AddErrors("Timeout occurred"); - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: user - in: query - description: tenant owner - required: false - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: tablets - in: query - description: return tablets - required: false - type: boolean - - name: system_tablets - in: query - description: return system tablets - required: false - type: boolean - - name: offload_merge - in: query - description: use offload merge - required: false - type: boolean - - name: storage - in: query - description: return storage info - required: false - type: boolean - - name: nodes - in: query - description: return nodes info - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "\"Tenant info (detailed)\""; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "\"Returns information about tenants\""; - } -}; - -} -} diff --git 
a/ydb/core/viewer/json_tenants.h b/ydb/core/viewer/json_tenants.h deleted file mode 100644 index e15ebdfcc152..000000000000 --- a/ydb/core/viewer/json_tenants.h +++ /dev/null @@ -1,154 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTenants : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NKikimrViewer::TTenants Result; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool State = true; - THashMap TenantIndex; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTenants(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - State = FromStringWithDefault(params.Get("state"), true); - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - NKikimrViewer::TTenant& tenant = *Result.AddTenants(); - tenant.SetName("/" + domain->Name); - if (State) { - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::State::GetDatabaseStatusResult_State_RUNNING); - } - RequestConsoleListTenants(); - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NConsole::TEvConsole::TEvGetTenantStatusResponse, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, 
TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - NKikimrViewer::TTenant& tenant = *Result.AddTenants(); - tenant.SetName(path); - TenantIndex[path] = &tenant; - if (State) { - RequestConsoleGetTenantStatus(path); - } - } - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { - Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantStatusResult); - auto itTenant = TenantIndex.find(getTenantStatusResult.path()); - if (itTenant != TenantIndex.end()) { - NKikimrViewer::TTenant& tenant = *itTenant->second; - tenant.SetState(getTenantStatusResult.state()); - } - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: state - in: query - description: return tenant state - required: false - type: boolean 
- default: true - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tenant info (brief)"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns list of tenants"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_topicinfo.h b/ydb/core/viewer/json_topicinfo.h deleted file mode 100644 index d5880859ff16..000000000000 --- a/ydb/core/viewer/json_topicinfo.h +++ /dev/null @@ -1,158 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTopicInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrLabeledCounters::TEvTabletLabeledCountersResponse TopicInfoResult; - TJsonSettings JsonSettings; - TString Topic; - TString Client; - TString GroupNames; - bool ShowAll = false; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTopicInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Topic = params.Get("path"); - Client = params.Has("client") ? 
params.Get("client") : "total"; - GroupNames = params.Get("group_names"); - ShowAll = FromStringWithDefault(params.Get("all"), false); - size_t pos = Topic.rfind('/'); - if (pos != TString::npos) - Topic = Topic.substr(pos + 1); - //proxy is not used - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PersQueue, ctx); - - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - TString groupPrefix = Client + "/"; - TString groupSuffix = "/" + Topic; - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - const auto& uc = ev->Get()->Record.GetLabeledCountersByGroup(i); - const TString& group(uc.GetGroup()); - if (ShowAll - || (group.StartsWith(groupPrefix) && group.EndsWith(groupSuffix)) - || uc.GetGroup() == Topic - || uc.GetGroupNames() == GroupNames) { - TopicInfoResult.AddLabeledCountersByGroup()->CopyFrom(uc); - } - } - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, TopicInfoResult, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> 
-struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: client - in: query - description: client name - required: false - type: string - default: total - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: all - in: query - description: return all topics and all clients - required: false - type: boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Topic information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Information about topic"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_vdisk_evict.h b/ydb/core/viewer/json_vdisk_evict.h deleted file mode 100644 index 3af5a956c0b3..000000000000 --- a/ydb/core/viewer/json_vdisk_evict.h +++ /dev/null @@ -1,269 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonVDiskEvict : public TViewerPipeClient { - enum EEv { - EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); - - struct TEvRetryNodeRequest : NActors::TEventLocal { - TEvRetryNodeRequest() - {} - }; - -protected: - using TThis = TJsonVDiskEvict; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Timeout = 0; - ui32 ActualRetries = 0; - ui32 Retries = 0; - TDuration 
RetryPeriod = TDuration::MilliSeconds(500); - - std::unique_ptr Response; - - ui32 GroupId = 0; - ui32 GroupGeneration = 0; - ui32 FailRealmIdx = 0; - ui32 FailDomainIdx = 0; - ui32 VdiskIdx = 0; - bool Force = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonVDiskEvict(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - inline ui32 GetRequiredParam(const TCgiParameters& params, const std::string& name, ui32& obj) { - if (!TryFromString(params.Get(name), obj)) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "field '" << name << "' or 'vdisk_id' are required"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return false; - } - return true; - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - TString vdisk_id = params.Get("vdisk_id"); - if (vdisk_id) { - TVector parts = StringSplitter(vdisk_id).Split('-').SkipEmpty(); - if (parts.size() == 5) { - GroupId = FromStringWithDefault(parts[0], Max()); - GroupGeneration = FromStringWithDefault(parts[1], Max()); - FailRealmIdx = FromStringWithDefault(parts[2], Max()); - FailDomainIdx = FromStringWithDefault(parts[3], Max()); - VdiskIdx = FromStringWithDefault(parts[4], Max()); - } - if (parts.size() != 5 || GroupId == Max() - || GroupGeneration == Max() || FailRealmIdx == Max() - || FailDomainIdx == Max() || VdiskIdx == Max()) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "Unable to parse the 'vdisk_id' parameter"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - } else if (!GetRequiredParam(params, "group_id", GroupId) - || !GetRequiredParam(params, "group_generation_id", GroupGeneration) - || !GetRequiredParam(params, "fail_realm_idx", 
FailRealmIdx) - || !GetRequiredParam(params, "fail_domain_idx", FailDomainIdx) - || !GetRequiredParam(params, "vdisk_idx", VdiskIdx)) { - return PassAway(); - } - - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_POST) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only POST method is allowed"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - TBase::InitConfig(params); - - Force = FromStringWithDefault(params.Get("force"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Retries = FromStringWithDefault(params.Get("retries"), 0); - RetryPeriod = TDuration::MilliSeconds(FromStringWithDefault(params.Get("retry_period"), RetryPeriod.MilliSeconds())); - - if (Force && !Viewer->CheckAccessAdministration(Event->Get())) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - - SendRequest(); - - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - cFunc(TEvRetryNodeRequest::EventType, HandleRetry); - cFunc(TEvents::TEvUndelivered::EventType, Undelivered); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendRequest() { - RequestBSControllerVDiskEvict(GroupId, GroupGeneration, FailRealmIdx, FailDomainIdx, VdiskIdx, Force); - } - - bool RetryRequest() { - if (Retries) { - if (++ActualRetries <= Retries) { - TBase::Schedule(RetryPeriod, new TEvRetryNodeRequest()); - return true; - } - } - return false; - } - - void Undelivered() { - if (!RetryRequest()) { - TBase::RequestDone(); - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - Response.reset(ev->Release().Release()); - ReplyAndPassAway(); - } - - void HandleRetry() { - 
SendRequest(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving response from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void PassAway() override { - TBase::PassAway(); - } - - void ReplyAndPassAway() { - NJson::TJsonValue json; - if (Response != nullptr) { - if (Response->Record.GetResponse().GetSuccess()) { - json["result"] = true; - } else { - json["result"] = false; - TString error; - bool forceRetryPossible = false; - Viewer->TranslateFromBSC2Human(Response->Record.GetResponse(), error, forceRetryPossible); - json["error"] = error; - if (forceRetryPossible && Viewer->CheckAccessAdministration(Event->Get())) { - json["forceRetryPossible"] = true; - } - } - json["debugMessage"] = Response->Record.ShortDebugString(); - } else { - json["result"] = false; - json["error"] = "No response was received from BSC"; - } - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - return YAML::Load(R"___( - post: - tags: - - vdisk - summary: VDisk evict - description: VDisk evict - parameters: - - name: vdisk_id - in: query - description: vdisk identifier - required: false - type: string - - name: group_id - in: query - description: group identifier - required: false - type: integer - - name: group_generation_id - in: query - description: group generation identifier - required: false - type: integer - - name: fail_realm_idx - in: query - description: fail realm identifier - required: false - type: integer - - name: fail_domain_ids - in: query - description: fail domain identifier - required: false - type: integer - - name: vdisk_idx - in: query - description: vdisk idx identifier - required: false - type: integer - - name: timeout - in: query - description: 
timeout in ms - required: false - type: integer - - name: force - in: query - description: attempt forced operation, ignore warnings - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - result: - type: boolean - description: was operation successful or not - error: - type: string - description: details about failed operation - forceRetryPossible: - type: boolean - description: if true, operation can be retried with force flag - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); -} - -} -} diff --git a/ydb/core/viewer/json_vdisk_req.h b/ydb/core/viewer/json_vdisk_req.h index 43b455a2a11a..71ccc636ca7f 100644 --- a/ydb/core/viewer/json_vdisk_req.h +++ b/ydb/core/viewer/json_vdisk_req.h @@ -1,18 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" +#include "viewer.h" +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; - template struct TJsonVDiskRequestHelper { static std::unique_ptr MakeRequest(NMon::TEvHttpInfo::TPtr &, TString *) { @@ -24,9 +18,8 @@ struct TJsonVDiskRequestHelper { } }; - template -class TJsonVDiskRequest : public TViewerPipeClient> { +class TJsonVDiskRequest : public TViewerPipeClient { enum EEv { EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), EvEnd @@ -42,7 +35,7 @@ class TJsonVDiskRequest : public TViewerPipeClient; - using TBase = TViewerPipeClient; + using TBase = TViewerPipeClient; using THelper = TJsonVDiskRequestHelper; IViewer* Viewer; TActorId Initiator; @@ -63,17 +56,13 @@ class TJsonVDiskRequest : public TViewerPipeClient TcpProxyId; public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - TJsonVDiskRequest(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) : 
Viewer(viewer) , Initiator(ev->Sender) , Event(ev) {} - virtual void Bootstrap() { + void Bootstrap() override { const auto& params(Event->Get()->Request.GetParams()); NodeId = FromStringWithDefault(params.Get("node_id"), 0); PDiskId = FromStringWithDefault(params.Get("pdisk_id"), Max()); @@ -185,7 +174,7 @@ class TJsonVDiskRequest : public TViewerPipeClient -struct TJsonRequestParameters> { + void ReplyAndPassAway() override { + ReplyAndPassAway({}); + } + + static YAML::Node GetSchema() { + return TProtoToYaml::ProtoToYamlSchema(); + } + static YAML::Node GetParameters() { return YAML::Load(R"___( - name: node_id @@ -251,12 +245,4 @@ struct TJsonRequestParameters> { } }; -template -struct TJsonRequestSchema> { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -} } diff --git a/ydb/core/viewer/json_vdiskinfo.h b/ydb/core/viewer/json_vdiskinfo.h deleted file mode 100644 index 0675c4cd407b..000000000000 --- a/ydb/core/viewer/json_vdiskinfo.h +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace std { - -template <> -struct equal_to { - static decltype(auto) make_tuple(const NKikimrBlobStorage::TVDiskID& id) { - return std::make_tuple( - id.GetGroupID(), - id.GetGroupGeneration(), - id.GetRing(), - id.GetDomain(), - id.GetVDisk() - ); - } - - bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const { - return make_tuple(a) == make_tuple(b); - } -}; - -template <> -struct less { - bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const { - return equal_to::make_tuple(a) < equal_to::make_tuple(b); - } -}; - -template <> -struct hash { - size_t operator ()(const NKikimrBlobStorage::TVDiskID& a) const { - auto tp = equal_to::make_tuple(a); - return hash()(tp); - } -}; - -} - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - 
using TResponseEventType = TEvWhiteboard::TEvVDiskStateResponse; - using TResponseType = NKikimrWhiteboard::TEvVDiskStateResponse; - using TElementType = NKikimrWhiteboard::TVDiskStateInfo; - using TElementKeyType = NKikimrBlobStorage::TVDiskID; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableVDiskStateInfo(); - } - - static const NKikimrBlobStorage::TVDiskID& GetElementKey(const TElementType& type) { - return type.GetVDiskId(); - } - - static TString GetDefaultMergeField() { - return "VDiskId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -using TJsonVDiskInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "VDisk information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "VDisk information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_vdiskstat.h b/ydb/core/viewer/json_vdiskstat.h deleted file mode 100644 index 8aaa81ed1a9d..000000000000 --- a/ydb/core/viewer/json_vdiskstat.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonVDiskStat = TJsonVDiskRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "VDisk statistic"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "VDisk statistic"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_wb_req.cpp b/ydb/core/viewer/json_wb_req.cpp new file mode 100644 index 
000000000000..facb6287f283 --- /dev/null +++ b/ydb/core/viewer/json_wb_req.cpp @@ -0,0 +1,231 @@ +#include "viewer_bsgroupinfo.h" +#include "viewer_nodeinfo.h" +#include "viewer_pdiskinfo.h" +#include "viewer_sysinfo.h" +#include "viewer_tabletinfo.h" +#include "viewer_vdiskinfo.h" +#include "json_handlers.h" + +namespace NKikimr::NViewer { + +YAML::Node GetWhiteboardRequestParameters() { + return YAML::Load(R"___( + - name: node_id + in: query + description: node identifier + required: false + type: integer + - name: merge + in: query + description: merge information from nodes + required: false + type: boolean + - name: group + in: query + description: group information by field + required: false + type: string + - name: all + in: query + description: return all possible key combinations (for enums only) + required: false + type: boolean + - name: filter + in: query + description: filter information by field + required: false + type: string + - name: alive + in: query + description: request from alive (connected) nodes only + required: false + type: boolean + - name: enums + in: query + description: convert enums to strings + required: false + type: boolean + - name: ui64 + in: query + description: return ui64 as number + required: false + type: boolean + - name: timeout + in: query + description: timeout in ms + required: false + type: integer + - name: retries + in: query + description: number of retries + required: false + type: integer + - name: retry_period + in: query + description: retry period in ms + required: false + type: integer + default: 500 + - name: static + in: query + description: request from static nodes only + required: false + type: boolean + - name: since + in: query + description: filter by update time + required: false + type: string + )___"); +} + +void InitViewerBSGroupInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Storage groups information", + .Description 
= "Returns information about storage groups" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/bsgroupinfo", new TJsonHandler(yaml)); + TWhiteboardInfo::InitMerger(); +} + +void InitViewerNodeInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Interconnect information", + .Description = "Returns information about node connections" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/nodeinfo", new TJsonHandler(yaml)); + TWhiteboardInfo::InitMerger(); +} + +void InitViewerPDiskInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "PDisk information", + .Description = "Returns information about PDisks" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/pdiskinfo", new TJsonHandler(yaml)); +} + +void InitViewerSysInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "System information", + .Description = "Returns system information" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/sysinfo", new TJsonHandler(yaml)); +} + +void InitViewerTabletInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Tablet information", + .Description = "Returns information about tablets" + }); + yaml.AddParameter({ + .Name = "database", + .Description = "database name", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "node_id", + .Description = "node identifier", + .Type 
= "integer", + }); + yaml.AddParameter({ + .Name = "path", + .Description = "schema path", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "merge", + .Description = "merge information from nodes", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "group", + .Description = "group information by field", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "all", + .Description = "return all possible key combinations (for enums only)", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "filter", + .Description = "filter information by field", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "alive", + .Description = "request from alive (connected) nodes only", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "enums", + .Description = "convert enums to strings", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "ui64", + .Description = "return ui64 as number", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "timeout", + .Description = "timeout in ms", + .Type = "integer", + }); + yaml.AddParameter({ + .Name = "retries", + .Description = "number of retries", + .Type = "integer", + }); + yaml.AddParameter({ + .Name = "retry_period", + .Description = "retry period in ms", + .Type = "integer", + .Default = "500", + }); + yaml.AddParameter({ + .Name = "static", + .Description = "request from static nodes only", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "since", + .Description = "filter by update time", + .Type = "string", + }); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/tabletinfo", new TJsonHandler(yaml)); +} + +void InitViewerVDiskInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "VDisk information", + .Description = "Returns information about VDisks" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + 
yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/vdiskinfo", new TJsonHandler(yaml)); +} + +} diff --git a/ydb/core/viewer/json_wb_req.h b/ydb/core/viewer/json_wb_req.h index ae0905642b00..f5e0cd103336 100644 --- a/ydb/core/viewer/json_wb_req.h +++ b/ydb/core/viewer/json_wb_req.h @@ -1,48 +1,60 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" -#include "wb_merge.h" -#include "wb_group.h" +#include "log.h" +#include "viewer.h" #include "wb_filter.h" +#include "wb_group.h" +#include "wb_merge.h" #include "wb_req.h" -#include "log.h" +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; +YAML::Node GetWhiteboardRequestParameters(); + template -class TJsonWhiteboardRequest : public TWhiteboardRequest, TRequestEventType, TResponseEventType> { -protected: +class TJsonWhiteboardRequest : public TWhiteboardRequest { +public: using TThis = TJsonWhiteboardRequest; - using TBase = TWhiteboardRequest; + using TBase = TWhiteboardRequest; using TResponseType = typename TResponseEventType::ProtoRecordType; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; + using TBase::Event; + using TBase::ReplyAndPassAway; TJsonSettings JsonSettings; -public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; } TJsonWhiteboardRequest(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) + : TBase(viewer, ev) {} void Bootstrap() override { const auto& params(Event->Get()->Request.GetParams()); - SplitIds(params.Get("node_id"), ',', TBase::RequestSettings.FilterNodeIds); + std::vector nodeIds; + SplitIds(params.Get("node_id"), ',', nodeIds); + if (!nodeIds.empty()) { + if (TBase::RequestSettings.FilterNodeIds.empty()) { + 
TBase::RequestSettings.FilterNodeIds = nodeIds; + } else { + std::sort(nodeIds.begin(), nodeIds.end()); + std::sort(TBase::RequestSettings.FilterNodeIds.begin(), TBase::RequestSettings.FilterNodeIds.end()); + std::vector intersection; + std::set_intersection(nodeIds.begin(), nodeIds.end(), TBase::RequestSettings.FilterNodeIds.begin(), TBase::RequestSettings.FilterNodeIds.end(), std::back_inserter(intersection)); + if (intersection.empty()) { + TBase::RequestSettings.FilterNodeIds = {0}; + } else { + TBase::RequestSettings.FilterNodeIds = intersection; + } + } + } { TString merge = params.Get("merge"); if (merge.empty() || merge == "1" || merge == "true") { @@ -67,8 +79,14 @@ class TJsonWhiteboardRequest : public TWhiteboardRequest(params.Get("static"), false); } + if (params.Has("fields_required")) { + if (params.Get("fields_required") == "all") { + TBase::RequestSettings.FieldsRequired = {-1}; + } else { + SplitIds(params.Get("fields_required"), ',', TBase::RequestSettings.FieldsRequired); + } + } TBase::RequestSettings.Format = params.Get("format"); - TBase::Bootstrap(); } @@ -85,20 +103,23 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestsecond; + auto it = TBase::NodeResponses.find(nodeId); + if (it != TBase::NodeResponses.end()) { + if (it->second.IsError()) { + if (error.empty()) { + error = it->second.GetError(); + } + errors++; } - errors++; } } } @@ -106,14 +127,14 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestfirst << "\":"; @@ -123,94 +144,11 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestSender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + ReplyAndPassAway(TBase::GetHTTPOKJSON(json.Str())); } catch (const std::exception& e) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(TString("HTTP/1.1 400 Bad Request\r\n\r\n") + e.what(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + 
ReplyAndPassAway(TBase::GetHTTPBADREQUEST("text/plain", e.what())); } - TBase::PassAway(); - } -}; - -template -struct TJsonRequestParameters> { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: node_id - in: query - description: node identifier - required: false - type: integer - - name: merge - in: query - description: merge information from nodes - required: false - type: boolean - - name: group - in: query - description: group information by field - required: false - type: string - - name: all - in: query - description: return all possible key combinations (for enums only) - required: false - type: boolean - - name: filter - in: query - description: filter information by field - required: false - type: string - - name: alive - in: query - description: request from alive (connected) nodes only - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: retries - in: query - description: number of retries - required: false - type: integer - - name: retry_period - in: query - description: retry period in ms - required: false - type: integer - default: 500 - - name: static - in: query - description: request from static nodes only - required: false - type: boolean - - name: since - in: query - description: filter by update time - required: false - type: string - )___"); } }; -template -struct TJsonRequestSchema> { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -} } diff --git a/ydb/core/viewer/json_whoami.h b/ydb/core/viewer/json_whoami.h deleted file mode 100644 index 36e3715268fd..000000000000 --- a/ydb/core/viewer/json_whoami.h +++ /dev/null @@ -1,141 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonWhoAmI : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonWhoAmI(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - ReplyAndDie(ctx); - } - - bool CheckGroupMembership(std::unique_ptr& token, const NProtoBuf::RepeatedPtrField& sids) { - if (sids.empty()) { - return true; - } - for (const auto& sid : sids) { - if (token->IsExist(sid)) { - return true; - } - } - return false; - } - - void ReplyAndDie(const TActorContext &ctx) { - NACLibProto::TUserToken userToken; - Y_PROTOBUF_SUPPRESS_NODISCARD userToken.ParseFromString(Event->Get()->UserToken); - NJson::TJsonValue json(NJson::JSON_MAP); - if (userToken.HasUserSID()) { - json["UserSID"] = userToken.GetUserSID(); - } - if (userToken.HasGroupSIDs() && userToken.GetGroupSIDs().BucketsSize() > 0) { - NJson::TJsonValue& groupSIDs(json["GroupSIDs"]); - groupSIDs.SetType(NJson::JSON_ARRAY); - for (const auto& buckets : userToken.GetGroupSIDs().GetBuckets()) { - for (const auto& group : buckets.GetValues()) { - groupSIDs.AppendValue(group); - } - } - } - if (userToken.HasOriginalUserToken()) { - json["OriginalUserToken"] = userToken.GetOriginalUserToken(); - } - if (userToken.HasAuthType()) { - json["AuthType"] = userToken.GetAuthType(); - } - auto token = std::make_unique(userToken); - json["IsViewerAllowed"] = CheckGroupMembership(token, AppData()->DomainsConfig.GetSecurityConfig().GetViewerAllowedSIDs()); - json["IsMonitoringAllowed"] = CheckGroupMembership(token, AppData()->DomainsConfig.GetSecurityConfig().GetMonitoringAllowedSIDs()); - json["IsAdministrationAllowed"] = CheckGroupMembership(token, 
AppData()->DomainsConfig.GetSecurityConfig().GetAdministrationAllowedSIDs()); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return YAML::Load(R"___( - type: object - title: WhoAmI - properties: - UserSID: - type: string - description: User ID / name - GroupSID: - type: array - items: - type: string - description: User groups - OriginalUserToken: - type: string - description: User's token used to authenticate - AuthType: - type: string - description: Authentication type - IsViewerAllowed: - type: boolean - description: Is user allowed to view data - IsMonitoringAllowed: - type: boolean - description: Is user allowed to view deeper and make simple changes - IsAdministrationAllowed: - type: boolean - description: Is user allowed to do unrestricted changes in the system - )___"); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return {}; - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Information about current user"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about user token"; - } -}; - -} -} diff --git a/ydb/core/viewer/log.h b/ydb/core/viewer/log.h index 8a0fe6dcf054..da5229ff0c68 100644 --- a/ydb/core/viewer/log.h +++ b/ydb/core/viewer/log.h @@ -1,16 +1,13 @@ #pragma once - -#include #include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { inline TString GetLogPrefix() { return {}; } -} } #define BLOG_D(stream) 
LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) diff --git a/ydb/core/viewer/monitoring/asset-manifest.json b/ydb/core/viewer/monitoring/asset-manifest.json index bac25a42dd95..aff0a3b603e9 100644 --- a/ydb/core/viewer/monitoring/asset-manifest.json +++ b/ydb/core/viewer/monitoring/asset-manifest.json @@ -1,253 +1,284 @@ { "files": { - "main.css": "./static/css/main.c8ce3bba.css", - "main.js": "./static/js/main.62a60ecb.js", - "static/js/3457.b193afe6.chunk.js": "./static/js/3457.b193afe6.chunk.js", - "static/js/6876.867b698c.chunk.js": "./static/js/6876.867b698c.chunk.js", - "static/js/2435.092e8d7f.chunk.js": "./static/js/2435.092e8d7f.chunk.js", - "static/js/7409.4408962b.chunk.js": "./static/js/7409.4408962b.chunk.js", - "static/js/8622.49f3054c.chunk.js": "./static/js/8622.49f3054c.chunk.js", - "static/js/598.243fd68d.chunk.js": "./static/js/598.243fd68d.chunk.js", - "static/js/6392.134ee5e4.chunk.js": "./static/js/6392.134ee5e4.chunk.js", - "static/js/4618.131d9563.chunk.js": "./static/js/4618.131d9563.chunk.js", - "static/js/704.45771d88.chunk.js": "./static/js/704.45771d88.chunk.js", - "static/js/515.cd9a8a90.chunk.js": "./static/js/515.cd9a8a90.chunk.js", - "static/js/8858.cd9d49a5.chunk.js": "./static/js/8858.cd9d49a5.chunk.js", - "static/js/6887.0855fd66.chunk.js": "./static/js/6887.0855fd66.chunk.js", - "static/js/4848.64f47dc3.chunk.js": "./static/js/4848.64f47dc3.chunk.js", - "static/js/4198.d0671061.chunk.js": "./static/js/4198.d0671061.chunk.js", - "static/js/620.7aea5425.chunk.js": "./static/js/620.7aea5425.chunk.js", - "static/js/9204.77418f94.chunk.js": "./static/js/9204.77418f94.chunk.js", - "static/js/1736.9f4a6b02.chunk.js": "./static/js/1736.9f4a6b02.chunk.js", - "static/js/8747.baf63d86.chunk.js": "./static/js/8747.baf63d86.chunk.js", - "static/js/1528.2a39d066.chunk.js": "./static/js/1528.2a39d066.chunk.js", - "static/js/6877.d2d51d98.chunk.js": "./static/js/6877.d2d51d98.chunk.js", - 
"static/js/4814.11309069.chunk.js": "./static/js/4814.11309069.chunk.js", - "static/js/202.52f13cd5.chunk.js": "./static/js/202.52f13cd5.chunk.js", - "static/js/9280.40cff028.chunk.js": "./static/js/9280.40cff028.chunk.js", - "static/js/5863.e2cd2452.chunk.js": "./static/js/5863.e2cd2452.chunk.js", - "static/js/6058.7f474f92.chunk.js": "./static/js/6058.7f474f92.chunk.js", - "static/js/632.b6c03857.chunk.js": "./static/js/632.b6c03857.chunk.js", - "static/js/7202.fefd43ee.chunk.js": "./static/js/7202.fefd43ee.chunk.js", - "static/js/7999.bdf4fe79.chunk.js": "./static/js/7999.bdf4fe79.chunk.js", - "static/js/2367.052e678b.chunk.js": "./static/js/2367.052e678b.chunk.js", - "static/js/5373.90c95a6e.chunk.js": "./static/js/5373.90c95a6e.chunk.js", - "static/js/6393.b0de2d9e.chunk.js": "./static/js/6393.b0de2d9e.chunk.js", - "static/js/5448.cef3c129.chunk.js": "./static/js/5448.cef3c129.chunk.js", - "static/js/6679.6e0a87d5.chunk.js": "./static/js/6679.6e0a87d5.chunk.js", - "static/js/4132.04be158e.chunk.js": "./static/js/4132.04be158e.chunk.js", - "static/js/9219.24a20881.chunk.js": "./static/js/9219.24a20881.chunk.js", - "static/js/556.55f00ac6.chunk.js": "./static/js/556.55f00ac6.chunk.js", - "static/js/8850.97635389.chunk.js": "./static/js/8850.97635389.chunk.js", - "static/js/9297.eadc4dba.chunk.js": "./static/js/9297.eadc4dba.chunk.js", - "static/js/3630.8eda2d3f.chunk.js": "./static/js/3630.8eda2d3f.chunk.js", - "static/js/3231.65396654.chunk.js": "./static/js/3231.65396654.chunk.js", - "static/js/6815.672badd5.chunk.js": "./static/js/6815.672badd5.chunk.js", - "static/js/2620.8e5c52fb.chunk.js": "./static/js/2620.8e5c52fb.chunk.js", - "static/js/6961.f4888ae1.chunk.js": "./static/js/6961.f4888ae1.chunk.js", - "static/js/7257.8ce0d045.chunk.js": "./static/js/7257.8ce0d045.chunk.js", - "static/js/8702.69a3e0d5.chunk.js": "./static/js/8702.69a3e0d5.chunk.js", - "static/js/3304.f5897a96.chunk.js": "./static/js/3304.f5897a96.chunk.js", - 
"static/js/1508.f0158935.chunk.js": "./static/js/1508.f0158935.chunk.js", - "static/js/3271.7b005742.chunk.js": "./static/js/3271.7b005742.chunk.js", - "static/js/30.b097cbb4.chunk.js": "./static/js/30.b097cbb4.chunk.js", - "static/js/5117.896f7ffb.chunk.js": "./static/js/5117.896f7ffb.chunk.js", - "static/js/5387.8af1d694.chunk.js": "./static/js/5387.8af1d694.chunk.js", - "static/js/5670.5c30cef1.chunk.js": "./static/js/5670.5c30cef1.chunk.js", - "static/js/7388.9f447514.chunk.js": "./static/js/7388.9f447514.chunk.js", - "static/js/3333.ceb196e6.chunk.js": "./static/js/3333.ceb196e6.chunk.js", - "static/js/1278.c0717a20.chunk.js": "./static/js/1278.c0717a20.chunk.js", - "static/js/178.e0df04cc.chunk.js": "./static/js/178.e0df04cc.chunk.js", - "static/js/6892.2c3c2bcb.chunk.js": "./static/js/6892.2c3c2bcb.chunk.js", - "static/js/2229.6687fc46.chunk.js": "./static/js/2229.6687fc46.chunk.js", - "static/js/4326.d5c34c54.chunk.js": "./static/js/4326.d5c34c54.chunk.js", - "static/js/7276.47f377a4.chunk.js": "./static/js/7276.47f377a4.chunk.js", - "static/js/7803.a56cfca6.chunk.js": "./static/js/7803.a56cfca6.chunk.js", - "static/js/5720.39a954f1.chunk.js": "./static/js/5720.39a954f1.chunk.js", - "static/js/6954.e18be130.chunk.js": "./static/js/6954.e18be130.chunk.js", - "static/js/9413.b2921c36.chunk.js": "./static/js/9413.b2921c36.chunk.js", - "static/js/3945.054c871d.chunk.js": "./static/js/3945.054c871d.chunk.js", - "static/js/2981.6d027811.chunk.js": "./static/js/2981.6d027811.chunk.js", - "static/js/1150.2b47004d.chunk.js": "./static/js/1150.2b47004d.chunk.js", - "static/js/3926.8f2c9741.chunk.js": "./static/js/3926.8f2c9741.chunk.js", - "static/js/5643.00957838.chunk.js": "./static/js/5643.00957838.chunk.js", - "static/js/5161.45b4f520.chunk.js": "./static/js/5161.45b4f520.chunk.js", - "static/js/2238.3cf88b79.chunk.js": "./static/js/2238.3cf88b79.chunk.js", - "static/js/8133.2afc4db4.chunk.js": "./static/js/8133.2afc4db4.chunk.js", - 
"static/js/4949.6bf46e71.chunk.js": "./static/js/4949.6bf46e71.chunk.js", - "static/js/383.4faec08b.chunk.js": "./static/js/383.4faec08b.chunk.js", - "static/js/2701.86912840.chunk.js": "./static/js/2701.86912840.chunk.js", - "static/js/3645.bdd20200.chunk.js": "./static/js/3645.bdd20200.chunk.js", - "static/js/2677.3d7ea3fc.chunk.js": "./static/js/2677.3d7ea3fc.chunk.js", - "static/js/2477.e6121bfd.chunk.js": "./static/js/2477.e6121bfd.chunk.js", - "static/js/5399.f9398084.chunk.js": "./static/js/5399.f9398084.chunk.js", - "static/js/4985.991de003.chunk.js": "./static/js/4985.991de003.chunk.js", - "static/js/674.e6536250.chunk.js": "./static/js/674.e6536250.chunk.js", - "static/js/9207.5881b206.chunk.js": "./static/js/9207.5881b206.chunk.js", - "static/js/7779.9d9b07ae.chunk.js": "./static/js/7779.9d9b07ae.chunk.js", - "static/js/1148.3c629236.chunk.js": "./static/js/1148.3c629236.chunk.js", - "static/js/8011.4fed4307.chunk.js": "./static/js/8011.4fed4307.chunk.js", - "static/js/96.6e1bf3f4.chunk.js": "./static/js/96.6e1bf3f4.chunk.js", - "static/js/8167.b9a90da5.chunk.js": "./static/js/8167.b9a90da5.chunk.js", - "static/js/4347.adf03999.chunk.js": "./static/js/4347.adf03999.chunk.js", - "static/js/2223.63ae5a05.chunk.js": "./static/js/2223.63ae5a05.chunk.js", - "static/js/2033.5c6dfca9.chunk.js": "./static/js/2033.5c6dfca9.chunk.js", - "static/js/8695.f17f8853.chunk.js": "./static/js/8695.f17f8853.chunk.js", - "static/js/8140.8d8e9309.chunk.js": "./static/js/8140.8d8e9309.chunk.js", - "static/js/6227.fc562bbf.chunk.js": "./static/js/6227.fc562bbf.chunk.js", - "static/js/148.b60f0e5e.chunk.js": "./static/js/148.b60f0e5e.chunk.js", - "static/js/9572.9f83f004.chunk.js": "./static/js/9572.9f83f004.chunk.js", - "static/js/1179.15d7ac65.chunk.js": "./static/js/1179.15d7ac65.chunk.js", - "static/js/1746.a8ba5c62.chunk.js": "./static/js/1746.a8ba5c62.chunk.js", - "static/js/3466.98f036ac.chunk.js": "./static/js/3466.98f036ac.chunk.js", - 
"static/js/4684.27f737c4.chunk.js": "./static/js/4684.27f737c4.chunk.js", - "static/js/5226.675d55fb.chunk.js": "./static/js/5226.675d55fb.chunk.js", - "static/js/115.2c4de87e.chunk.js": "./static/js/115.2c4de87e.chunk.js", - "static/js/4964.c7c75eb0.chunk.js": "./static/js/4964.c7c75eb0.chunk.js", - "static/js/1869.d6661a03.chunk.js": "./static/js/1869.d6661a03.chunk.js", - "static/js/9917.67d792e3.chunk.js": "./static/js/9917.67d792e3.chunk.js", - "static/js/163.eea01641.chunk.js": "./static/js/163.eea01641.chunk.js", - "static/js/3025.7e536c57.chunk.js": "./static/js/3025.7e536c57.chunk.js", - "static/js/6156.0c562627.chunk.js": "./static/js/6156.0c562627.chunk.js", - "static/js/6361.a9f11e7a.chunk.js": "./static/js/6361.a9f11e7a.chunk.js", - "static/js/4663.b893c670.chunk.js": "./static/js/4663.b893c670.chunk.js", - "static/js/7992.20690745.chunk.js": "./static/js/7992.20690745.chunk.js", - "static/js/3756.67bd6b00.chunk.js": "./static/js/3756.67bd6b00.chunk.js", - "static/js/678.b73063ff.chunk.js": "./static/js/678.b73063ff.chunk.js", - "static/js/436.564ff0f8.chunk.js": "./static/js/436.564ff0f8.chunk.js", - "static/js/5112.6189bbe0.chunk.js": "./static/js/5112.6189bbe0.chunk.js", - "static/js/9555.c9b5ee61.chunk.js": "./static/js/9555.c9b5ee61.chunk.js", - "static/js/5809.d78ebebb.chunk.js": "./static/js/5809.d78ebebb.chunk.js", - "static/js/5450.f0dcfc15.chunk.js": "./static/js/5450.f0dcfc15.chunk.js", - "static/js/5491.a460479e.chunk.js": "./static/js/5491.a460479e.chunk.js", - "static/js/8591.93172fe9.chunk.js": "./static/js/8591.93172fe9.chunk.js", - "static/js/7016.4a34a027.chunk.js": "./static/js/7016.4a34a027.chunk.js", - "static/js/9308.c72b8585.chunk.js": "./static/js/9308.c72b8585.chunk.js", - "static/js/9411.96fb3e2f.chunk.js": "./static/js/9411.96fb3e2f.chunk.js", - "static/js/6521.371403ec.chunk.js": "./static/js/6521.371403ec.chunk.js", - "static/js/4159.5e0cfd91.chunk.js": "./static/js/4159.5e0cfd91.chunk.js", - 
"static/js/9528.9991c023.chunk.js": "./static/js/9528.9991c023.chunk.js", - "static/js/4826.d2723706.chunk.js": "./static/js/4826.d2723706.chunk.js", - "static/js/5352.3d3187b7.chunk.js": "./static/js/5352.3d3187b7.chunk.js", - "static/js/9292.91ed23f7.chunk.js": "./static/js/9292.91ed23f7.chunk.js", - "static/js/7684.a3920b72.chunk.js": "./static/js/7684.a3920b72.chunk.js", - "static/js/9212.870f16f0.chunk.js": "./static/js/9212.870f16f0.chunk.js", - "static/js/6065.b08e9640.chunk.js": "./static/js/6065.b08e9640.chunk.js", - "static/js/5341.2c19c723.chunk.js": "./static/js/5341.2c19c723.chunk.js", - "static/js/4583.1682cf86.chunk.js": "./static/js/4583.1682cf86.chunk.js", - "static/js/3920.11b8c9d7.chunk.js": "./static/js/3920.11b8c9d7.chunk.js", - "static/js/7119.e94f8dac.chunk.js": "./static/js/7119.e94f8dac.chunk.js", - "static/js/6144.e1568f26.chunk.js": "./static/js/6144.e1568f26.chunk.js", - "static/js/1350.21b6a9ef.chunk.js": "./static/js/1350.21b6a9ef.chunk.js", - "static/js/2590.75b6626e.chunk.js": "./static/js/2590.75b6626e.chunk.js", - "static/js/6291.e7cdf7f2.chunk.js": "./static/js/6291.e7cdf7f2.chunk.js", - "static/js/3397.9c0005a3.chunk.js": "./static/js/3397.9c0005a3.chunk.js", - "static/js/1168.91d9e2c2.chunk.js": "./static/js/1168.91d9e2c2.chunk.js", - "static/js/8853.c8f9e9d6.chunk.js": "./static/js/8853.c8f9e9d6.chunk.js", - "static/js/4535.5d1c8322.chunk.js": "./static/js/4535.5d1c8322.chunk.js", - "static/js/9101.ce051539.chunk.js": "./static/js/9101.ce051539.chunk.js", - "static/js/2986.2100fcad.chunk.js": "./static/js/2986.2100fcad.chunk.js", - "static/js/4080.07be3744.chunk.js": "./static/js/4080.07be3744.chunk.js", - "static/js/3898.1fec42e6.chunk.js": "./static/js/3898.1fec42e6.chunk.js", - "static/js/1616.8a217b93.chunk.js": "./static/js/1616.8a217b93.chunk.js", - "static/js/7522.1a0f9c02.chunk.js": "./static/js/7522.1a0f9c02.chunk.js", - "static/js/6531.7eac62d1.chunk.js": "./static/js/6531.7eac62d1.chunk.js", - 
"static/css/4983.5c3e5de4.chunk.css": "./static/css/4983.5c3e5de4.chunk.css", - "static/js/3757.7c534899.chunk.js": "./static/js/3757.7c534899.chunk.js", - "static/js/4842.57182d38.chunk.js": "./static/js/4842.57182d38.chunk.js", - "static/css/328.c0ade9c1.chunk.css": "./static/css/328.c0ade9c1.chunk.css", - "static/js/328.f24db8bf.chunk.js": "./static/js/328.f24db8bf.chunk.js", - "static/js/599.c58caf58.chunk.js": "./static/js/599.c58caf58.chunk.js", - "static/js/1155.4fce1854.chunk.js": "./static/js/1155.4fce1854.chunk.js", - "static/js/6230.8e64216a.chunk.js": "./static/js/6230.8e64216a.chunk.js", - "static/js/337.b6fc715e.chunk.js": "./static/js/337.b6fc715e.chunk.js", - "static/js/451.3b449e79.chunk.js": "./static/js/451.3b449e79.chunk.js", - "static/js/2322.29255c22.chunk.js": "./static/js/2322.29255c22.chunk.js", - "static/js/4123.64882a16.chunk.js": "./static/js/4123.64882a16.chunk.js", - "static/js/6289.51f8741e.chunk.js": "./static/js/6289.51f8741e.chunk.js", - "static/js/4635.ffa9b6b7.chunk.js": "./static/js/4635.ffa9b6b7.chunk.js", - "static/js/4345.9238776d.chunk.js": "./static/js/4345.9238776d.chunk.js", - "static/js/9319.40f9e46a.chunk.js": "./static/js/9319.40f9e46a.chunk.js", - "static/js/924.382f18b1.chunk.js": "./static/js/924.382f18b1.chunk.js", - "static/js/6795.5ec0c96a.chunk.js": "./static/js/6795.5ec0c96a.chunk.js", - "static/js/2302.7e7a2fb4.chunk.js": "./static/js/2302.7e7a2fb4.chunk.js", - "static/js/4388.edb51304.chunk.js": "./static/js/4388.edb51304.chunk.js", - "static/js/4046.5dac72a9.chunk.js": "./static/js/4046.5dac72a9.chunk.js", - "static/js/2190.27f354f5.chunk.js": "./static/js/2190.27f354f5.chunk.js", - "static/js/3358.c777fe1f.chunk.js": "./static/js/3358.c777fe1f.chunk.js", - "static/js/6142.b2452554.chunk.js": "./static/js/6142.b2452554.chunk.js", - "static/js/2962.66e01691.chunk.js": "./static/js/2962.66e01691.chunk.js", - "static/js/214.99a17949.chunk.js": "./static/js/214.99a17949.chunk.js", - 
"static/js/8791.b209de42.chunk.js": "./static/js/8791.b209de42.chunk.js", - "static/js/6898.5580b941.chunk.js": "./static/js/6898.5580b941.chunk.js", - "static/js/9173.71d773f2.chunk.js": "./static/js/9173.71d773f2.chunk.js", - "static/js/2532.30bb087d.chunk.js": "./static/js/2532.30bb087d.chunk.js", - "static/js/6329.d78c1432.chunk.js": "./static/js/6329.d78c1432.chunk.js", - "static/js/2840.b69eb597.chunk.js": "./static/js/2840.b69eb597.chunk.js", - "static/js/5311.a500a1ea.chunk.js": "./static/js/5311.a500a1ea.chunk.js", - "static/js/2403.82cd0025.chunk.js": "./static/js/2403.82cd0025.chunk.js", - "static/js/1747.b4331799.chunk.js": "./static/js/1747.b4331799.chunk.js", - "static/js/3498.c7d39060.chunk.js": "./static/js/3498.c7d39060.chunk.js", - "static/js/185.7d51fcfa.chunk.js": "./static/js/185.7d51fcfa.chunk.js", - "static/js/8450.baf3a89d.chunk.js": "./static/js/8450.baf3a89d.chunk.js", - "static/js/3771.764124c3.chunk.js": "./static/js/3771.764124c3.chunk.js", - "static/js/7529.ddf87a9a.chunk.js": "./static/js/7529.ddf87a9a.chunk.js", - "static/js/785.d2eae69c.chunk.js": "./static/js/785.d2eae69c.chunk.js", - "static/js/5107.8cac6a03.chunk.js": "./static/js/5107.8cac6a03.chunk.js", - "static/js/6919.84ed9ccc.chunk.js": "./static/js/6919.84ed9ccc.chunk.js", - "static/js/2104.4f22ecac.chunk.js": "./static/js/2104.4f22ecac.chunk.js", - "static/js/9433.7ce648d0.chunk.js": "./static/js/9433.7ce648d0.chunk.js", - "static/js/1956.0205a5bb.chunk.js": "./static/js/1956.0205a5bb.chunk.js", - "static/js/6619.9e1de7a6.chunk.js": "./static/js/6619.9e1de7a6.chunk.js", - "static/js/2492.64b7d727.chunk.js": "./static/js/2492.64b7d727.chunk.js", - "static/js/2194.38bafdfc.chunk.js": "./static/js/2194.38bafdfc.chunk.js", - "static/js/9526.10bb1684.chunk.js": "./static/js/9526.10bb1684.chunk.js", - "static/js/5790.e3d88e2c.chunk.js": "./static/js/5790.e3d88e2c.chunk.js", - "static/js/8905.b8a9fd91.chunk.js": "./static/js/8905.b8a9fd91.chunk.js", - 
"static/js/5168.6fb23f08.chunk.js": "./static/js/5168.6fb23f08.chunk.js", - "static/js/619.f27ddcbd.chunk.js": "./static/js/619.f27ddcbd.chunk.js", - "static/js/4550.2e04d705.chunk.js": "./static/js/4550.2e04d705.chunk.js", - "static/js/3644.aeda46ca.chunk.js": "./static/js/3644.aeda46ca.chunk.js", - "static/js/8797.f8f0ce13.chunk.js": "./static/js/8797.f8f0ce13.chunk.js", - "static/js/2521.21bdfab9.chunk.js": "./static/js/2521.21bdfab9.chunk.js", - "static/js/1478.5044be66.chunk.js": "./static/js/1478.5044be66.chunk.js", - "static/js/6300.dca75d45.chunk.js": "./static/js/6300.dca75d45.chunk.js", - "static/js/3074.bbb8aaef.chunk.js": "./static/js/3074.bbb8aaef.chunk.js", - "static/js/9371.b42befbc.chunk.js": "./static/js/9371.b42befbc.chunk.js", - "static/js/9923.270f0a19.chunk.js": "./static/js/9923.270f0a19.chunk.js", - "static/js/358.d6300019.chunk.js": "./static/js/358.d6300019.chunk.js", - "static/js/86.ad271bdc.chunk.js": "./static/js/86.ad271bdc.chunk.js", - "static/js/5661.c83a4eb0.chunk.js": "./static/js/5661.c83a4eb0.chunk.js", - "static/js/3621.9b6c61ab.chunk.js": "./static/js/3621.9b6c61ab.chunk.js", - "static/js/2994.e6c77407.chunk.js": "./static/js/2994.e6c77407.chunk.js", - "static/js/4812.73af8448.chunk.js": "./static/js/4812.73af8448.chunk.js", - "static/js/9621.48073631.chunk.js": "./static/js/9621.48073631.chunk.js", - "static/js/7554.28f3da22.chunk.js": "./static/js/7554.28f3da22.chunk.js", - "static/js/425.c6dd581a.chunk.js": "./static/js/425.c6dd581a.chunk.js", - "static/js/6044.2de9962d.chunk.js": "./static/js/6044.2de9962d.chunk.js", - "static/js/2141.26c930aa.chunk.js": "./static/js/2141.26c930aa.chunk.js", - "static/js/919.53e04507.chunk.js": "./static/js/919.53e04507.chunk.js", - "static/js/6692.9322b59d.chunk.js": "./static/js/6692.9322b59d.chunk.js", - "static/js/6321.aa3e44de.chunk.js": "./static/js/6321.aa3e44de.chunk.js", - "static/js/2931.3ade3bc3.chunk.js": "./static/js/2931.3ade3bc3.chunk.js", - "static/js/2876.afe7e47f.chunk.js": 
"./static/js/2876.afe7e47f.chunk.js", - "static/js/5868.be04313a.chunk.js": "./static/js/5868.be04313a.chunk.js", - "static/js/2553.5faabf5a.chunk.js": "./static/js/2553.5faabf5a.chunk.js", - "static/js/9876.b336d1f5.chunk.js": "./static/js/9876.b336d1f5.chunk.js", - "static/js/5378.86805fba.chunk.js": "./static/js/5378.86805fba.chunk.js", - "static/js/2183.e2318c37.chunk.js": "./static/js/2183.e2318c37.chunk.js", - "static/js/7543.3fcfd3ba.chunk.js": "./static/js/7543.3fcfd3ba.chunk.js", - "static/js/6390.497d0ec8.chunk.js": "./static/js/6390.497d0ec8.chunk.js", - "static/js/2118.bc169874.chunk.js": "./static/js/2118.bc169874.chunk.js", - "static/js/8065.666ef449.chunk.js": "./static/js/8065.666ef449.chunk.js", - "static/js/7520.d245d6ac.chunk.js": "./static/js/7520.d245d6ac.chunk.js", - "static/js/4789.d52069de.chunk.js": "./static/js/4789.d52069de.chunk.js", - "static/js/8607.1e377882.chunk.js": "./static/js/8607.1e377882.chunk.js", - "static/css/1551.d5e5efc2.chunk.css": "./static/css/1551.d5e5efc2.chunk.css", - "static/js/1551.2e8e3e50.chunk.js": "./static/js/1551.2e8e3e50.chunk.js", - "static/css/8424.308a04db.chunk.css": "./static/css/8424.308a04db.chunk.css", - "static/js/8424.5b5c42b5.chunk.js": "./static/js/8424.5b5c42b5.chunk.js", - "static/js/7645.6565454c.chunk.js": "./static/js/7645.6565454c.chunk.js", + "main.css": "./static/css/main.0bf24cd8.css", + "main.js": "./static/js/main.7cff0321.js", + "static/js/4226.9f88ef38.chunk.js": "./static/js/4226.9f88ef38.chunk.js", + "static/js/6595.53cb237b.chunk.js": "./static/js/6595.53cb237b.chunk.js", + "static/js/2503.96b0c070.chunk.js": "./static/js/2503.96b0c070.chunk.js", + "static/js/3768.988c811a.chunk.js": "./static/js/3768.988c811a.chunk.js", + "static/js/3283.59bb81e0.chunk.js": "./static/js/3283.59bb81e0.chunk.js", + "static/js/1855.17a4a673.chunk.js": "./static/js/1855.17a4a673.chunk.js", + "static/js/3092.117dd3ef.chunk.js": "./static/js/3092.117dd3ef.chunk.js", + 
"static/js/3643.bec8ad3c.chunk.js": "./static/js/3643.bec8ad3c.chunk.js", + "static/js/9974.57e4b5e1.chunk.js": "./static/js/9974.57e4b5e1.chunk.js", + "static/js/4806.cff0f21e.chunk.js": "./static/js/4806.cff0f21e.chunk.js", + "static/js/6943.a900c8d8.chunk.js": "./static/js/6943.a900c8d8.chunk.js", + "static/js/4463.e01e2d73.chunk.js": "./static/js/4463.e01e2d73.chunk.js", + "static/js/6395.82fdc368.chunk.js": "./static/js/6395.82fdc368.chunk.js", + "static/js/4779.f5b56e5a.chunk.js": "./static/js/4779.f5b56e5a.chunk.js", + "static/js/753.7b0f09c6.chunk.js": "./static/js/753.7b0f09c6.chunk.js", + "static/js/4563.c214a5dd.chunk.js": "./static/js/4563.c214a5dd.chunk.js", + "static/js/6332.982e12ce.chunk.js": "./static/js/6332.982e12ce.chunk.js", + "static/js/2100.06ecdc28.chunk.js": "./static/js/2100.06ecdc28.chunk.js", + "static/js/2845.2216d105.chunk.js": "./static/js/2845.2216d105.chunk.js", + "static/js/8169.3b607870.chunk.js": "./static/js/8169.3b607870.chunk.js", + "static/js/5257.020aa417.chunk.js": "./static/js/5257.020aa417.chunk.js", + "static/js/1074.8bbb31b1.chunk.js": "./static/js/1074.8bbb31b1.chunk.js", + "static/js/8332.c2056746.chunk.js": "./static/js/8332.c2056746.chunk.js", + "static/js/2216.be9ff335.chunk.js": "./static/js/2216.be9ff335.chunk.js", + "static/js/8726.76eea1ab.chunk.js": "./static/js/8726.76eea1ab.chunk.js", + "static/js/9617.3a063882.chunk.js": "./static/js/9617.3a063882.chunk.js", + "static/js/1971.a2544f07.chunk.js": "./static/js/1971.a2544f07.chunk.js", + "static/js/7076.99b06857.chunk.js": "./static/js/7076.99b06857.chunk.js", + "static/js/1035.4d5fe2fd.chunk.js": "./static/js/1035.4d5fe2fd.chunk.js", + "static/js/9757.1fa8ec24.chunk.js": "./static/js/9757.1fa8ec24.chunk.js", + "static/js/7439.9de88a36.chunk.js": "./static/js/7439.9de88a36.chunk.js", + "static/js/9711.5936b47c.chunk.js": "./static/js/9711.5936b47c.chunk.js", + "static/js/3235.884b5b99.chunk.js": "./static/js/3235.884b5b99.chunk.js", + 
"static/js/3830.4c0547b6.chunk.js": "./static/js/3830.4c0547b6.chunk.js", + "static/js/3997.030699b5.chunk.js": "./static/js/3997.030699b5.chunk.js", + "static/js/9937.2df3d582.chunk.js": "./static/js/9937.2df3d582.chunk.js", + "static/js/2972.f8a04d68.chunk.js": "./static/js/2972.f8a04d68.chunk.js", + "static/js/2804.6549b96e.chunk.js": "./static/js/2804.6549b96e.chunk.js", + "static/js/2070.62b9c9e9.chunk.js": "./static/js/2070.62b9c9e9.chunk.js", + "static/js/1388.dc200448.chunk.js": "./static/js/1388.dc200448.chunk.js", + "static/js/1923.a4cf691b.chunk.js": "./static/js/1923.a4cf691b.chunk.js", + "static/js/7132.d1397593.chunk.js": "./static/js/7132.d1397593.chunk.js", + "static/js/5821.269b9961.chunk.js": "./static/js/5821.269b9961.chunk.js", + "static/js/5957.22d94683.chunk.js": "./static/js/5957.22d94683.chunk.js", + "static/js/3145.cfb85bc6.chunk.js": "./static/js/3145.cfb85bc6.chunk.js", + "static/js/9765.410b7864.chunk.js": "./static/js/9765.410b7864.chunk.js", + "static/js/6170.f5db6881.chunk.js": "./static/js/6170.f5db6881.chunk.js", + "static/js/592.07f568c9.chunk.js": "./static/js/592.07f568c9.chunk.js", + "static/js/2039.674765db.chunk.js": "./static/js/2039.674765db.chunk.js", + "static/js/6070.d22f533e.chunk.js": "./static/js/6070.d22f533e.chunk.js", + "static/js/7446.c434978e.chunk.js": "./static/js/7446.c434978e.chunk.js", + "static/js/3520.5df2f9dc.chunk.js": "./static/js/3520.5df2f9dc.chunk.js", + "static/js/579.ff7e5b95.chunk.js": "./static/js/579.ff7e5b95.chunk.js", + "static/js/6845.e64bd413.chunk.js": "./static/js/6845.e64bd413.chunk.js", + "static/js/323.e36b8268.chunk.js": "./static/js/323.e36b8268.chunk.js", + "static/js/3607.6bedcc44.chunk.js": "./static/js/3607.6bedcc44.chunk.js", + "static/js/9703.c7826405.chunk.js": "./static/js/9703.c7826405.chunk.js", + "static/js/3998.051d103c.chunk.js": "./static/js/3998.051d103c.chunk.js", + "static/js/2974.36362aa2.chunk.js": "./static/js/2974.36362aa2.chunk.js", + 
"static/js/4408.4cb113d4.chunk.js": "./static/js/4408.4cb113d4.chunk.js", + "static/js/2734.2f81aa93.chunk.js": "./static/js/2734.2f81aa93.chunk.js", + "static/js/1236.3b0b47e7.chunk.js": "./static/js/1236.3b0b47e7.chunk.js", + "static/js/4324.8b4dfeeb.chunk.js": "./static/js/4324.8b4dfeeb.chunk.js", + "static/js/5203.dbaf6737.chunk.js": "./static/js/5203.dbaf6737.chunk.js", + "static/js/4006.183d37e4.chunk.js": "./static/js/4006.183d37e4.chunk.js", + "static/js/4413.424607bb.chunk.js": "./static/js/4413.424607bb.chunk.js", + "static/js/7367.37be1f98.chunk.js": "./static/js/7367.37be1f98.chunk.js", + "static/js/3883.9513e6a0.chunk.js": "./static/js/3883.9513e6a0.chunk.js", + "static/js/4018.76940440.chunk.js": "./static/js/4018.76940440.chunk.js", + "static/js/6663.26d7c75f.chunk.js": "./static/js/6663.26d7c75f.chunk.js", + "static/js/6079.95240888.chunk.js": "./static/js/6079.95240888.chunk.js", + "static/js/5070.58b434a0.chunk.js": "./static/js/5070.58b434a0.chunk.js", + "static/js/9685.590d0ab0.chunk.js": "./static/js/9685.590d0ab0.chunk.js", + "static/js/4254.9abe8ed3.chunk.js": "./static/js/4254.9abe8ed3.chunk.js", + "static/js/846.c37c0f60.chunk.js": "./static/js/846.c37c0f60.chunk.js", + "static/js/3495.77665cfd.chunk.js": "./static/js/3495.77665cfd.chunk.js", + "static/js/6625.1d36f68b.chunk.js": "./static/js/6625.1d36f68b.chunk.js", + "static/js/862.e5ac847a.chunk.js": "./static/js/862.e5ac847a.chunk.js", + "static/js/9139.d6e566d7.chunk.js": "./static/js/9139.d6e566d7.chunk.js", + "static/js/7245.79bffd2e.chunk.js": "./static/js/7245.79bffd2e.chunk.js", + "static/js/610.307fc3ff.chunk.js": "./static/js/610.307fc3ff.chunk.js", + "static/js/328.ff68c87b.chunk.js": "./static/js/328.ff68c87b.chunk.js", + "static/js/2670.33b83982.chunk.js": "./static/js/2670.33b83982.chunk.js", + "static/js/7862.970a7935.chunk.js": "./static/js/7862.970a7935.chunk.js", + "static/js/1389.4a6fe794.chunk.js": "./static/js/1389.4a6fe794.chunk.js", + 
"static/js/2455.36823616.chunk.js": "./static/js/2455.36823616.chunk.js", + "static/js/6731.36cc76de.chunk.js": "./static/js/6731.36cc76de.chunk.js", + "static/js/2651.a0376f78.chunk.js": "./static/js/2651.a0376f78.chunk.js", + "static/js/127.031dd9bd.chunk.js": "./static/js/127.031dd9bd.chunk.js", + "static/js/9101.c3da85f5.chunk.js": "./static/js/9101.c3da85f5.chunk.js", + "static/js/9433.0240620f.chunk.js": "./static/js/9433.0240620f.chunk.js", + "static/js/7950.868790bd.chunk.js": "./static/js/7950.868790bd.chunk.js", + "static/js/2421.b0e53cec.chunk.js": "./static/js/2421.b0e53cec.chunk.js", + "static/js/2405.6cb977b5.chunk.js": "./static/js/2405.6cb977b5.chunk.js", + "static/js/7273.1cfa7ba2.chunk.js": "./static/js/7273.1cfa7ba2.chunk.js", + "static/js/7443.85993ad1.chunk.js": "./static/js/7443.85993ad1.chunk.js", + "static/js/178.25784a3a.chunk.js": "./static/js/178.25784a3a.chunk.js", + "static/js/3950.57b0da11.chunk.js": "./static/js/3950.57b0da11.chunk.js", + "static/js/8446.e6a23d4b.chunk.js": "./static/js/8446.e6a23d4b.chunk.js", + "static/js/8864.be2503b0.chunk.js": "./static/js/8864.be2503b0.chunk.js", + "static/js/8828.d910f13b.chunk.js": "./static/js/8828.d910f13b.chunk.js", + "static/js/7994.5c64203a.chunk.js": "./static/js/7994.5c64203a.chunk.js", + "static/js/182.a977025f.chunk.js": "./static/js/182.a977025f.chunk.js", + "static/js/5641.b3b5ddf6.chunk.js": "./static/js/5641.b3b5ddf6.chunk.js", + "static/js/4175.62734866.chunk.js": "./static/js/4175.62734866.chunk.js", + "static/js/3493.b911f175.chunk.js": "./static/js/3493.b911f175.chunk.js", + "static/js/4320.f2de8175.chunk.js": "./static/js/4320.f2de8175.chunk.js", + "static/js/452.004a8d50.chunk.js": "./static/js/452.004a8d50.chunk.js", + "static/js/8505.c733306c.chunk.js": "./static/js/8505.c733306c.chunk.js", + "static/js/4040.6f8f7155.chunk.js": "./static/js/4040.6f8f7155.chunk.js", + "static/js/4442.9a979278.chunk.js": "./static/js/4442.9a979278.chunk.js", + 
"static/js/9821.c0703c78.chunk.js": "./static/js/9821.c0703c78.chunk.js", + "static/js/8731.72990525.chunk.js": "./static/js/8731.72990525.chunk.js", + "static/js/4503.fa229fbe.chunk.js": "./static/js/4503.fa229fbe.chunk.js", + "static/js/2871.3d11e695.chunk.js": "./static/js/2871.3d11e695.chunk.js", + "static/js/9963.fc7b507c.chunk.js": "./static/js/9963.fc7b507c.chunk.js", + "static/js/6789.febacb48.chunk.js": "./static/js/6789.febacb48.chunk.js", + "static/js/6292.fee79b86.chunk.js": "./static/js/6292.fee79b86.chunk.js", + "static/js/2516.44b079ed.chunk.js": "./static/js/2516.44b079ed.chunk.js", + "static/js/5982.b7dba432.chunk.js": "./static/js/5982.b7dba432.chunk.js", + "static/js/2967.b531e7aa.chunk.js": "./static/js/2967.b531e7aa.chunk.js", + "static/js/9350.692675cc.chunk.js": "./static/js/9350.692675cc.chunk.js", + "static/js/9008.d4b4f454.chunk.js": "./static/js/9008.d4b4f454.chunk.js", + "static/js/6660.ad1074e6.chunk.js": "./static/js/6660.ad1074e6.chunk.js", + "static/js/9625.c44ae47a.chunk.js": "./static/js/9625.c44ae47a.chunk.js", + "static/js/8841.d097b94c.chunk.js": "./static/js/8841.d097b94c.chunk.js", + "static/js/5066.a0bc3ca7.chunk.js": "./static/js/5066.a0bc3ca7.chunk.js", + "static/js/5014.51de99f4.chunk.js": "./static/js/5014.51de99f4.chunk.js", + "static/js/7656.527a98cb.chunk.js": "./static/js/7656.527a98cb.chunk.js", + "static/js/4723.b5ccc693.chunk.js": "./static/js/4723.b5ccc693.chunk.js", + "static/js/1577.e3c3298a.chunk.js": "./static/js/1577.e3c3298a.chunk.js", + "static/js/4639.a666f082.chunk.js": "./static/js/4639.a666f082.chunk.js", + "static/js/5748.d17b947f.chunk.js": "./static/js/5748.d17b947f.chunk.js", + "static/js/4712.68848e03.chunk.js": "./static/js/4712.68848e03.chunk.js", + "static/js/6679.5a32e19d.chunk.js": "./static/js/6679.5a32e19d.chunk.js", + "static/js/6927.a0faae6b.chunk.js": "./static/js/6927.a0faae6b.chunk.js", + "static/js/824.c56f39b3.chunk.js": "./static/js/824.c56f39b3.chunk.js", + 
"static/js/7275.84809a86.chunk.js": "./static/js/7275.84809a86.chunk.js", + "static/js/9352.9d3dd18e.chunk.js": "./static/js/9352.9d3dd18e.chunk.js", + "static/js/1728.782bd268.chunk.js": "./static/js/1728.782bd268.chunk.js", + "static/js/1886.e1f61c67.chunk.js": "./static/js/1886.e1f61c67.chunk.js", + "static/js/689.e9ec08fb.chunk.js": "./static/js/689.e9ec08fb.chunk.js", + "static/js/4023.1cf957ab.chunk.js": "./static/js/4023.1cf957ab.chunk.js", + "static/js/225.66110a65.chunk.js": "./static/js/225.66110a65.chunk.js", + "static/js/7199.fa3c4603.chunk.js": "./static/js/7199.fa3c4603.chunk.js", + "static/js/253.35728f09.chunk.js": "./static/js/253.35728f09.chunk.js", + "static/js/4657.793f8fff.chunk.js": "./static/js/4657.793f8fff.chunk.js", + "static/js/2780.3e502f16.chunk.js": "./static/js/2780.3e502f16.chunk.js", + "static/js/8645.08b4eef2.chunk.js": "./static/js/8645.08b4eef2.chunk.js", + "static/css/5426.8f2389ae.chunk.css": "./static/css/5426.8f2389ae.chunk.css", + "static/js/5426.c44b6444.chunk.js": "./static/js/5426.c44b6444.chunk.js", + "static/css/4808.146b1de9.chunk.css": "./static/css/4808.146b1de9.chunk.css", + "static/js/4808.382260ae.chunk.js": "./static/js/4808.382260ae.chunk.js", + "static/css/1001.d55e6f5e.chunk.css": "./static/css/1001.d55e6f5e.chunk.css", + "static/js/1001.2a472a60.chunk.js": "./static/js/1001.2a472a60.chunk.js", + "static/css/3209.28cb5984.chunk.css": "./static/css/3209.28cb5984.chunk.css", + "static/js/3209.c7f538f3.chunk.js": "./static/js/3209.c7f538f3.chunk.js", + "static/css/7512.b17f7aaf.chunk.css": "./static/css/7512.b17f7aaf.chunk.css", + "static/js/7512.923733a2.chunk.js": "./static/js/7512.923733a2.chunk.js", + "static/css/3812.b1faff6b.chunk.css": "./static/css/3812.b1faff6b.chunk.css", + "static/js/3812.62e6074c.chunk.js": "./static/js/3812.62e6074c.chunk.js", + "static/css/7054.44580026.chunk.css": "./static/css/7054.44580026.chunk.css", + "static/js/7054.3701f39e.chunk.js": "./static/js/7054.3701f39e.chunk.js", + 
"static/css/1276.f723033a.chunk.css": "./static/css/1276.f723033a.chunk.css", + "static/js/1276.f7603cad.chunk.js": "./static/js/1276.f7603cad.chunk.js", + "static/css/6866.3cc21bd9.chunk.css": "./static/css/6866.3cc21bd9.chunk.css", + "static/js/6866.ea949d3d.chunk.js": "./static/js/6866.ea949d3d.chunk.js", + "static/js/1072.9a47be2b.chunk.js": "./static/js/1072.9a47be2b.chunk.js", + "static/js/633.834b6a1c.chunk.js": "./static/js/633.834b6a1c.chunk.js", + "static/css/5118.ac57fa01.chunk.css": "./static/css/5118.ac57fa01.chunk.css", + "static/js/4609.28ef81ef.chunk.js": "./static/js/4609.28ef81ef.chunk.js", + "static/js/9530.ada06b35.chunk.js": "./static/js/9530.ada06b35.chunk.js", + "static/js/4535.457b09fd.chunk.js": "./static/js/4535.457b09fd.chunk.js", + "static/js/5682.966a8cc0.chunk.js": "./static/js/5682.966a8cc0.chunk.js", + "static/js/7043.e2958b70.chunk.js": "./static/js/7043.e2958b70.chunk.js", + "static/js/2262.2ed64c2e.chunk.js": "./static/js/2262.2ed64c2e.chunk.js", + "static/js/6062.6eaa8611.chunk.js": "./static/js/6062.6eaa8611.chunk.js", + "static/js/1222.1b644c9d.chunk.js": "./static/js/1222.1b644c9d.chunk.js", + "static/js/4231.14c1bca0.chunk.js": "./static/js/4231.14c1bca0.chunk.js", + "static/js/4628.388e99ac.chunk.js": "./static/js/4628.388e99ac.chunk.js", + "static/js/5685.d6c42c56.chunk.js": "./static/js/5685.d6c42c56.chunk.js", + "static/js/6659.4c0edc60.chunk.js": "./static/js/6659.4c0edc60.chunk.js", + "static/js/4465.a054353e.chunk.js": "./static/js/4465.a054353e.chunk.js", + "static/js/1410.9bec28d3.chunk.js": "./static/js/1410.9bec28d3.chunk.js", + "static/js/6010.23d4bb8d.chunk.js": "./static/js/6010.23d4bb8d.chunk.js", + "static/js/7828.5627775e.chunk.js": "./static/js/7828.5627775e.chunk.js", + "static/js/6435.15f0b61d.chunk.js": "./static/js/6435.15f0b61d.chunk.js", + "static/js/8504.b46947f0.chunk.js": "./static/js/8504.b46947f0.chunk.js", + "static/js/835.fcf4f3fd.chunk.js": "./static/js/835.fcf4f3fd.chunk.js", + 
"static/js/3164.b52af9a7.chunk.js": "./static/js/3164.b52af9a7.chunk.js", + "static/js/7852.6a83a8ff.chunk.js": "./static/js/7852.6a83a8ff.chunk.js", + "static/js/1460.53037d83.chunk.js": "./static/js/1460.53037d83.chunk.js", + "static/js/453.ea7639b5.chunk.js": "./static/js/453.ea7639b5.chunk.js", + "static/js/3230.ce6d3a8f.chunk.js": "./static/js/3230.ce6d3a8f.chunk.js", + "static/js/4859.5bb93a34.chunk.js": "./static/js/4859.5bb93a34.chunk.js", + "static/js/346.f47bbaef.chunk.js": "./static/js/346.f47bbaef.chunk.js", + "static/js/1109.739c032d.chunk.js": "./static/js/1109.739c032d.chunk.js", + "static/js/5467.23a85e74.chunk.js": "./static/js/5467.23a85e74.chunk.js", + "static/js/5319.acb1fb2c.chunk.js": "./static/js/5319.acb1fb2c.chunk.js", + "static/js/6914.3b644505.chunk.js": "./static/js/6914.3b644505.chunk.js", + "static/js/2769.49fdcd1a.chunk.js": "./static/js/2769.49fdcd1a.chunk.js", + "static/js/8297.67ab228e.chunk.js": "./static/js/8297.67ab228e.chunk.js", + "static/js/5484.c5ede353.chunk.js": "./static/js/5484.c5ede353.chunk.js", + "static/js/4087.46473bf5.chunk.js": "./static/js/4087.46473bf5.chunk.js", + "static/js/8427.e5a51e3c.chunk.js": "./static/js/8427.e5a51e3c.chunk.js", + "static/js/5160.17372d9c.chunk.js": "./static/js/5160.17372d9c.chunk.js", + "static/js/682.229cd996.chunk.js": "./static/js/682.229cd996.chunk.js", + "static/js/228.21aa1ffb.chunk.js": "./static/js/228.21aa1ffb.chunk.js", + "static/js/6953.3f8107dd.chunk.js": "./static/js/6953.3f8107dd.chunk.js", + "static/js/7441.8f4b4d65.chunk.js": "./static/js/7441.8f4b4d65.chunk.js", + "static/js/521.4d640434.chunk.js": "./static/js/521.4d640434.chunk.js", + "static/js/2570.8e1850a3.chunk.js": "./static/js/2570.8e1850a3.chunk.js", + "static/js/6698.7c473cb5.chunk.js": "./static/js/6698.7c473cb5.chunk.js", + "static/js/3972.1d8f063d.chunk.js": "./static/js/3972.1d8f063d.chunk.js", + "static/js/1854.ed793d70.chunk.js": "./static/js/1854.ed793d70.chunk.js", + 
"static/js/5124.661ee0ac.chunk.js": "./static/js/5124.661ee0ac.chunk.js", + "static/js/8908.eb7bba4b.chunk.js": "./static/js/8908.eb7bba4b.chunk.js", + "static/js/6289.ce9965dc.chunk.js": "./static/js/6289.ce9965dc.chunk.js", + "static/js/4099.0a311abf.chunk.js": "./static/js/4099.0a311abf.chunk.js", + "static/js/1758.8e77088d.chunk.js": "./static/js/1758.8e77088d.chunk.js", + "static/js/5605.49c4cf85.chunk.js": "./static/js/5605.49c4cf85.chunk.js", + "static/js/9776.c950cc09.chunk.js": "./static/js/9776.c950cc09.chunk.js", + "static/js/2293.c95dbf41.chunk.js": "./static/js/2293.c95dbf41.chunk.js", + "static/js/9396.e8ca3d45.chunk.js": "./static/js/9396.e8ca3d45.chunk.js", + "static/js/7690.ec833ac0.chunk.js": "./static/js/7690.ec833ac0.chunk.js", + "static/js/6246.acd51635.chunk.js": "./static/js/6246.acd51635.chunk.js", + "static/js/778.af57bf4a.chunk.js": "./static/js/778.af57bf4a.chunk.js", + "static/js/3421.1b907a2a.chunk.js": "./static/js/3421.1b907a2a.chunk.js", + "static/js/8835.e0455f9c.chunk.js": "./static/js/8835.e0455f9c.chunk.js", + "static/js/1376.41e671ac.chunk.js": "./static/js/1376.41e671ac.chunk.js", + "static/js/6118.9403a72b.chunk.js": "./static/js/6118.9403a72b.chunk.js", + "static/js/1670.886ee3f9.chunk.js": "./static/js/1670.886ee3f9.chunk.js", + "static/js/5154.e5b3bec6.chunk.js": "./static/js/5154.e5b3bec6.chunk.js", + "static/js/6898.25df1470.chunk.js": "./static/js/6898.25df1470.chunk.js", + "static/js/1957.b5fe44dd.chunk.js": "./static/js/1957.b5fe44dd.chunk.js", + "static/js/8606.3593957e.chunk.js": "./static/js/8606.3593957e.chunk.js", + "static/js/7628.66f1aa47.chunk.js": "./static/js/7628.66f1aa47.chunk.js", + "static/js/8329.e74d4179.chunk.js": "./static/js/8329.e74d4179.chunk.js", + "static/js/7083.04557a73.chunk.js": "./static/js/7083.04557a73.chunk.js", + "static/js/1948.47c6f139.chunk.js": "./static/js/1948.47c6f139.chunk.js", + "static/js/7289.188a7958.chunk.js": "./static/js/7289.188a7958.chunk.js", + 
"static/js/9811.52fb63e7.chunk.js": "./static/js/9811.52fb63e7.chunk.js", + "static/js/8122.fae00246.chunk.js": "./static/js/8122.fae00246.chunk.js", + "static/js/3107.902f71ab.chunk.js": "./static/js/3107.902f71ab.chunk.js", + "static/js/2807.2824c79c.chunk.js": "./static/js/2807.2824c79c.chunk.js", + "static/js/4222.d4ec5a9d.chunk.js": "./static/js/4222.d4ec5a9d.chunk.js", + "static/js/766.70c23808.chunk.js": "./static/js/766.70c23808.chunk.js", + "static/js/4066.de4c1e02.chunk.js": "./static/js/4066.de4c1e02.chunk.js", + "static/js/2869.3192ed9a.chunk.js": "./static/js/2869.3192ed9a.chunk.js", + "static/js/5503.ecc3bcca.chunk.js": "./static/js/5503.ecc3bcca.chunk.js", + "static/js/1640.eae19c57.chunk.js": "./static/js/1640.eae19c57.chunk.js", + "static/js/3166.95b28d63.chunk.js": "./static/js/3166.95b28d63.chunk.js", + "static/js/3010.b103dfa4.chunk.js": "./static/js/3010.b103dfa4.chunk.js", + "static/js/1487.4365b2d5.chunk.js": "./static/js/1487.4365b2d5.chunk.js", + "static/js/3828.a61513fa.chunk.js": "./static/js/3828.a61513fa.chunk.js", + "static/js/2598.77403ca9.chunk.js": "./static/js/2598.77403ca9.chunk.js", + "static/js/856.0c5fe91c.chunk.js": "./static/js/856.0c5fe91c.chunk.js", + "static/js/165.fe88e976.chunk.js": "./static/js/165.fe88e976.chunk.js", + "static/css/5573.3ce054a3.chunk.css": "./static/css/5573.3ce054a3.chunk.css", + "static/js/5573.6b78d499.chunk.js": "./static/js/5573.6b78d499.chunk.js", + "static/js/3366.7739db1f.chunk.js": "./static/js/3366.7739db1f.chunk.js", + "static/js/7981.7e989bc3.chunk.js": "./static/js/7981.7e989bc3.chunk.js", + "static/js/8337.93117b59.chunk.js": "./static/js/8337.93117b59.chunk.js", + "static/css/511.0a2e691a.chunk.css": "./static/css/511.0a2e691a.chunk.css", + "static/js/511.34323faa.chunk.js": "./static/js/511.34323faa.chunk.js", + "static/js/1139.e67ff859.chunk.js": "./static/js/1139.e67ff859.chunk.js", + "static/js/5130.12a9aaae.chunk.js": "./static/js/5130.12a9aaae.chunk.js", + 
"static/js/9368.8f2cf4ea.chunk.js": "./static/js/9368.8f2cf4ea.chunk.js", + "static/css/3295.8975fa00.chunk.css": "./static/css/3295.8975fa00.chunk.css", + "static/js/896.a21d0b2a.chunk.js": "./static/js/896.a21d0b2a.chunk.js", + "static/js/8021.341b37dd.chunk.js": "./static/js/8021.341b37dd.chunk.js", + "static/css/6869.1d0912d9.chunk.css": "./static/css/6869.1d0912d9.chunk.css", + "ts.worker.js": "./ts.worker.js", + "css.worker.js": "./css.worker.js", + "html.worker.js": "./html.worker.js", + "json.worker.js": "./json.worker.js", + "editor.worker.js": "./editor.worker.js", "static/media/codicon.ttf": "./static/media/codicon.762fced46d6cddbda272.ttf", "static/media/thumbsUp.svg": "./static/media/thumbsUp.d4a03fbaa64ce85a0045bf8ba77f8e2b.svg", "static/media/error.svg": "./static/media/error.ca9e31d5d3dc34da07e11a00f7af0842.svg", @@ -255,7 +286,7 @@ "index.html": "./index.html" }, "entrypoints": [ - "static/css/main.c8ce3bba.css", - "static/js/main.62a60ecb.js" + "static/css/main.0bf24cd8.css", + "static/js/main.7cff0321.js" ] } \ No newline at end of file diff --git a/ydb/core/viewer/monitoring/css.worker.js b/ydb/core/viewer/monitoring/css.worker.js new file mode 100644 index 000000000000..c0848a5c36fe --- /dev/null +++ b/ydb/core/viewer/monitoring/css.worker.js @@ -0,0 +1,2 @@ +/*! 
For license information please see css.worker.js.LICENSE.txt */ +(()=>{"use strict";const e=new class{constructor(){this.listeners=[],this.unexpectedErrorHandler=function(e){setTimeout((()=>{if(e.stack){if(a.isErrorNoTelemetry(e))throw new a(e.message+"\n\n"+e.stack);throw new Error(e.message+"\n\n"+e.stack)}throw e}),0)}}emit(e){this.listeners.forEach((t=>{t(e)}))}onUnexpectedError(e){this.unexpectedErrorHandler(e),this.emit(e)}onUnexpectedExternalError(e){this.unexpectedErrorHandler(e)}};function t(t){r(t)||e.onUnexpectedError(t)}function n(e){if(e instanceof Error){const{name:t,message:n}=e;return{$isError:!0,name:t,message:n,stack:e.stacktrace||e.stack,noTelemetry:a.isErrorNoTelemetry(e)}}return e}const i="Canceled";function r(e){return e instanceof o||e instanceof Error&&e.name===i&&e.message===i}class o extends Error{constructor(){super(i),this.name=this.message}}class s extends Error{constructor(e){super("NotSupported"),e&&(this.message=e)}}class a extends Error{constructor(e){super(e),this.name="CodeExpectedError"}static fromError(e){if(e instanceof a)return e;const t=new a;return t.message=e.message,t.stack=e.stack,t}static isErrorNoTelemetry(e){return"CodeExpectedError"===e.name}}class l extends Error{constructor(e){super(e||"An unexpected bug occurred."),Object.setPrototypeOf(this,l.prototype)}}function c(e,t){const n=this;let i,r=!1;return function(){if(r)return i;if(r=!0,t)try{i=e.apply(n,arguments)}finally{t()}else i=e.apply(n,arguments);return i}}var h;!function(e){function t(e){return e&&"object"===typeof e&&"function"===typeof e[Symbol.iterator]}e.is=t;const n=Object.freeze([]);function*i(e){yield e}e.empty=function(){return n},e.single=i,e.wrap=function(e){return t(e)?e:i(e)},e.from=function(e){return e||n},e.reverse=function*(e){for(let t=e.length-1;t>=0;t--)yield e[t]},e.isEmpty=function(e){return!e||!0===e[Symbol.iterator]().next().done},e.first=function(e){return e[Symbol.iterator]().next().value},e.some=function(e,t){for(const n of 
e)if(t(n))return!0;return!1},e.find=function(e,t){for(const n of e)if(t(n))return n},e.filter=function*(e,t){for(const n of e)t(n)&&(yield n)},e.map=function*(e,t){let n=0;for(const i of e)yield t(i,n++)},e.concat=function*(){for(var e=arguments.length,t=new Array(e),n=0;n2&&void 0!==arguments[2]?arguments[2]:e.length;return function*(){for(t<0&&(t+=e.length),n<0?n+=e.length:n>e.length&&(n=e.length);t1&&void 0!==arguments[1]?arguments[1]:Number.POSITIVE_INFINITY;const i=[];if(0===n)return[i,t];const r=t[Symbol.iterator]();for(let o=0;or}]},e.asyncToArray=async function(e){const t=[];for await(const n of e)t.push(n);return Promise.resolve(t)}}(h||(h={}));let d=null;function p(e){return null===d||void 0===d||d.trackDisposable(e),e}function u(e){null===d||void 0===d||d.markAsDisposed(e)}function m(e,t){null===d||void 0===d||d.setParent(e,t)}function f(e){if(h.is(e)){const n=[];for(const i of e)if(i)try{i.dispose()}catch(t){n.push(t)}if(1===n.length)throw n[0];if(n.length>1)throw new AggregateError(n,"Encountered errors while disposing of store");return Array.isArray(e)?[]:e}if(e)return e.dispose(),e}function g(){for(var e=arguments.length,t=new Array(e),n=0;nf(t)));return function(e,t){if(d)for(const n of e)d.setParent(n,t)}(t,i),i}function b(e){const t=p({dispose:c((()=>{u(t),e()}))});return t}class v{constructor(){this._toDispose=new Set,this._isDisposed=!1,p(this)}dispose(){this._isDisposed||(u(this),this._isDisposed=!0,this.clear())}get isDisposed(){return this._isDisposed}clear(){if(0!==this._toDispose.size)try{f(this._toDispose)}finally{this._toDispose.clear()}}add(e){if(!e)return e;if(e===this)throw new Error("Cannot register a disposable on itself!");return m(e,this),this._isDisposed?v.DISABLE_DISPOSED_WARNING||console.warn(new Error("Trying to add a disposable to a DisposableStore that has already been disposed of. 
The added object will be leaked!").stack):this._toDispose.add(e),e}deleteAndLeak(e){e&&this._toDispose.has(e)&&(this._toDispose.delete(e),m(e,null))}}v.DISABLE_DISPOSED_WARNING=!1;class y{constructor(){this._store=new v,p(this),m(this._store,this)}dispose(){u(this),this._store.dispose()}_register(e){if(e===this)throw new Error("Cannot register a disposable on itself!");return this._store.add(e)}}y.None=Object.freeze({dispose(){}});class w{constructor(){this._store=new Map,this._isDisposed=!1,p(this)}dispose(){u(this),this._isDisposed=!0,this.clearAndDisposeAll()}clearAndDisposeAll(){if(this._store.size)try{f(this._store.values())}finally{this._store.clear()}}get(e){return this._store.get(e)}set(e,t){let n=arguments.length>2&&void 0!==arguments[2]&&arguments[2];var i;this._isDisposed&&console.warn(new Error("Trying to add a disposable to a DisposableMap that has already been disposed of. The added object will be leaked!").stack),n||null===this._store.get(e)||void 0===i||i.dispose(),this._store.set(e,t)}deleteAndDispose(e){var t;null===this._store.get(e)||void 0===t||t.dispose(),this._store.delete(e)}[Symbol.iterator](){return this._store[Symbol.iterator]()}}class x{constructor(e){this.element=e,this.next=x.Undefined,this.prev=x.Undefined}}x.Undefined=new x(void 0);class S{constructor(){this._first=x.Undefined,this._last=x.Undefined,this._size=0}get size(){return this._size}isEmpty(){return this._first===x.Undefined}clear(){let e=this._first;for(;e!==x.Undefined;){const t=e.next;e.prev=x.Undefined,e.next=x.Undefined,e=t}this._first=x.Undefined,this._last=x.Undefined,this._size=0}unshift(e){return this._insert(e,!1)}push(e){return this._insert(e,!0)}_insert(e,t){const n=new x(e);if(this._first===x.Undefined)this._first=n,this._last=n;else if(t){const e=this._last;this._last=n,n.prev=e,e.next=n}else{const e=this._first;this._first=n,n.next=e,e.prev=n}this._size+=1;let i=!1;return()=>{i||(i=!0,this._remove(n))}}shift(){if(this._first!==x.Undefined){const 
e=this._first.element;return this._remove(this._first),e}}pop(){if(this._last!==x.Undefined){const e=this._last.element;return this._remove(this._last),e}}_remove(e){if(e.prev!==x.Undefined&&e.next!==x.Undefined){const t=e.prev;t.next=e.next,e.next.prev=t}else e.prev===x.Undefined&&e.next===x.Undefined?(this._first=x.Undefined,this._last=x.Undefined):e.next===x.Undefined?(this._last=this._last.prev,this._last.next=x.Undefined):e.prev===x.Undefined&&(this._first=this._first.next,this._first.prev=x.Undefined);this._size-=1}*[Symbol.iterator](){let e=this._first;for(;e!==x.Undefined;)yield e.element,e=e.next}}const C=globalThis.performance&&"function"===typeof globalThis.performance.now;class _{static create(e){return new _(e)}constructor(e){this._now=C&&!1===e?Date.now:globalThis.performance.now.bind(globalThis.performance),this._startTime=this._now(),this._stopTime=-1}stop(){this._stopTime=this._now()}reset(){this._startTime=this._now(),this._stopTime=-1}elapsed(){return-1!==this._stopTime?this._stopTime-this._startTime:this._now()-this._startTime}}const k=!1;var E;!function(e){function t(e){if(k){const{onDidAddListener:t}=e,n=F.create();let i=0;e.onDidAddListener=()=>{2===++i&&(console.warn("snapshotted emitter LIKELY used public and SHOULD HAVE BEEN created with DisposableStore. 
snapshotted here"),n.print()),null===t||void 0===t||t()}}}function n(e){return function(t){let n,i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,r=!1;return n=e((e=>{if(!r)return n?n.dispose():r=!0,t.call(i,e)}),null,arguments.length>2?arguments[2]:void 0),r&&n.dispose(),n}}function i(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>n.call(i,t(e))),null,arguments.length>2?arguments[2]:void 0)}),n)}function r(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>t(e)&&n.call(i,e)),null,arguments.length>2?arguments[2]:void 0)}),n)}function o(e,n){let i;const r={onWillAddFirstListener(){i=e(o.fire,o)},onDidRemoveLastListener(){null===i||void 0===i||i.dispose()}};n||t(r);const o=new A(r);return null===n||void 0===n||n.add(o),o.event}function s(e,n){let i,r,o,s,a=arguments.length>2&&void 0!==arguments[2]?arguments[2]:100,l=arguments.length>3&&void 0!==arguments[3]&&arguments[3],c=arguments.length>4&&void 0!==arguments[4]&&arguments[4],h=arguments.length>6?arguments[6]:void 0,d=0;const p={leakWarningThreshold:arguments.length>5?arguments[5]:void 0,onWillAddFirstListener(){i=e((e=>{d++,r=n(r,e),l&&!o&&(u.fire(r),r=void 0),s=()=>{const e=r;r=void 0,o=void 0,(!l||d>1)&&u.fire(e),d=0},"number"===typeof a?(clearTimeout(o),o=setTimeout(s,a)):void 0===o&&(o=0,queueMicrotask(s))}))},onWillRemoveListener(){c&&d>0&&(null===s||void 0===s||s())},onDidRemoveLastListener(){s=void 0,i.dispose()}};h||t(p);const u=new A(p);return null===h||void 0===h||h.add(u),u.event}e.None=()=>y.None,e.defer=function(e,t){return s(e,(()=>{}),0,void 0,!0,void 0,t)},e.once=n,e.map=i,e.forEach=function(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>{t(e),n.call(i,e)}),null,arguments.length>2?arguments[2]:void 0)}),n)},e.filter=r,e.signal=function(e){return e},e.any=function(){for(var e=arguments.length,t=new 
Array(e),n=0;n1&&void 0!==arguments[1]?arguments[1]:null,i=arguments.length>2?arguments[2]:void 0;return function(e,t){t instanceof Array?t.push(e):t&&t.add(e);return e}(g(...t.map((t=>t((t=>e.call(n,t)))))),i)}},e.reduce=function(e,t,n,r){let o=n;return i(e,(e=>(o=t(o,e),o)),r)},e.debounce=s,e.accumulate=function(t){let n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0,i=arguments.length>2?arguments[2]:void 0;return e.debounce(t,((e,t)=>e?(e.push(t),e):[t]),n,void 0,!0,void 0,i)},e.latch=function(e){let t,n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:(e,t)=>e===t,i=!0;return r(e,(e=>{const r=i||!n(e,t);return i=!1,t=e,r}),arguments.length>2?arguments[2]:void 0)},e.split=function(t,n,i){return[e.filter(t,n,i),e.filter(t,(e=>!n(e)),i)]},e.buffer=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],n=arguments.length>3?arguments[3]:void 0,i=(arguments.length>2&&void 0!==arguments[2]?arguments[2]:[]).slice(),r=e((e=>{i?i.push(e):s.fire(e)}));n&&n.add(r);const o=()=>{null===i||void 0===i||i.forEach((e=>s.fire(e))),i=null},s=new A({onWillAddFirstListener(){r||(r=e((e=>s.fire(e))),n&&n.add(r))},onDidAddFirstListener(){i&&(t?setTimeout(o):o())},onDidRemoveLastListener(){r&&r.dispose(),r=null}});return n&&n.add(s),s.event},e.chain=function(e,t){return(n,i,r)=>{const o=t(new l);return e((function(e){const t=o.evaluate(e);t!==a&&n.call(i,t)}),void 0,r)}};const a=Symbol("HaltChainable");class l{constructor(){this.steps=[]}map(e){return this.steps.push(e),this}forEach(e){return this.steps.push((t=>(e(t),t))),this}filter(e){return this.steps.push((t=>e(t)?t:a)),this}reduce(e,t){let n=t;return this.steps.push((t=>(n=e(n,t),n))),this}latch(){let e,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:(e,t)=>e===t,n=!0;return this.steps.push((i=>{const r=n||!t(i,e);return n=!1,e=i,r?i:a})),this}evaluate(e){for(const t of this.steps)if((e=t(e))===a)break;return e}}e.fromNodeEventEmitter=function(e,t){let n=arguments.length>2&&void 
0!==arguments[2]?arguments[2]:e=>e;const i=function(){return r.fire(n(...arguments))},r=new A({onWillAddFirstListener:()=>e.on(t,i),onDidRemoveLastListener:()=>e.removeListener(t,i)});return r.event},e.fromDOMEventEmitter=function(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:e=>e;const i=function(){return r.fire(n(...arguments))},r=new A({onWillAddFirstListener:()=>e.addEventListener(t,i),onDidRemoveLastListener:()=>e.removeEventListener(t,i)});return r.event},e.toPromise=function(e){return new Promise((t=>n(e)(t)))},e.fromPromise=function(e){const t=new A;return e.then((e=>{t.fire(e)}),(()=>{t.fire(void 0)})).finally((()=>{t.dispose()})),t.event},e.runAndSubscribe=function(e,t,n){return t(n),e((e=>t(e)))};class c{constructor(e,n){this._observable=e,this._counter=0,this._hasChanged=!1;const i={onWillAddFirstListener:()=>{e.addObserver(this)},onDidRemoveLastListener:()=>{e.removeObserver(this)}};n||t(i),this.emitter=new A(i),n&&n.add(this.emitter)}beginUpdate(e){this._counter++}handlePossibleChange(e){}handleChange(e,t){this._hasChanged=!0}endUpdate(e){this._counter--,0===this._counter&&(this._observable.reportChanges(),this._hasChanged&&(this._hasChanged=!1,this.emitter.fire(this._observable.get())))}}e.fromObservable=function(e,t){return new c(e,t).emitter.event},e.fromObservableLight=function(e){return(t,n,i)=>{let r=0,o=!1;const s={beginUpdate(){r++},endUpdate(){r--,0===r&&(e.reportChanges(),o&&(o=!1,t.call(n)))},handlePossibleChange(){},handleChange(){o=!0}};e.addObserver(s),e.reportChanges();const a={dispose(){e.removeObserver(s)}};return i instanceof v?i.add(a):Array.isArray(i)&&i.push(a),a}}}(E||(E={}));class R{constructor(e){this.listenerCount=0,this.invocationCount=0,this.elapsedOverall=0,this.durations=[],this.name=`${e}_${R._idPool++}`,R.all.add(this)}start(e){this._stopWatch=new _,this.listenerCount=e}stop(){if(this._stopWatch){const 
e=this._stopWatch.elapsed();this.durations.push(e),this.elapsedOverall+=e,this.invocationCount+=1,this._stopWatch=void 0}}}R.all=new Set,R._idPool=0;class N{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:Math.random().toString(18).slice(2,5);this.threshold=e,this.name=t,this._warnCountdown=0}dispose(){var e;null===(e=this._stacks)||void 0===e||e.clear()}check(e,t){const n=this.threshold;if(n<=0||t{const t=this._stacks.get(e.value)||0;this._stacks.set(e.value,t-1)}}}class F{static create(){var e;return new F(null!==(e=(new Error).stack)&&void 0!==e?e:"")}constructor(e){this.value=e}print(){console.warn(this.value.split("\n").slice(2).join("\n"))}}class D{constructor(e){this.value=e}}const T=void 0;class A{constructor(e){var t,n,i,r,o;this._size=0,this._options=e,this._leakageMon=(null===(t=this._options)||void 0===t?void 0:t.leakWarningThreshold)?new N(null!==(i=null===(n=this._options)||void 0===n?void 0:n.leakWarningThreshold)&&void 0!==i?i:-1):void 0,this._perfMon=(null===(r=this._options)||void 0===r?void 0:r._profName)?new R(this._options._profName):void 0,this._deliveryQueue=null===(o=this._options)||void 0===o?void 0:o.deliveryQueue}dispose(){var e,t,n,i;this._disposed||(this._disposed=!0,(null===(e=this._deliveryQueue)||void 0===e?void 0:e.current)===this&&this._deliveryQueue.reset(),this._listeners&&(this._listeners=void 0,this._size=0),null===(n=null===(t=this._options)||void 0===t?void 0:t.onDidRemoveLastListener)||void 0===n||n.call(t),null===(i=this._leakageMon)||void 0===i||i.dispose())}get event(){var e;return null!==(e=this._event)&&void 0!==e||(this._event=(e,t,n)=>{var i,r,o,s,a;if(this._leakageMon&&this._size>3*this._leakageMon.threshold)return console.warn(`[${this._leakageMon.name}] REFUSES to accept new listeners because it exceeded its threshold by far`),y.None;if(this._disposed)return y.None;t&&(e=e.bind(t));const l=new D(e);let 
c;this._leakageMon&&this._size>=Math.ceil(.2*this._leakageMon.threshold)&&(l.stack=F.create(),c=this._leakageMon.check(l.stack,this._size+1)),this._listeners?this._listeners instanceof D?(null!==(a=this._deliveryQueue)&&void 0!==a||(this._deliveryQueue=new M),this._listeners=[this._listeners,l]):this._listeners.push(l):(null===(r=null===(i=this._options)||void 0===i?void 0:i.onWillAddFirstListener)||void 0===r||r.call(i,this),this._listeners=l,null===(s=null===(o=this._options)||void 0===o?void 0:o.onDidAddFirstListener)||void 0===s||s.call(o,this)),this._size++;const h=b((()=>{null===T||void 0===T||T.unregister(h),null===c||void 0===c||c(),this._removeListener(l)}));if(n instanceof v?n.add(h):Array.isArray(n)&&n.push(h),T){const e=(new Error).stack.split("\n").slice(2).join("\n").trim();T.register(h,e,h)}return h}),this._event}_removeListener(e){var t,n,i,r;if(null===(n=null===(t=this._options)||void 0===t?void 0:t.onWillRemoveListener)||void 0===n||n.call(t,this),!this._listeners)return;if(1===this._size)return this._listeners=void 0,null===(r=null===(i=this._options)||void 0===i?void 0:i.onDidRemoveLastListener)||void 0===r||r.call(i,this),void(this._size=0);const o=this._listeners,s=o.indexOf(e);if(-1===s)throw console.log("disposed?",this._disposed),console.log("size?",this._size),console.log("arr?",JSON.stringify(this._listeners)),new Error("Attempted to dispose unknown listener");this._size--,o[s]=void 0;const a=this._deliveryQueue.current===this;if(2*this._size<=o.length){let e=0;for(let t=0;t0}}class M{constructor(){this.i=-1,this.end=0}enqueue(e,t,n){this.i=0,this.end=n,this.current=e,this.value=t}reset(){this.i=this.end,this.current=void 0,this.value=void 0}}Object.prototype.hasOwnProperty;function z(e){const t=[];for(const n of function(e){let t=[];for(;Object.prototype!==e;)t=t.concat(Object.getOwnPropertyNames(e)),e=Object.getPrototypeOf(e);return t}(e))"function"===typeof e[n]&&t.push(n);return t}let I="undefined"!==typeof 
document&&document.location&&document.location.hash.indexOf("pseudo=true")>=0;function L(e,t){let n;return n=0===t.length?e:e.replace(/\{(\d+)\}/g,((e,n)=>{const i=n[0],r=t[i];let o=e;return"string"===typeof r?o=r:"number"!==typeof r&&"boolean"!==typeof r&&void 0!==r&&null!==r||(o=String(r)),o})),I&&(n="\uff3b"+n.replace(/[aouei]/g,"$&$&")+"\uff3d"),n}function P(e,t){for(var n=arguments.length,i=new Array(n>2?n-2:0),r=2;r=0,B=K.indexOf("Macintosh")>=0,X=(K.indexOf("Macintosh")>=0||K.indexOf("iPad")>=0||K.indexOf("iPhone")>=0)&&!!navigator.maxTouchPoints&&navigator.maxTouchPoints>0,j=K.indexOf("Linux")>=0,Q=(null===K||void 0===K?void 0:K.indexOf("Mobi"))>=0,G=!0;U=void P({key:"ensureLoaderPluginIsLoaded",comment:["{Locked}"]},"_")||V,Z=U,ee=navigator.language}let se=0;B?se=1:q?se=3:j&&(se=2);const ae=q,le=B,ce=(G&&"function"===typeof ne.importScripts&&ne.origin,K),he="function"===typeof ne.postMessage&&!ne.importScripts;(()=>{if(he){const e=[];ne.addEventListener("message",(t=>{if(t.data&&t.data.vscodeScheduleAsyncWork)for(let n=0,i=e.length;n{const i=++t;e.push({id:i,callback:n}),ne.postMessage({vscodeScheduleAsyncWork:i},"*")}}})();const de=!!(ce&&ce.indexOf("Chrome")>=0);ce&&ce.indexOf("Firefox"),!de&&ce&&ce.indexOf("Safari"),ce&&ce.indexOf("Edg/"),ce&&ce.indexOf("Android");class pe{constructor(e){this.executor=e,this._didRun=!1}get value(){if(!this._didRun)try{this._value=this.executor()}catch(e){this._error=e}finally{this._didRun=!0}if(this._error)throw this._error;return this._value}get rawValue(){return this._value}}var ue;function me(e){return e.replace(/[\\\{\}\*\+\?\|\^\$\.\[\]\(\)]/g,"\\$&")}function fe(e){return e>=65&&e<=90}function ge(e){return 55296<=e&&e<=56319}function be(e){return 56320<=e&&e<=57343}function ve(e,t){return t-56320+(e-55296<<10)+65536}function ye(e,t,n){const i=e.charCodeAt(n);if(ge(i)&&n+1t[3*i+1]))return t[3*i+2];i=2*i+1}return 0}}xe._INSTANCE=null;class Se{static getInstance(e){return ue.cache.get(Array.from(e))}static 
getLocales(){return ue._locales.value}constructor(e){this.confusableDictionary=e}isAmbiguous(e){return this.confusableDictionary.has(e)}getPrimaryConfusable(e){return this.confusableDictionary.get(e)}getConfusableCodePoints(){return new Set(this.confusableDictionary.keys())}}ue=Se,Se.ambiguousCharacterData=new pe((()=>JSON.parse('{"_common":[8232,32,8233,32,5760,32,8192,32,8193,32,8194,32,8195,32,8196,32,8197,32,8198,32,8200,32,8201,32,8202,32,8287,32,8199,32,8239,32,2042,95,65101,95,65102,95,65103,95,8208,45,8209,45,8210,45,65112,45,1748,45,8259,45,727,45,8722,45,10134,45,11450,45,1549,44,1643,44,8218,44,184,44,42233,44,894,59,2307,58,2691,58,1417,58,1795,58,1796,58,5868,58,65072,58,6147,58,6153,58,8282,58,1475,58,760,58,42889,58,8758,58,720,58,42237,58,451,33,11601,33,660,63,577,63,2429,63,5038,63,42731,63,119149,46,8228,46,1793,46,1794,46,42510,46,68176,46,1632,46,1776,46,42232,46,1373,96,65287,96,8219,96,8242,96,1370,96,1523,96,8175,96,65344,96,900,96,8189,96,8125,96,8127,96,8190,96,697,96,884,96,712,96,714,96,715,96,756,96,699,96,701,96,700,96,702,96,42892,96,1497,96,2036,96,2037,96,5194,96,5836,96,94033,96,94034,96,65339,91,10088,40,10098,40,12308,40,64830,40,65341,93,10089,41,10099,41,12309,41,64831,41,10100,123,119060,123,10101,125,65342,94,8270,42,1645,42,8727,42,66335,42,5941,47,8257,47,8725,47,8260,47,9585,47,10187,47,10744,47,119354,47,12755,47,12339,47,11462,47,20031,47,12035,47,65340,92,65128,92,8726,92,10189,92,10741,92,10745,92,119311,92,119355,92,12756,92,20022,92,12034,92,42872,38,708,94,710,94,5869,43,10133,43,66203,43,8249,60,10094,60,706,60,119350,60,5176,60,5810,60,5120,61,11840,61,12448,61,42239,61,8250,62,10095,62,707,62,119351,62,5171,62,94015,62,8275,126,732,126,8128,126,8764,126,65372,124,65293,45,120784,50,120794,50,120804,50,120814,50,120824,50,130034,50,42842,50,423,50,1000,50,42564,50,5311,50,42735,50,119302,51,120785,51,120795,51,120805,51,120815,51,120825,51,130035,51,42923,51,540,51,439,51,42858,51,11468,51,1248,51,94011,51,71882,51
,120786,52,120796,52,120806,52,120816,52,120826,52,130036,52,5070,52,71855,52,120787,53,120797,53,120807,53,120817,53,120827,53,130037,53,444,53,71867,53,120788,54,120798,54,120808,54,120818,54,120828,54,130038,54,11474,54,5102,54,71893,54,119314,55,120789,55,120799,55,120809,55,120819,55,120829,55,130039,55,66770,55,71878,55,2819,56,2538,56,2666,56,125131,56,120790,56,120800,56,120810,56,120820,56,120830,56,130040,56,547,56,546,56,66330,56,2663,57,2920,57,2541,57,3437,57,120791,57,120801,57,120811,57,120821,57,120831,57,130041,57,42862,57,11466,57,71884,57,71852,57,71894,57,9082,97,65345,97,119834,97,119886,97,119938,97,119990,97,120042,97,120094,97,120146,97,120198,97,120250,97,120302,97,120354,97,120406,97,120458,97,593,97,945,97,120514,97,120572,97,120630,97,120688,97,120746,97,65313,65,119808,65,119860,65,119912,65,119964,65,120016,65,120068,65,120120,65,120172,65,120224,65,120276,65,120328,65,120380,65,120432,65,913,65,120488,65,120546,65,120604,65,120662,65,120720,65,5034,65,5573,65,42222,65,94016,65,66208,65,119835,98,119887,98,119939,98,119991,98,120043,98,120095,98,120147,98,120199,98,120251,98,120303,98,120355,98,120407,98,120459,98,388,98,5071,98,5234,98,5551,98,65314,66,8492,66,119809,66,119861,66,119913,66,120017,66,120069,66,120121,66,120173,66,120225,66,120277,66,120329,66,120381,66,120433,66,42932,66,914,66,120489,66,120547,66,120605,66,120663,66,120721,66,5108,66,5623,66,42192,66,66178,66,66209,66,66305,66,65347,99,8573,99,119836,99,119888,99,119940,99,119992,99,120044,99,120096,99,120148,99,120200,99,120252,99,120304,99,120356,99,120408,99,120460,99,7428,99,1010,99,11429,99,43951,99,66621,99,128844,67,71922,67,71913,67,65315,67,8557,67,8450,67,8493,67,119810,67,119862,67,119914,67,119966,67,120018,67,120174,67,120226,67,120278,67,120330,67,120382,67,120434,67,1017,67,11428,67,5087,67,42202,67,66210,67,66306,67,66581,67,66844,67,8574,100,8518,100,119837,100,119889,100,119941,100,119993,100,120045,100,120097,100,120149,100,120201,100,120253,100,1203
05,100,120357,100,120409,100,120461,100,1281,100,5095,100,5231,100,42194,100,8558,68,8517,68,119811,68,119863,68,119915,68,119967,68,120019,68,120071,68,120123,68,120175,68,120227,68,120279,68,120331,68,120383,68,120435,68,5024,68,5598,68,5610,68,42195,68,8494,101,65349,101,8495,101,8519,101,119838,101,119890,101,119942,101,120046,101,120098,101,120150,101,120202,101,120254,101,120306,101,120358,101,120410,101,120462,101,43826,101,1213,101,8959,69,65317,69,8496,69,119812,69,119864,69,119916,69,120020,69,120072,69,120124,69,120176,69,120228,69,120280,69,120332,69,120384,69,120436,69,917,69,120492,69,120550,69,120608,69,120666,69,120724,69,11577,69,5036,69,42224,69,71846,69,71854,69,66182,69,119839,102,119891,102,119943,102,119995,102,120047,102,120099,102,120151,102,120203,102,120255,102,120307,102,120359,102,120411,102,120463,102,43829,102,42905,102,383,102,7837,102,1412,102,119315,70,8497,70,119813,70,119865,70,119917,70,120021,70,120073,70,120125,70,120177,70,120229,70,120281,70,120333,70,120385,70,120437,70,42904,70,988,70,120778,70,5556,70,42205,70,71874,70,71842,70,66183,70,66213,70,66853,70,65351,103,8458,103,119840,103,119892,103,119944,103,120048,103,120100,103,120152,103,120204,103,120256,103,120308,103,120360,103,120412,103,120464,103,609,103,7555,103,397,103,1409,103,119814,71,119866,71,119918,71,119970,71,120022,71,120074,71,120126,71,120178,71,120230,71,120282,71,120334,71,120386,71,120438,71,1292,71,5056,71,5107,71,42198,71,65352,104,8462,104,119841,104,119945,104,119997,104,120049,104,120101,104,120153,104,120205,104,120257,104,120309,104,120361,104,120413,104,120465,104,1211,104,1392,104,5058,104,65320,72,8459,72,8460,72,8461,72,119815,72,119867,72,119919,72,120023,72,120179,72,120231,72,120283,72,120335,72,120387,72,120439,72,919,72,120494,72,120552,72,120610,72,120668,72,120726,72,11406,72,5051,72,5500,72,42215,72,66255,72,731,105,9075,105,65353,105,8560,105,8505,105,8520,105,119842,105,119894,105,119946,105,119998,105,120050,105,120102,105,120154,
105,120206,105,120258,105,120310,105,120362,105,120414,105,120466,105,120484,105,618,105,617,105,953,105,8126,105,890,105,120522,105,120580,105,120638,105,120696,105,120754,105,1110,105,42567,105,1231,105,43893,105,5029,105,71875,105,65354,106,8521,106,119843,106,119895,106,119947,106,119999,106,120051,106,120103,106,120155,106,120207,106,120259,106,120311,106,120363,106,120415,106,120467,106,1011,106,1112,106,65322,74,119817,74,119869,74,119921,74,119973,74,120025,74,120077,74,120129,74,120181,74,120233,74,120285,74,120337,74,120389,74,120441,74,42930,74,895,74,1032,74,5035,74,5261,74,42201,74,119844,107,119896,107,119948,107,120000,107,120052,107,120104,107,120156,107,120208,107,120260,107,120312,107,120364,107,120416,107,120468,107,8490,75,65323,75,119818,75,119870,75,119922,75,119974,75,120026,75,120078,75,120130,75,120182,75,120234,75,120286,75,120338,75,120390,75,120442,75,922,75,120497,75,120555,75,120613,75,120671,75,120729,75,11412,75,5094,75,5845,75,42199,75,66840,75,1472,108,8739,73,9213,73,65512,73,1633,108,1777,73,66336,108,125127,108,120783,73,120793,73,120803,73,120813,73,120823,73,130033,73,65321,73,8544,73,8464,73,8465,73,119816,73,119868,73,119920,73,120024,73,120128,73,120180,73,120232,73,120284,73,120336,73,120388,73,120440,73,65356,108,8572,73,8467,108,119845,108,119897,108,119949,108,120001,108,120053,108,120105,73,120157,73,120209,73,120261,73,120313,73,120365,73,120417,73,120469,73,448,73,120496,73,120554,73,120612,73,120670,73,120728,73,11410,73,1030,73,1216,73,1493,108,1503,108,1575,108,126464,108,126592,108,65166,108,65165,108,1994,108,11599,73,5825,73,42226,73,93992,73,66186,124,66313,124,119338,76,8556,76,8466,76,119819,76,119871,76,119923,76,120027,76,120079,76,120131,76,120183,76,120235,76,120287,76,120339,76,120391,76,120443,76,11472,76,5086,76,5290,76,42209,76,93974,76,71843,76,71858,76,66587,76,66854,76,65325,77,8559,77,8499,77,119820,77,119872,77,119924,77,120028,77,120080,77,120132,77,120184,77,120236,77,120288,77,120340,77,120392
,77,120444,77,924,77,120499,77,120557,77,120615,77,120673,77,120731,77,1018,77,11416,77,5047,77,5616,77,5846,77,42207,77,66224,77,66321,77,119847,110,119899,110,119951,110,120003,110,120055,110,120107,110,120159,110,120211,110,120263,110,120315,110,120367,110,120419,110,120471,110,1400,110,1404,110,65326,78,8469,78,119821,78,119873,78,119925,78,119977,78,120029,78,120081,78,120185,78,120237,78,120289,78,120341,78,120393,78,120445,78,925,78,120500,78,120558,78,120616,78,120674,78,120732,78,11418,78,42208,78,66835,78,3074,111,3202,111,3330,111,3458,111,2406,111,2662,111,2790,111,3046,111,3174,111,3302,111,3430,111,3664,111,3792,111,4160,111,1637,111,1781,111,65359,111,8500,111,119848,111,119900,111,119952,111,120056,111,120108,111,120160,111,120212,111,120264,111,120316,111,120368,111,120420,111,120472,111,7439,111,7441,111,43837,111,959,111,120528,111,120586,111,120644,111,120702,111,120760,111,963,111,120532,111,120590,111,120648,111,120706,111,120764,111,11423,111,4351,111,1413,111,1505,111,1607,111,126500,111,126564,111,126596,111,65259,111,65260,111,65258,111,65257,111,1726,111,64428,111,64429,111,64427,111,64426,111,1729,111,64424,111,64425,111,64423,111,64422,111,1749,111,3360,111,4125,111,66794,111,71880,111,71895,111,66604,111,1984,79,2534,79,2918,79,12295,79,70864,79,71904,79,120782,79,120792,79,120802,79,120812,79,120822,79,130032,79,65327,79,119822,79,119874,79,119926,79,119978,79,120030,79,120082,79,120134,79,120186,79,120238,79,120290,79,120342,79,120394,79,120446,79,927,79,120502,79,120560,79,120618,79,120676,79,120734,79,11422,79,1365,79,11604,79,4816,79,2848,79,66754,79,42227,79,71861,79,66194,79,66219,79,66564,79,66838,79,9076,112,65360,112,119849,112,119901,112,119953,112,120005,112,120057,112,120109,112,120161,112,120213,112,120265,112,120317,112,120369,112,120421,112,120473,112,961,112,120530,112,120544,112,120588,112,120602,112,120646,112,120660,112,120704,112,120718,112,120762,112,120776,112,11427,112,65328,80,8473,80,119823,80,119875,80,119927,
80,119979,80,120031,80,120083,80,120187,80,120239,80,120291,80,120343,80,120395,80,120447,80,929,80,120504,80,120562,80,120620,80,120678,80,120736,80,11426,80,5090,80,5229,80,42193,80,66197,80,119850,113,119902,113,119954,113,120006,113,120058,113,120110,113,120162,113,120214,113,120266,113,120318,113,120370,113,120422,113,120474,113,1307,113,1379,113,1382,113,8474,81,119824,81,119876,81,119928,81,119980,81,120032,81,120084,81,120188,81,120240,81,120292,81,120344,81,120396,81,120448,81,11605,81,119851,114,119903,114,119955,114,120007,114,120059,114,120111,114,120163,114,120215,114,120267,114,120319,114,120371,114,120423,114,120475,114,43847,114,43848,114,7462,114,11397,114,43905,114,119318,82,8475,82,8476,82,8477,82,119825,82,119877,82,119929,82,120033,82,120189,82,120241,82,120293,82,120345,82,120397,82,120449,82,422,82,5025,82,5074,82,66740,82,5511,82,42211,82,94005,82,65363,115,119852,115,119904,115,119956,115,120008,115,120060,115,120112,115,120164,115,120216,115,120268,115,120320,115,120372,115,120424,115,120476,115,42801,115,445,115,1109,115,43946,115,71873,115,66632,115,65331,83,119826,83,119878,83,119930,83,119982,83,120034,83,120086,83,120138,83,120190,83,120242,83,120294,83,120346,83,120398,83,120450,83,1029,83,1359,83,5077,83,5082,83,42210,83,94010,83,66198,83,66592,83,119853,116,119905,116,119957,116,120009,116,120061,116,120113,116,120165,116,120217,116,120269,116,120321,116,120373,116,120425,116,120477,116,8868,84,10201,84,128872,84,65332,84,119827,84,119879,84,119931,84,119983,84,120035,84,120087,84,120139,84,120191,84,120243,84,120295,84,120347,84,120399,84,120451,84,932,84,120507,84,120565,84,120623,84,120681,84,120739,84,11430,84,5026,84,42196,84,93962,84,71868,84,66199,84,66225,84,66325,84,119854,117,119906,117,119958,117,120010,117,120062,117,120114,117,120166,117,120218,117,120270,117,120322,117,120374,117,120426,117,120478,117,42911,117,7452,117,43854,117,43858,117,651,117,965,117,120534,117,120592,117,120650,117,120708,117,120766,117,1405,117,
66806,117,71896,117,8746,85,8899,85,119828,85,119880,85,119932,85,119984,85,120036,85,120088,85,120140,85,120192,85,120244,85,120296,85,120348,85,120400,85,120452,85,1357,85,4608,85,66766,85,5196,85,42228,85,94018,85,71864,85,8744,118,8897,118,65366,118,8564,118,119855,118,119907,118,119959,118,120011,118,120063,118,120115,118,120167,118,120219,118,120271,118,120323,118,120375,118,120427,118,120479,118,7456,118,957,118,120526,118,120584,118,120642,118,120700,118,120758,118,1141,118,1496,118,71430,118,43945,118,71872,118,119309,86,1639,86,1783,86,8548,86,119829,86,119881,86,119933,86,119985,86,120037,86,120089,86,120141,86,120193,86,120245,86,120297,86,120349,86,120401,86,120453,86,1140,86,11576,86,5081,86,5167,86,42719,86,42214,86,93960,86,71840,86,66845,86,623,119,119856,119,119908,119,119960,119,120012,119,120064,119,120116,119,120168,119,120220,119,120272,119,120324,119,120376,119,120428,119,120480,119,7457,119,1121,119,1309,119,1377,119,71434,119,71438,119,71439,119,43907,119,71919,87,71910,87,119830,87,119882,87,119934,87,119986,87,120038,87,120090,87,120142,87,120194,87,120246,87,120298,87,120350,87,120402,87,120454,87,1308,87,5043,87,5076,87,42218,87,5742,120,10539,120,10540,120,10799,120,65368,120,8569,120,119857,120,119909,120,119961,120,120013,120,120065,120,120117,120,120169,120,120221,120,120273,120,120325,120,120377,120,120429,120,120481,120,5441,120,5501,120,5741,88,9587,88,66338,88,71916,88,65336,88,8553,88,119831,88,119883,88,119935,88,119987,88,120039,88,120091,88,120143,88,120195,88,120247,88,120299,88,120351,88,120403,88,120455,88,42931,88,935,88,120510,88,120568,88,120626,88,120684,88,120742,88,11436,88,11613,88,5815,88,42219,88,66192,88,66228,88,66327,88,66855,88,611,121,7564,121,65369,121,119858,121,119910,121,119962,121,120014,121,120066,121,120118,121,120170,121,120222,121,120274,121,120326,121,120378,121,120430,121,120482,121,655,121,7935,121,43866,121,947,121,8509,121,120516,121,120574,121,120632,121,120690,121,120748,121,1199,121,4327,121,
71900,121,65337,89,119832,89,119884,89,119936,89,119988,89,120040,89,120092,89,120144,89,120196,89,120248,89,120300,89,120352,89,120404,89,120456,89,933,89,978,89,120508,89,120566,89,120624,89,120682,89,120740,89,11432,89,1198,89,5033,89,5053,89,42220,89,94019,89,71844,89,66226,89,119859,122,119911,122,119963,122,120015,122,120067,122,120119,122,120171,122,120223,122,120275,122,120327,122,120379,122,120431,122,120483,122,7458,122,43923,122,71876,122,66293,90,71909,90,65338,90,8484,90,8488,90,119833,90,119885,90,119937,90,119989,90,120041,90,120197,90,120249,90,120301,90,120353,90,120405,90,120457,90,918,90,120493,90,120551,90,120609,90,120667,90,120725,90,5059,90,42204,90,71849,90,65282,34,65284,36,65285,37,65286,38,65290,42,65291,43,65294,46,65295,47,65296,48,65297,49,65298,50,65299,51,65300,52,65301,53,65302,54,65303,55,65304,56,65305,57,65308,60,65309,61,65310,62,65312,64,65316,68,65318,70,65319,71,65324,76,65329,81,65330,82,65333,85,65334,86,65335,87,65343,95,65346,98,65348,100,65350,102,65355,107,65357,109,65358,110,65361,113,65362,114,65364,116,65365,117,65367,119,65370,122,65371,123,65373,125,119846,109],"_default":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"cs":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"de":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1
053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"es":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"fr":[65374,126,65306,58,65281,33,8216,96,8245,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"it":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"ja":[8211,45,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65292,44,65307,59],"ko":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"pl":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,
99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"pt-BR":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"qps-ploc":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"ru":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,305,105,921,73,1009,112,215,120,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"tr":[160,32,8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"zh-hans":[65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65288,40,65289,41],"zh-hant":[8211,45,65374,126,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,10
91,121,1059,89,65283,35,65307,59]}'))),Se.cache=new class{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:JSON.stringify;this.fn=e,this._computeKey=t,this.lastCache=void 0,this.lastArgKey=void 0}get(e){const t=this._computeKey(e);return this.lastArgKey!==t&&(this.lastArgKey=t,this.lastCache=this.fn(e)),this.lastCache}}((e=>{function t(e){const t=new Map;for(let n=0;n!e.startsWith("_")&&e in i));0===o.length&&(o=["_default"]);for(const a of o){r=n(r,t(i[a]))}const s=function(e,t){const n=new Map(e);for(const[i,r]of t)n.set(i,r);return n}(t(i._common),r);return new ue(s)})),Se._locales=new pe((()=>Object.keys(ue.ambiguousCharacterData.value).filter((e=>!e.startsWith("_")))));class Ce{static getRawData(){return JSON.parse("[9,10,11,12,13,32,127,160,173,847,1564,4447,4448,6068,6069,6155,6156,6157,6158,7355,7356,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8234,8235,8236,8237,8238,8239,8287,8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,10240,12288,12644,65024,65025,65026,65027,65028,65029,65030,65031,65032,65033,65034,65035,65036,65037,65038,65039,65279,65440,65520,65521,65522,65523,65524,65525,65526,65527,65528,65532,78844,119155,119156,119157,119158,119159,119160,119161,119162,917504,917505,917506,917507,917508,917509,917510,917511,917512,917513,917514,917515,917516,917517,917518,917519,917520,917521,917522,917523,917524,917525,917526,917527,917528,917529,917530,917531,917532,917533,917534,917535,917536,917537,917538,917539,917540,917541,917542,917543,917544,917545,917546,917547,917548,917549,917550,917551,917552,917553,917554,917555,917556,917557,917558,917559,917560,917561,917562,917563,917564,917565,917566,917567,917568,917569,917570,917571,917572,917573,917574,917575,917576,917577,917578,917579,917580,917581,917582,917583,917584,917585,917586,917587,917588,917589,917590,917591,917592,917593,917594,917595,917596,917597,917598,917599,917600,917601,917602,917603,917604,917
605,917606,917607,917608,917609,917610,917611,917612,917613,917614,917615,917616,917617,917618,917619,917620,917621,917622,917623,917624,917625,917626,917627,917628,917629,917630,917631,917760,917761,917762,917763,917764,917765,917766,917767,917768,917769,917770,917771,917772,917773,917774,917775,917776,917777,917778,917779,917780,917781,917782,917783,917784,917785,917786,917787,917788,917789,917790,917791,917792,917793,917794,917795,917796,917797,917798,917799,917800,917801,917802,917803,917804,917805,917806,917807,917808,917809,917810,917811,917812,917813,917814,917815,917816,917817,917818,917819,917820,917821,917822,917823,917824,917825,917826,917827,917828,917829,917830,917831,917832,917833,917834,917835,917836,917837,917838,917839,917840,917841,917842,917843,917844,917845,917846,917847,917848,917849,917850,917851,917852,917853,917854,917855,917856,917857,917858,917859,917860,917861,917862,917863,917864,917865,917866,917867,917868,917869,917870,917871,917872,917873,917874,917875,917876,917877,917878,917879,917880,917881,917882,917883,917884,917885,917886,917887,917888,917889,917890,917891,917892,917893,917894,917895,917896,917897,917898,917899,917900,917901,917902,917903,917904,917905,917906,917907,917908,917909,917910,917911,917912,917913,917914,917915,917916,917917,917918,917919,917920,917921,917922,917923,917924,917925,917926,917927,917928,917929,917930,917931,917932,917933,917934,917935,917936,917937,917938,917939,917940,917941,917942,917943,917944,917945,917946,917947,917948,917949,917950,917951,917952,917953,917954,917955,917956,917957,917958,917959,917960,917961,917962,917963,917964,917965,917966,917967,917968,917969,917970,917971,917972,917973,917974,917975,917976,917977,917978,917979,917980,917981,917982,917983,917984,917985,917986,917987,917988,917989,917990,917991,917992,917993,917994,917995,917996,917997,917998,917999]")}static getData(){return this._data||(this._data=new Set(Ce.getRawData())),this._data}static isInvisibleCharacter(e){return 
Ce.getData().has(e)}static get codePoints(){return Ce.getData()}}Ce._data=void 0;const _e="$initialize";let ke;class Ee{constructor(e,t,n,i){this.vsWorker=e,this.req=t,this.method=n,this.args=i,this.type=0}}class Re{constructor(e,t,n,i){this.vsWorker=e,this.seq=t,this.res=n,this.err=i,this.type=1}}class Ne{constructor(e,t,n,i){this.vsWorker=e,this.req=t,this.eventName=n,this.arg=i,this.type=2}}class Fe{constructor(e,t,n){this.vsWorker=e,this.req=t,this.event=n,this.type=3}}class De{constructor(e,t){this.vsWorker=e,this.req=t,this.type=4}}class Te{constructor(e){this._workerId=-1,this._handler=e,this._lastSentReq=0,this._pendingReplies=Object.create(null),this._pendingEmitters=new Map,this._pendingEvents=new Map}setWorkerId(e){this._workerId=e}sendMessage(e,t){const n=String(++this._lastSentReq);return new Promise(((i,r)=>{this._pendingReplies[n]={resolve:i,reject:r},this._send(new Ee(this._workerId,n,e,t))}))}listen(e,t){let n=null;const i=new A({onWillAddFirstListener:()=>{n=String(++this._lastSentReq),this._pendingEmitters.set(n,i),this._send(new Ne(this._workerId,n,e,t))},onDidRemoveLastListener:()=>{this._pendingEmitters.delete(n),this._send(new De(this._workerId,n)),n=null}});return i.event}handleMessage(e){e&&e.vsWorker&&(-1!==this._workerId&&e.vsWorker!==this._workerId||this._handleMessage(e))}_handleMessage(e){switch(e.type){case 1:return this._handleReplyMessage(e);case 0:return this._handleRequestMessage(e);case 2:return this._handleSubscribeEventMessage(e);case 3:return this._handleEventMessage(e);case 4:return this._handleUnsubscribeEventMessage(e)}}_handleReplyMessage(e){if(!this._pendingReplies[e.seq])return void console.warn("Got reply to unknown seq");const t=this._pendingReplies[e.seq];if(delete this._pendingReplies[e.seq],e.err){let n=e.err;return e.err.$isError&&(n=new Error,n.name=e.err.name,n.message=e.err.message,n.stack=e.err.stack),void t.reject(n)}t.resolve(e.res)}_handleRequestMessage(e){const 
t=e.req;this._handler.handleMessage(e.method,e.args).then((e=>{this._send(new Re(this._workerId,t,e,void 0))}),(e=>{e.detail instanceof Error&&(e.detail=n(e.detail)),this._send(new Re(this._workerId,t,void 0,n(e)))}))}_handleSubscribeEventMessage(e){const t=e.req,n=this._handler.handleEvent(e.eventName,e.arg)((e=>{this._send(new Fe(this._workerId,t,e))}));this._pendingEvents.set(t,n)}_handleEventMessage(e){this._pendingEmitters.has(e.req)?this._pendingEmitters.get(e.req).fire(e.event):console.warn("Got event for unknown req")}_handleUnsubscribeEventMessage(e){this._pendingEvents.has(e.req)?(this._pendingEvents.get(e.req).dispose(),this._pendingEvents.delete(e.req)):console.warn("Got unsubscribe for unknown req")}_send(e){const t=[];if(0===e.type)for(let n=0;nfunction(){const n=Array.prototype.slice.call(arguments,0);return t(e,n)},r=e=>function(t){return n(e,t)},o={};for(const s of e)Me(s)?o[s]=r(s):Ae(s)?o[s]=n(s,void 0):o[s]=i(s);return o}class Ie{constructor(e,t){this._requestHandlerFactory=t,this._requestHandler=null,this._protocol=new Te({sendMessage:(t,n)=>{e(t,n)},handleMessage:(e,t)=>this._handleMessage(e,t),handleEvent:(e,t)=>this._handleEvent(e,t)})}onmessage(e){this._protocol.handleMessage(e)}_handleMessage(e,t){if(e===_e)return this.initialize(t[0],t[1],t[2],t[3]);if(!this._requestHandler||"function"!==typeof this._requestHandler[e])return Promise.reject(new Error("Missing requestHandler or method: "+e));try{return Promise.resolve(this._requestHandler[e].apply(this._requestHandler,t))}catch(Du){return Promise.reject(Du)}}_handleEvent(e,t){if(!this._requestHandler)throw new Error("Missing requestHandler");if(Me(e)){const n=this._requestHandler[e].call(this._requestHandler,t);if("function"!==typeof n)throw new Error(`Missing dynamic event ${e} on request handler.`);return n}if(Ae(e)){const t=this._requestHandler[e];if("function"!==typeof t)throw new Error(`Missing event ${e} on request handler.`);return t}throw new Error(`Malformed event name 
${e}`)}initialize(e,t,n,i){this._protocol.setWorkerId(e);const r=ze(i,((e,t)=>this._protocol.sendMessage(e,t)),((e,t)=>this._protocol.listen(e,t)));return this._requestHandlerFactory?(this._requestHandler=this._requestHandlerFactory(r),Promise.resolve(z(this._requestHandler))):(t&&("undefined"!==typeof t.baseUrl&&delete t.baseUrl,"undefined"!==typeof t.paths&&"undefined"!==typeof t.paths.vs&&delete t.paths.vs,"undefined"!==typeof t.trustedTypesPolicy&&delete t.trustedTypesPolicy,t.catchError=!0,globalThis.require.config(t)),new Promise(((e,t)=>{(0,globalThis.require)([n],(n=>{this._requestHandler=n.create(r),this._requestHandler?e(z(this._requestHandler)):t(new Error("No RequestHandler!"))}),t)})))}}class Le{constructor(e,t,n,i){this.originalStart=e,this.originalLength=t,this.modifiedStart=n,this.modifiedLength=i}getOriginalEnd(){return this.originalStart+this.originalLength}getModifiedEnd(){return this.modifiedStart+this.modifiedLength}}function Pe(e,t){return(t<<5)-t+e|0}function Oe(e,t){t=Pe(149417,t);for(let n=0,i=e.length;n2&&void 0!==arguments[2]?arguments[2]:32)-t;return(e<>>n)>>>0}function Ve(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0,n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:e.byteLength,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:0;for(let r=0;r1&&void 0!==arguments[1]?arguments[1]:32;return e instanceof ArrayBuffer?Array.from(new Uint8Array(e)).map((e=>e.toString(16).padStart(2,"0"))).join(""):function(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:"0";for(;e.length>>0).toString(16),t/4)}class Ke{constructor(){this._h0=1732584193,this._h1=4023233417,this._h2=2562383102,this._h3=271733878,this._h4=3285377520,this._buff=new Uint8Array(67),this._buffDV=new DataView(this._buff.buffer),this._buffLen=0,this._totalLen=0,this._leftoverHighSurrogate=0,this._finished=!1}update(e){const t=e.length;if(0===t)return;const n=this._buff;let 
i,r,o=this._buffLen,s=this._leftoverHighSurrogate;for(0!==s?(i=s,r=-1,s=0):(i=e.charCodeAt(0),r=0);;){let a=i;if(ge(i)){if(!(r+1>>6,e[t++]=128|(63&n)>>>0):n<65536?(e[t++]=224|(61440&n)>>>12,e[t++]=128|(4032&n)>>>6,e[t++]=128|(63&n)>>>0):(e[t++]=240|(1835008&n)>>>18,e[t++]=128|(258048&n)>>>12,e[t++]=128|(4032&n)>>>6,e[t++]=128|(63&n)>>>0),t>=64&&(this._step(),t-=64,this._totalLen+=64,e[0]=e[64],e[1]=e[65],e[2]=e[66]),t}digest(){return this._finished||(this._finished=!0,this._leftoverHighSurrogate&&(this._leftoverHighSurrogate=0,this._buffLen=this._push(this._buff,this._buffLen,65533)),this._totalLen+=this._buffLen,this._wrapUp()),Ue(this._h0)+Ue(this._h1)+Ue(this._h2)+Ue(this._h3)+Ue(this._h4)}_wrapUp(){this._buff[this._buffLen++]=128,Ve(this._buff,this._buffLen),this._buffLen>56&&(this._step(),Ve(this._buff));const e=8*this._totalLen;this._buffDV.setUint32(56,Math.floor(e/4294967296),!1),this._buffDV.setUint32(60,e%4294967296,!1),this._step()}_step(){const e=Ke._bigBlock32,t=this._buffDV;for(let h=0;h<64;h+=4)e.setUint32(h,t.getUint32(h,!1),!1);for(let h=64;h<320;h+=4)e.setUint32(h,We(e.getUint32(h-12,!1)^e.getUint32(h-32,!1)^e.getUint32(h-56,!1)^e.getUint32(h-64,!1),1),!1);let n,i,r,o=this._h0,s=this._h1,a=this._h2,l=this._h3,c=this._h4;for(let h=0;h<80;h++)h<20?(n=s&a|~s&l,i=1518500249):h<40?(n=s^a^l,i=1859775393):h<60?(n=s&a|s&l|a&l,i=2400959708):(n=s^a^l,i=3395469782),r=We(o,5)+n+c+i+e.getUint32(4*h,!1)&4294967295,c=l,l=a,a=We(s,30),s=o,o=r;this._h0=this._h0+o&4294967295,this._h1=this._h1+s&4294967295,this._h2=this._h2+a&4294967295,this._h3=this._h3+l&4294967295,this._h4=this._h4+c&4294967295}}Ke._bigBlock32=new DataView(new ArrayBuffer(320));class qe{constructor(e){this.source=e}getElements(){const e=this.source,t=new Int32Array(e.length);for(let n=0,i=e.length;n0||this.m_modifiedCount>0)&&this.m_changes.push(new 
Le(this.m_originalStart,this.m_originalCount,this.m_modifiedStart,this.m_modifiedCount)),this.m_originalCount=0,this.m_modifiedCount=0,this.m_originalStart=1073741824,this.m_modifiedStart=1073741824}AddOriginalElement(e,t){this.m_originalStart=Math.min(this.m_originalStart,e),this.m_modifiedStart=Math.min(this.m_modifiedStart,t),this.m_originalCount++}AddModifiedElement(e,t){this.m_originalStart=Math.min(this.m_originalStart,e),this.m_modifiedStart=Math.min(this.m_modifiedStart,t),this.m_modifiedCount++}getChanges(){return(this.m_originalCount>0||this.m_modifiedCount>0)&&this.MarkNextChange(),this.m_changes}getReverseChanges(){return(this.m_originalCount>0||this.m_modifiedCount>0)&&this.MarkNextChange(),this.m_changes.reverse(),this.m_changes}}class Ge{constructor(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;this.ContinueProcessingPredicate=n,this._originalSequence=e,this._modifiedSequence=t;const[i,r,o]=Ge._getElements(e),[s,a,l]=Ge._getElements(t);this._hasStrings=o&&l,this._originalStringElements=i,this._originalElementsOrHash=r,this._modifiedStringElements=s,this._modifiedElementsOrHash=a,this.m_forwardHistory=[],this.m_reverseHistory=[]}static _isStringArray(e){return e.length>0&&"string"===typeof e[0]}static _getElements(e){const t=e.getElements();if(Ge._isStringArray(t)){const e=new Int32Array(t.length);for(let n=0,i=t.length;n=e&&i>=n&&this.ElementsAreEqual(t,i);)t--,i--;if(e>t||n>i){let r;return n<=i?(je.Assert(e===t+1,"originalStart should only be one more than originalEnd"),r=[new Le(e,0,n,i-n+1)]):e<=t?(je.Assert(n===i+1,"modifiedStart should only be one more than modifiedEnd"),r=[new Le(e,t-e+1,n,0)]):(je.Assert(e===t+1,"originalStart should only be one more than originalEnd"),je.Assert(n===i+1,"modifiedStart should only be one more than modifiedEnd"),r=[]),r}const o=[0],s=[0],a=this.ComputeRecursionPoint(e,t,n,i,o,s,r),l=o[0],c=s[0];if(null!==a)return a;if(!r[0]){const o=this.ComputeDiffRecursive(e,l,n,c,r);let s=[];return 
s=r[0]?[new Le(l+1,t-(l+1)+1,c+1,i-(c+1)+1)]:this.ComputeDiffRecursive(l+1,t,c+1,i,r),this.ConcatenateChanges(o,s)}return[new Le(e,t-e+1,n,i-n+1)]}WALKTRACE(e,t,n,i,r,o,s,a,l,c,h,d,p,u,m,f,g,b){let v=null,y=null,w=new He,x=t,S=n,C=p[0]-f[0]-i,_=-1073741824,k=this.m_forwardHistory.length-1;do{const t=C+e;t===x||t=0&&(e=(l=this.m_forwardHistory[k])[0],x=1,S=l.length-1)}while(--k>=-1);if(v=w.getReverseChanges(),b[0]){let e=p[0]+1,t=f[0]+1;if(null!==v&&v.length>0){const n=v[v.length-1];e=Math.max(e,n.getOriginalEnd()),t=Math.max(t,n.getModifiedEnd())}y=[new Le(e,d-e+1,t,m-t+1)]}else{w=new He,x=o,S=s,C=p[0]-f[0]-a,_=1073741824,k=g?this.m_reverseHistory.length-1:this.m_reverseHistory.length-2;do{const e=C+r;e===x||e=c[e+1]?(u=(h=c[e+1]-1)-C-a,h>_&&w.MarkNextChange(),_=h+1,w.AddOriginalElement(h+1,u+1),C=e+1-r):(u=(h=c[e-1])-C-a,h>_&&w.MarkNextChange(),_=h,w.AddModifiedElement(h+1,u+1),C=e-1-r),k>=0&&(r=(c=this.m_reverseHistory[k])[0],x=1,S=c.length-1)}while(--k>=-1);y=w.getChanges()}return this.ConcatenateChanges(v,y)}ComputeRecursionPoint(e,t,n,i,r,o,s){let a=0,l=0,c=0,h=0,d=0,p=0;e--,n--,r[0]=0,o[0]=0,this.m_forwardHistory=[],this.m_reverseHistory=[];const u=t-e+(i-n),m=u+1,f=new Int32Array(m),g=new Int32Array(m),b=i-n,v=t-e,y=e-n,w=t-i,x=(v-b)%2===0;f[b]=e,g[v]=t,s[0]=!1;for(let S=1;S<=u/2+1;S++){let u=0,C=0;c=this.ClipDiagonalBound(b-S,S,b,m),h=this.ClipDiagonalBound(b+S,S,b,m);for(let e=c;e<=h;e+=2){a=e===c||eu+C&&(u=a,C=l),!x&&Math.abs(e-v)<=S-1&&a>=g[e])return r[0]=a,o[0]=l,n<=g[e]&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):null}const _=(u-e+(C-n)-S)/2;if(null!==this.ContinueProcessingPredicate&&!this.ContinueProcessingPredicate(u,_))return s[0]=!0,r[0]=u,o[0]=C,_>0&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):(e++,n++,[new Le(e,t-e+1,n,i-n+1)]);d=this.ClipDiagonalBound(v-S,S,v,m),p=this.ClipDiagonalBound(v+S,S,v,m);for(let m=d;m<=p;m+=2){a=m===d||m=g[m+1]?g[m+1]-1:g[m-1],l=a-(m-v)-w;const 
u=a;for(;a>e&&l>n&&this.ElementsAreEqual(a,l);)a--,l--;if(g[m]=a,x&&Math.abs(m-b)<=S&&a<=f[m])return r[0]=a,o[0]=l,u>=f[m]&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):null}if(S<=1447){let e=new Int32Array(h-c+2);e[0]=b-c+1,$e.Copy2(f,c,e,1,h-c+1),this.m_forwardHistory.push(e),e=new Int32Array(p-d+2),e[0]=v-d+1,$e.Copy2(g,d,e,1,p-d+1),this.m_reverseHistory.push(e)}}return this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s)}PrettifyChanges(e){for(let t=0;t0,s=n.modifiedLength>0;for(;n.originalStart+n.originalLength=0;t--){const n=e[t];let i=0,r=0;if(t>0){const n=e[t-1];i=n.originalStart+n.originalLength,r=n.modifiedStart+n.modifiedLength}const o=n.originalLength>0,s=n.modifiedLength>0;let a=0,l=this._boundaryScore(n.originalStart,n.originalLength,n.modifiedStart,n.modifiedLength);for(let e=1;;e++){const t=n.originalStart-e,c=n.modifiedStart-e;if(tl&&(l=h,a=e)}n.originalStart-=a,n.modifiedStart-=a;const c=[null];t>0&&this.ChangesOverlap(e[t-1],e[t],c)&&(e[t-1]=c[0],e.splice(t,1),t++)}if(this._hasStrings)for(let t=1,n=e.length;t0&&t>a&&(a=t,l=h,c=e)}return a>0?[l,c]:null}_contiguousSequenceScore(e,t,n){let i=0;for(let r=0;r=this._originalElementsOrHash.length-1||this._hasStrings&&/^\s*$/.test(this._originalStringElements[e])}_OriginalRegionIsBoundary(e,t){if(this._OriginalIsBoundary(e)||this._OriginalIsBoundary(e-1))return!0;if(t>0){const n=e+t;if(this._OriginalIsBoundary(n-1)||this._OriginalIsBoundary(n))return!0}return!1}_ModifiedIsBoundary(e){return e<=0||e>=this._modifiedElementsOrHash.length-1||this._hasStrings&&/^\s*$/.test(this._modifiedStringElements[e])}_ModifiedRegionIsBoundary(e,t){if(this._ModifiedIsBoundary(e)||this._ModifiedIsBoundary(e-1))return!0;if(t>0){const n=e+t;if(this._ModifiedIsBoundary(n-1)||this._ModifiedIsBoundary(n))return!0}return!1}_boundaryScore(e,t,n,i){return(this._OriginalRegionIsBoundary(e,t)?1:0)+(this._ModifiedRegionIsBoundary(n,i)?1:0)}ConcatenateChanges(e,t){const n=[];if(0===e.length||0===t.length)return 
t.length>0?t:e;if(this.ChangesOverlap(e[e.length-1],t[0],n)){const i=new Array(e.length+t.length-1);return $e.Copy(e,0,i,0,e.length-1),i[e.length-1]=n[0],$e.Copy(t,1,i,e.length,t.length-1),i}{const n=new Array(e.length+t.length);return $e.Copy(e,0,n,0,e.length),$e.Copy(t,0,n,e.length,t.length),n}}ChangesOverlap(e,t,n){if(je.Assert(e.originalStart<=t.originalStart,"Left change is not less than or equal to right change"),je.Assert(e.modifiedStart<=t.modifiedStart,"Left change is not less than or equal to right change"),e.originalStart+e.originalLength>=t.originalStart||e.modifiedStart+e.modifiedLength>=t.modifiedStart){const i=e.originalStart;let r=e.originalLength;const o=e.modifiedStart;let s=e.modifiedLength;return e.originalStart+e.originalLength>=t.originalStart&&(r=t.originalStart+t.originalLength-e.originalStart),e.modifiedStart+e.modifiedLength>=t.modifiedStart&&(s=t.modifiedStart+t.modifiedLength-e.modifiedStart),n[0]=new Le(i,r,o,s),!0}return n[0]=null,!1}ClipDiagonalBound(e,t,n,i){if(e>=0&&ee.cwd()}}else ke="undefined"!==typeof process?{get platform(){return process.platform},get arch(){return process.arch},get env(){return{NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}},cwd:()=>({NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}.VSCODE_CWD||process.cwd())}:{get platform(){return ae?"win32":le?"darwin":"linux"},get arch(){},get env(){return{}},cwd:()=>"/"};const Xe=ke.cwd,Ye=(ke.env,ke.platform),Qe=65,Ze=97,et=90,tt=122,nt=46,it=47,rt=92,ot=58;class st extends Error{constructor(e,t,n){let i;"string"===typeof t&&0===t.indexOf("not ")?(i="must not be",t=t.replace(/^not /,"")):i="must be";const r=-1!==e.indexOf(".")?"property":"argument";let o=`The "${e}" ${r} 
${i} of type ${t}`;o+=". Received type "+typeof n,super(o),this.code="ERR_INVALID_ARG_TYPE"}}function at(e,t){if("string"!==typeof e)throw new st(t,"string",e)}const lt="win32"===Ye;function ct(e){return e===it||e===rt}function ht(e){return e===it}function dt(e){return e>=Qe&&e<=et||e>=Ze&&e<=tt}function pt(e,t,n,i){let r="",o=0,s=-1,a=0,l=0;for(let c=0;c<=e.length;++c){if(c2){const e=r.lastIndexOf(n);-1===e?(r="",o=0):(r=r.slice(0,e),o=r.length-1-r.lastIndexOf(n)),s=c,a=0;continue}if(0!==r.length){r="",o=0,s=c,a=0;continue}}t&&(r+=r.length>0?`${n}..`:"..",o=2)}else r.length>0?r+=`${n}${e.slice(s+1,c)}`:r=e.slice(s+1,c),o=c-s-1;s=c,a=0}else l===nt&&-1!==a?++a:a=-1}return r}function ut(e,t){!function(e,t){if(null===e||"object"!==typeof e)throw new st(t,"Object",e)}(t,"pathObject");const n=t.dir||t.root,i=t.base||`${t.name||""}${t.ext||""}`;return n?n===t.root?`${n}${i}`:`${n}${e}${i}`:i}const mt={resolve(){let e="",t="",n=!1;for(let i=arguments.length-1;i>=-1;i--){let r;if(i>=0){if(r=i<0||arguments.length<=i?void 0:arguments[i],at(r,"path"),0===r.length)continue}else 0===e.length?r=Xe():(r={NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}[`=${e}`]||Xe(),(void 0===r||r.slice(0,2).toLowerCase()!==e.toLowerCase()&&r.charCodeAt(2)===rt)&&(r=`${e}\\`));const o=r.length;let s=0,a="",l=!1;const c=r.charCodeAt(0);if(1===o)ct(c)&&(s=1,l=!0);else if(ct(c))if(l=!0,ct(r.charCodeAt(1))){let e=2,t=e;for(;e2&&ct(r.charCodeAt(2))&&(l=!0,s=3));if(a.length>0)if(e.length>0){if(a.toLowerCase()!==e.toLowerCase())continue}else e=a;if(n){if(e.length>0)break}else if(t=`${r.slice(s)}\\${t}`,n=l,l&&e.length>0)break}return t=pt(t,!n,"\\",ct),n?`${e}\\${t}`:`${e}${t}`||"."},normalize(e){at(e,"path");const t=e.length;if(0===t)return".";let n,i=0,r=!1;const o=e.charCodeAt(0);if(1===t)return ht(o)?"\\":e;if(ct(o))if(r=!0,ct(e.charCodeAt(1))){let 
r=2,o=r;for(;r2&&ct(e.charCodeAt(2))&&(r=!0,i=3));let s=i0&&ct(e.charCodeAt(t-1))&&(s+="\\"),void 0===n?r?`\\${s}`:s:r?`${n}\\${s}`:`${n}${s}`},isAbsolute(e){at(e,"path");const t=e.length;if(0===t)return!1;const n=e.charCodeAt(0);return ct(n)||t>2&&dt(n)&&e.charCodeAt(1)===ot&&ct(e.charCodeAt(2))},join(){if(0===arguments.length)return".";let e,t;for(let r=0;r0&&(void 0===e?e=t=n:e+=`\\${n}`)}if(void 0===e)return".";let n=!0,i=0;if("string"===typeof t&&ct(t.charCodeAt(0))){++i;const e=t.length;e>1&&ct(t.charCodeAt(1))&&(++i,e>2&&(ct(t.charCodeAt(2))?++i:n=!1))}if(n){for(;i=2&&(e=`\\${e.slice(i)}`)}return mt.normalize(e)},relative(e,t){if(at(e,"from"),at(t,"to"),e===t)return"";const n=mt.resolve(e),i=mt.resolve(t);if(n===i)return"";if((e=n.toLowerCase())===(t=i.toLowerCase()))return"";let r=0;for(;rr&&e.charCodeAt(o-1)===rt;)o--;const s=o-r;let a=0;for(;aa&&t.charCodeAt(l-1)===rt;)l--;const c=l-a,h=sh){if(t.charCodeAt(a+p)===rt)return i.slice(a+p+1);if(2===p)return i.slice(a+p)}s>h&&(e.charCodeAt(r+p)===rt?d=p:2===p&&(d=3)),-1===d&&(d=0)}let u="";for(p=r+d+1;p<=o;++p)p!==o&&e.charCodeAt(p)!==rt||(u+=0===u.length?"..":"\\..");return a+=d,u.length>0?`${u}${i.slice(a,l)}`:(i.charCodeAt(a)===rt&&++a,i.slice(a,l))},toNamespacedPath(e){if("string"!==typeof e||0===e.length)return e;const t=mt.resolve(e);if(t.length<=2)return e;if(t.charCodeAt(0)===rt){if(t.charCodeAt(1)===rt){const e=t.charCodeAt(2);if(63!==e&&e!==nt)return`\\\\?\\UNC\\${t.slice(2)}`}}else if(dt(t.charCodeAt(0))&&t.charCodeAt(1)===ot&&t.charCodeAt(2)===rt)return`\\\\?\\${t}`;return e},dirname(e){at(e,"path");const t=e.length;if(0===t)return".";let n=-1,i=0;const r=e.charCodeAt(0);if(1===t)return ct(r)?e:".";if(ct(r)){if(n=i=1,ct(e.charCodeAt(1))){let r=2,o=r;for(;r2&&ct(e.charCodeAt(2))?3:2,i=n);let o=-1,s=!0;for(let a=t-1;a>=i;--a)if(ct(e.charCodeAt(a))){if(!s){o=a;break}}else s=!1;if(-1===o){if(-1===n)return".";o=n}return e.slice(0,o)},basename(e,t){void 0!==t&&at(t,"ext"),at(e,"path");let 
n,i=0,r=-1,o=!0;if(e.length>=2&&dt(e.charCodeAt(0))&&e.charCodeAt(1)===ot&&(i=2),void 0!==t&&t.length>0&&t.length<=e.length){if(t===e)return"";let s=t.length-1,a=-1;for(n=e.length-1;n>=i;--n){const l=e.charCodeAt(n);if(ct(l)){if(!o){i=n+1;break}}else-1===a&&(o=!1,a=n+1),s>=0&&(l===t.charCodeAt(s)?-1===--s&&(r=n):(s=-1,r=a))}return i===r?r=a:-1===r&&(r=e.length),e.slice(i,r)}for(n=e.length-1;n>=i;--n)if(ct(e.charCodeAt(n))){if(!o){i=n+1;break}}else-1===r&&(o=!1,r=n+1);return-1===r?"":e.slice(i,r)},extname(e){at(e,"path");let t=0,n=-1,i=0,r=-1,o=!0,s=0;e.length>=2&&e.charCodeAt(1)===ot&&dt(e.charCodeAt(0))&&(t=i=2);for(let a=e.length-1;a>=t;--a){const t=e.charCodeAt(a);if(ct(t)){if(!o){i=a+1;break}}else-1===r&&(o=!1,r=a+1),t===nt?-1===n?n=a:1!==s&&(s=1):-1!==n&&(s=-1)}return-1===n||-1===r||0===s||1===s&&n===r-1&&n===i+1?"":e.slice(n,r)},format:ut.bind(null,"\\"),parse(e){at(e,"path");const t={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return t;const n=e.length;let i=0,r=e.charCodeAt(0);if(1===n)return ct(r)?(t.root=t.dir=e,t):(t.base=t.name=e,t);if(ct(r)){if(i=1,ct(e.charCodeAt(1))){let t=2,r=t;for(;t0&&(t.root=e.slice(0,i));let o=-1,s=i,a=-1,l=!0,c=e.length-1,h=0;for(;c>=i;--c)if(r=e.charCodeAt(c),ct(r)){if(!l){s=c+1;break}}else-1===a&&(l=!1,a=c+1),r===nt?-1===o?o=c:1!==h&&(h=1):-1!==o&&(h=-1);return-1!==a&&(-1===o||0===h||1===h&&o===a-1&&o===s+1?t.base=t.name=e.slice(s,a):(t.name=e.slice(s,o),t.base=e.slice(s,a),t.ext=e.slice(o,a))),t.dir=s>0&&s!==i?e.slice(0,s-1):t.root,t},sep:"\\",delimiter:";",win32:null,posix:null},ft=(()=>{if(lt){const e=/\\/g;return()=>{const t=Xe().replace(e,"/");return t.slice(t.indexOf("/"))}}return()=>Xe()})(),gt={resolve(){let e="",t=!1;for(let n=arguments.length-1;n>=-1&&!t;n--){const i=n>=0?n<0||arguments.length<=n?void 0:arguments[n]:ft();at(i,"path"),0!==i.length&&(e=`${i}/${e}`,t=i.charCodeAt(0)===it)}return e=pt(e,!t,"/",ht),t?`/${e}`:e.length>0?e:"."},normalize(e){if(at(e,"path"),0===e.length)return".";const 
t=e.charCodeAt(0)===it,n=e.charCodeAt(e.length-1)===it;return 0===(e=pt(e,!t,"/",ht)).length?t?"/":n?"./":".":(n&&(e+="/"),t?`/${e}`:e)},isAbsolute:e=>(at(e,"path"),e.length>0&&e.charCodeAt(0)===it),join(){if(0===arguments.length)return".";let e;for(let t=0;t0&&(void 0===e?e=n:e+=`/${n}`)}return void 0===e?".":gt.normalize(e)},relative(e,t){if(at(e,"from"),at(t,"to"),e===t)return"";if((e=gt.resolve(e))===(t=gt.resolve(t)))return"";const n=e.length,i=n-1,r=t.length-1,o=io){if(t.charCodeAt(1+a)===it)return t.slice(1+a+1);if(0===a)return t.slice(1+a)}else i>o&&(e.charCodeAt(1+a)===it?s=a:0===a&&(s=0));let l="";for(a=1+s+1;a<=n;++a)a!==n&&e.charCodeAt(a)!==it||(l+=0===l.length?"..":"/..");return`${l}${t.slice(1+s)}`},toNamespacedPath:e=>e,dirname(e){if(at(e,"path"),0===e.length)return".";const t=e.charCodeAt(0)===it;let n=-1,i=!0;for(let r=e.length-1;r>=1;--r)if(e.charCodeAt(r)===it){if(!i){n=r;break}}else i=!1;return-1===n?t?"/":".":t&&1===n?"//":e.slice(0,n)},basename(e,t){void 0!==t&&at(t,"ext"),at(e,"path");let n,i=0,r=-1,o=!0;if(void 0!==t&&t.length>0&&t.length<=e.length){if(t===e)return"";let s=t.length-1,a=-1;for(n=e.length-1;n>=0;--n){const l=e.charCodeAt(n);if(l===it){if(!o){i=n+1;break}}else-1===a&&(o=!1,a=n+1),s>=0&&(l===t.charCodeAt(s)?-1===--s&&(r=n):(s=-1,r=a))}return i===r?r=a:-1===r&&(r=e.length),e.slice(i,r)}for(n=e.length-1;n>=0;--n)if(e.charCodeAt(n)===it){if(!o){i=n+1;break}}else-1===r&&(o=!1,r=n+1);return-1===r?"":e.slice(i,r)},extname(e){at(e,"path");let t=-1,n=0,i=-1,r=!0,o=0;for(let s=e.length-1;s>=0;--s){const a=e.charCodeAt(s);if(a!==it)-1===i&&(r=!1,i=s+1),a===nt?-1===t?t=s:1!==o&&(o=1):-1!==t&&(o=-1);else if(!r){n=s+1;break}}return-1===t||-1===i||0===o||1===o&&t===i-1&&t===n+1?"":e.slice(t,i)},format:ut.bind(null,"/"),parse(e){at(e,"path");const t={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return t;const n=e.charCodeAt(0)===it;let i;n?(t.root="/",i=1):i=0;let r=-1,o=0,s=-1,a=!0,l=e.length-1,c=0;for(;l>=i;--l){const 
t=e.charCodeAt(l);if(t!==it)-1===s&&(a=!1,s=l+1),t===nt?-1===r?r=l:1!==c&&(c=1):-1!==r&&(c=-1);else if(!a){o=l+1;break}}if(-1!==s){const i=0===o&&n?1:o;-1===r||0===c||1===c&&r===s-1&&r===o+1?t.base=t.name=e.slice(i,s):(t.name=e.slice(i,r),t.base=e.slice(i,s),t.ext=e.slice(r,s))}return o>0?t.dir=e.slice(0,o-1):n&&(t.dir="/"),t},sep:"/",delimiter:":",win32:null,posix:null};gt.win32=mt.win32=mt,gt.posix=mt.posix=gt;lt?mt.normalize:gt.normalize,lt?mt.resolve:gt.resolve,lt?mt.relative:gt.relative,lt?mt.dirname:gt.dirname,lt?mt.basename:gt.basename,lt?mt.extname:gt.extname,lt?mt.sep:gt.sep;const bt=/^\w[\w\d+.-]*$/,vt=/^\//,yt=/^\/\//;const wt="",xt="/",St=/^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;class Ct{static isUri(e){return e instanceof Ct||!!e&&("string"===typeof e.authority&&"string"===typeof e.fragment&&"string"===typeof e.path&&"string"===typeof e.query&&"string"===typeof e.scheme&&"string"===typeof e.fsPath&&"function"===typeof e.with&&"function"===typeof e.toString)}constructor(e,t,n,i,r){let o=arguments.length>5&&void 0!==arguments[5]&&arguments[5];"object"===typeof e?(this.scheme=e.scheme||wt,this.authority=e.authority||wt,this.path=e.path||wt,this.query=e.query||wt,this.fragment=e.fragment||wt):(this.scheme=function(e,t){return e||t?e:"file"}(e,o),this.authority=t||wt,this.path=function(e,t){switch(e){case"https":case"http":case"file":t?t[0]!==xt&&(t=xt+t):t=xt}return t}(this.scheme,n||wt),this.query=i||wt,this.fragment=r||wt,function(e,t){if(!e.scheme&&t)throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${e.authority}", path: "${e.path}", query: "${e.query}", fragment: "${e.fragment}"}`);if(e.scheme&&!bt.test(e.scheme))throw new Error("[UriError]: Scheme contains illegal characters.");if(e.path)if(e.authority){if(!vt.test(e.path))throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character')}else if(yt.test(e.path))throw 
new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")')}(this,o))}get fsPath(){return Ft(this,!1)}with(e){if(!e)return this;let{scheme:t,authority:n,path:i,query:r,fragment:o}=e;return void 0===t?t=this.scheme:null===t&&(t=wt),void 0===n?n=this.authority:null===n&&(n=wt),void 0===i?i=this.path:null===i&&(i=wt),void 0===r?r=this.query:null===r&&(r=wt),void 0===o?o=this.fragment:null===o&&(o=wt),t===this.scheme&&n===this.authority&&i===this.path&&r===this.query&&o===this.fragment?this:new kt(t,n,i,r,o)}static parse(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1];const n=St.exec(e);return n?new kt(n[2]||wt,Mt(n[4]||wt),Mt(n[5]||wt),Mt(n[7]||wt),Mt(n[9]||wt),t):new kt(wt,wt,wt,wt,wt)}static file(e){let t=wt;if(ae&&(e=e.replace(/\\/g,xt)),e[0]===xt&&e[1]===xt){const n=e.indexOf(xt,2);-1===n?(t=e.substring(2),e=xt):(t=e.substring(2,n),e=e.substring(n)||xt)}return new kt("file",t,e,wt,wt)}static from(e,t){return new kt(e.scheme,e.authority,e.path,e.query,e.fragment,t)}static joinPath(e){if(!e.path)throw new Error("[UriError]: cannot call joinPath on URI without path");let t;for(var n=arguments.length,i=new Array(n>1?n-1:0),r=1;r0&&void 0!==arguments[0]&&arguments[0])}toJSON(){return this}static revive(e){var t,n;if(e){if(e instanceof Ct)return e;{const i=new kt(e);return i._formatted=null!==(t=e.external)&&void 0!==t?t:null,i._fsPath=e._sep===_t&&null!==(n=e.fsPath)&&void 0!==n?n:null,i}}return e}}const _t=ae?1:void 0;class kt extends Ct{constructor(){super(...arguments),this._formatted=null,this._fsPath=null}get fsPath(){return this._fsPath||(this._fsPath=Ft(this,!1)),this._fsPath}toString(){return arguments.length>0&&void 0!==arguments[0]&&arguments[0]?Dt(this,!0):(this._formatted||(this._formatted=Dt(this,!1)),this._formatted)}toJSON(){const e={$mid:1};return 
this._fsPath&&(e.fsPath=this._fsPath,e._sep=_t),this._formatted&&(e.external=this._formatted),this.path&&(e.path=this.path),this.scheme&&(e.scheme=this.scheme),this.authority&&(e.authority=this.authority),this.query&&(e.query=this.query),this.fragment&&(e.fragment=this.fragment),e}}const Et={58:"%3A",47:"%2F",63:"%3F",35:"%23",91:"%5B",93:"%5D",64:"%40",33:"%21",36:"%24",38:"%26",39:"%27",40:"%28",41:"%29",42:"%2A",43:"%2B",44:"%2C",59:"%3B",61:"%3D",32:"%20"};function Rt(e,t,n){let i,r=-1;for(let o=0;o=97&&s<=122||s>=65&&s<=90||s>=48&&s<=57||45===s||46===s||95===s||126===s||t&&47===s||n&&91===s||n&&93===s||n&&58===s)-1!==r&&(i+=encodeURIComponent(e.substring(r,o)),r=-1),void 0!==i&&(i+=e.charAt(o));else{void 0===i&&(i=e.substr(0,o));const t=Et[s];void 0!==t?(-1!==r&&(i+=encodeURIComponent(e.substring(r,o)),r=-1),i+=t):-1===r&&(r=o)}}return-1!==r&&(i+=encodeURIComponent(e.substring(r))),void 0!==i?i:e}function Nt(e){let t;for(let n=0;n1&&"file"===e.scheme?`//${e.authority}${e.path}`:47===e.path.charCodeAt(0)&&(e.path.charCodeAt(1)>=65&&e.path.charCodeAt(1)<=90||e.path.charCodeAt(1)>=97&&e.path.charCodeAt(1)<=122)&&58===e.path.charCodeAt(2)?t?e.path.substr(1):e.path[1].toLowerCase()+e.path.substr(2):e.path,ae&&(n=n.replace(/\//g,"\\")),n}function Dt(e,t){const n=t?Nt:Rt;let i="",{scheme:r,authority:o,path:s,query:a,fragment:l}=e;if(r&&(i+=r,i+=":"),(o||"file"===r)&&(i+=xt,i+=xt),o){let e=o.indexOf("@");if(-1!==e){const t=o.substr(0,e);o=o.substr(e+1),e=t.lastIndexOf(":"),-1===e?i+=n(t,!1,!1):(i+=n(t.substr(0,e),!1,!1),i+=":",i+=n(t.substr(e+1),!1,!0)),i+="@"}o=o.toLowerCase(),e=o.lastIndexOf(":"),-1===e?i+=n(o,!1,!0):(i+=n(o.substr(0,e),!1,!0),i+=o.substr(e))}if(s){if(s.length>=3&&47===s.charCodeAt(0)&&58===s.charCodeAt(2)){const e=s.charCodeAt(1);e>=65&&e<=90&&(s=`/${String.fromCharCode(e+32)}:${s.substr(3)}`)}else if(s.length>=2&&58===s.charCodeAt(1)){const e=s.charCodeAt(0);e>=65&&e<=90&&(s=`${String.fromCharCode(e+32)}:${s.substr(2)}`)}i+=n(s,!0,!1)}return 
a&&(i+="?",i+=n(a,!1,!1)),l&&(i+="#",i+=t?l:Rt(l,!1,!1)),i}function Tt(e){try{return decodeURIComponent(e)}catch(O){return e.length>3?e.substr(0,3)+Tt(e.substr(3)):e}}const At=/(%[0-9A-Za-z][0-9A-Za-z])+/g;function Mt(e){return e.match(At)?e.replace(At,(e=>Tt(e))):e}class zt{constructor(e,t){this.lineNumber=e,this.column=t}with(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:this.lineNumber,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:this.column;return e===this.lineNumber&&t===this.column?this:new zt(e,t)}delta(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:0,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;return this.with(this.lineNumber+e,this.column+t)}equals(e){return zt.equals(this,e)}static equals(e,t){return!e&&!t||!!e&&!!t&&e.lineNumber===t.lineNumber&&e.column===t.column}isBefore(e){return zt.isBefore(this,e)}static isBefore(e,t){return e.lineNumbern||e===n&&t>i?(this.startLineNumber=n,this.startColumn=i,this.endLineNumber=e,this.endColumn=t):(this.startLineNumber=e,this.startColumn=t,this.endLineNumber=n,this.endColumn=i)}isEmpty(){return It.isEmpty(this)}static isEmpty(e){return e.startLineNumber===e.endLineNumber&&e.startColumn===e.endColumn}containsPosition(e){return It.containsPosition(this,e)}static containsPosition(e,t){return!(t.lineNumbere.endLineNumber)&&(!(t.lineNumber===e.startLineNumber&&t.columne.endColumn))}static strictContainsPosition(e,t){return!(t.lineNumbere.endLineNumber)&&(!(t.lineNumber===e.startLineNumber&&t.column<=e.startColumn)&&!(t.lineNumber===e.endLineNumber&&t.column>=e.endColumn))}containsRange(e){return It.containsRange(this,e)}static containsRange(e,t){return!(t.startLineNumbere.endLineNumber||t.endLineNumber>e.endLineNumber)&&(!(t.startLineNumber===e.startLineNumber&&t.startColumne.endColumn)))}strictContainsRange(e){return It.strictContainsRange(this,e)}static 
strictContainsRange(e,t){return!(t.startLineNumbere.endLineNumber||t.endLineNumber>e.endLineNumber)&&(!(t.startLineNumber===e.startLineNumber&&t.startColumn<=e.startColumn)&&!(t.endLineNumber===e.endLineNumber&&t.endColumn>=e.endColumn)))}plusRange(e){return It.plusRange(this,e)}static plusRange(e,t){let n,i,r,o;return t.startLineNumbere.endLineNumber?(r=t.endLineNumber,o=t.endColumn):t.endLineNumber===e.endLineNumber?(r=t.endLineNumber,o=Math.max(t.endColumn,e.endColumn)):(r=e.endLineNumber,o=e.endColumn),new It(n,i,r,o)}intersectRanges(e){return It.intersectRanges(this,e)}static intersectRanges(e,t){let n=e.startLineNumber,i=e.startColumn,r=e.endLineNumber,o=e.endColumn;const s=t.startLineNumber,a=t.startColumn,l=t.endLineNumber,c=t.endColumn;return nl?(r=l,o=c):r===l&&(o=Math.min(o,c)),n>r||n===r&&i>o?null:new It(n,i,r,o)}equalsRange(e){return It.equalsRange(this,e)}static equalsRange(e,t){return!e&&!t||!!e&&!!t&&e.startLineNumber===t.startLineNumber&&e.startColumn===t.startColumn&&e.endLineNumber===t.endLineNumber&&e.endColumn===t.endColumn}getEndPosition(){return It.getEndPosition(this)}static getEndPosition(e){return new zt(e.endLineNumber,e.endColumn)}getStartPosition(){return It.getStartPosition(this)}static getStartPosition(e){return new zt(e.startLineNumber,e.startColumn)}toString(){return"["+this.startLineNumber+","+this.startColumn+" -> "+this.endLineNumber+","+this.endColumn+"]"}setEndPosition(e,t){return new It(this.startLineNumber,this.startColumn,e,t)}setStartPosition(e,t){return new It(e,t,this.endLineNumber,this.endColumn)}collapseToStart(){return It.collapseToStart(this)}static collapseToStart(e){return new It(e.startLineNumber,e.startColumn,e.startLineNumber,e.startColumn)}collapseToEnd(){return It.collapseToEnd(this)}static collapseToEnd(e){return new It(e.endLineNumber,e.endColumn,e.endLineNumber,e.endColumn)}delta(e){return new It(this.startLineNumber+e,this.startColumn,this.endLineNumber+e,this.endColumn)}static fromPositions(e){let 
t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:e;return new It(e.lineNumber,e.column,t.lineNumber,t.column)}static lift(e){return e?new It(e.startLineNumber,e.startColumn,e.endLineNumber,e.endColumn):null}static isIRange(e){return e&&"number"===typeof e.startLineNumber&&"number"===typeof e.startColumn&&"number"===typeof e.endLineNumber&&"number"===typeof e.endColumn}static areIntersectingOrTouching(e,t){return!(e.endLineNumbere.startLineNumber}toJSON(){return this}}var Lt;function Pt(e,t){return(n,i)=>t(e(n),e(i))}!function(e){e.isLessThan=function(e){return e<0},e.isLessThanOrEqual=function(e){return e<=0},e.isGreaterThan=function(e){return e>0},e.isNeitherLessOrGreaterThan=function(e){return 0===e},e.greaterThan=1,e.lessThan=-1,e.neitherLessOrGreaterThan=0}(Lt||(Lt={}));const Ot=(e,t)=>e-t;class Wt{constructor(e){this.iterate=e}toArray(){const e=[];return this.iterate((t=>(e.push(t),!0))),e}filter(e){return new Wt((t=>this.iterate((n=>!e(n)||t(n)))))}map(e){return new Wt((t=>this.iterate((n=>t(e(n))))))}findLast(e){let t;return this.iterate((n=>(e(n)&&(t=n),!0))),t}findLastMaxBy(e){let t,n=!0;return this.iterate((i=>((n||Lt.isGreaterThan(e(i,t)))&&(n=!1,t=i),!0))),t}}Wt.empty=new Wt((e=>{}));function Vt(e){return e<0?0:e>255?255:0|e}function Ut(e){return e<0?0:e>4294967295?4294967295:0|e}class Kt{constructor(e){this.values=e,this.prefixSum=new Uint32Array(e.length),this.prefixSumValidIndex=new Int32Array(1),this.prefixSumValidIndex[0]=-1}insertValues(e,t){e=Ut(e);const n=this.values,i=this.prefixSum,r=t.length;return 0!==r&&(this.values=new Uint32Array(n.length+r),this.values.set(n.subarray(0,e),0),this.values.set(n.subarray(e),e+r),this.values.set(t,e),e-1=0&&this.prefixSum.set(i.subarray(0,this.prefixSumValidIndex[0]+1)),!0)}setValue(e,t){return e=Ut(e),t=Ut(t),this.values[e]!==t&&(this.values[e]=t,e-1=n.length)return!1;const r=n.length-e;return t>=r&&(t=r),0!==t&&(this.values=new 
Uint32Array(n.length-t),this.values.set(n.subarray(0,e),0),this.values.set(n.subarray(e+t),e),this.prefixSum=new Uint32Array(this.values.length),e-1=0&&this.prefixSum.set(i.subarray(0,this.prefixSumValidIndex[0]+1)),!0)}getTotalSum(){return 0===this.values.length?0:this._getPrefixSum(this.values.length-1)}getPrefixSum(e){return e<0?0:(e=Ut(e),this._getPrefixSum(e))}_getPrefixSum(e){if(e<=this.prefixSumValidIndex[0])return this.prefixSum[e];let t=this.prefixSumValidIndex[0]+1;0===t&&(this.prefixSum[0]=this.values[0],t++),e>=this.values.length&&(e=this.values.length-1);for(let n=t;n<=e;n++)this.prefixSum[n]=this.prefixSum[n-1]+this.values[n];return this.prefixSumValidIndex[0]=Math.max(this.prefixSumValidIndex[0],e),this.prefixSum[e]}getIndexOf(e){e=Math.floor(e),this.getTotalSum();let t=0,n=this.values.length-1,i=0,r=0,o=0;for(;t<=n;)if(i=t+(n-t)/2|0,r=this.prefixSum[i],o=r-this.values[i],e=r))break;t=i+1}return new qt(i,e-o)}}class qt{constructor(e,t){this.index=e,this.remainder=t,this._prefixSumIndexOfResultBrand=void 0,this.index=e,this.remainder=t}}class Bt{constructor(e,t,n,i){this._uri=e,this._lines=t,this._eol=n,this._versionId=i,this._lineStarts=null,this._cachedTextValue=null}dispose(){this._lines.length=0}get version(){return this._versionId}getText(){return null===this._cachedTextValue&&(this._cachedTextValue=this._lines.join(this._eol)),this._cachedTextValue}onEvents(e){e.eol&&e.eol!==this._eol&&(this._eol=e.eol,this._lineStarts=null);const t=e.changes;for(const n of t)this._acceptDeleteRange(n.range),this._acceptInsertText(new zt(n.range.startLineNumber,n.range.startColumn),n.text);this._versionId=e.versionId,this._cachedTextValue=null}_ensureLineStarts(){if(!this._lineStarts){const e=this._eol.length,t=this._lines.length,n=new Uint32Array(t);for(let i=0;i/?";const $t=function(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t="(-?\\d*\\.\\d\\w*)|([^";for(const n of jt)e.indexOf(n)>=0||(t+="\\"+n);return t+="\\s]+)",new 
RegExp(t,"g")}();function Ht(e){let t=$t;if(e&&e instanceof RegExp)if(e.global)t=e;else{let n="g";e.ignoreCase&&(n+="i"),e.multiline&&(n+="m"),e.unicode&&(n+="u"),t=new RegExp(e.source,n)}return t.lastIndex=0,t}const Gt=new S;function Jt(e,t,n,i,r){if(t=Ht(t),r||(r=h.first(Gt)),n.length>r.maxLen){let o=e-r.maxLen/2;return o<0?o=0:i+=o,Jt(e,t,n=n.substring(o,e+r.maxLen/2),i,r)}const o=Date.now(),s=e-1-i;let a=-1,l=null;for(let c=1;!(Date.now()-o>=r.timeBudget);c++){const e=s-r.windowSize*c;t.lastIndex=Math.max(0,e);const i=Xt(t,n,s,a);if(!i&&l)break;if(l=i,e<=0)break;a=e}if(l){const e={word:l[0],startColumn:i+1+l.index,endColumn:i+1+l.index+l[0].length};return t.lastIndex=0,e}return null}function Xt(e,t,n,i){let r;for(;r=e.exec(t);){const t=r.index||0;if(t<=n&&e.lastIndex>=n)return r;if(i>0&&t>i)return null}return null}Gt.unshift({maxLen:1e3,windowSize:15,timeBudget:150});class Yt{constructor(e){const t=Vt(e);this._defaultValue=t,this._asciiMap=Yt._createAsciiMap(t),this._map=new Map}static _createAsciiMap(e){const t=new Uint8Array(256);return t.fill(e),t}set(e,t){const n=Vt(t);e>=0&&e<256?this._asciiMap[e]=n:this._map.set(e,n)}get(e){return e>=0&&e<256?this._asciiMap[e]:this._map.get(e)||this._defaultValue}clear(){this._asciiMap.fill(this._defaultValue),this._map.clear()}}class Qt{constructor(e,t,n){const i=new Uint8Array(e*t);for(let r=0,o=e*t;rt&&(t=o),i>n&&(n=i),s>n&&(n=s)}t++,n++;const i=new Qt(n,t,0);for(let r=0,o=e.length;r=this._maxCharCode?0:this._states.get(e,t)}}let en=null;let tn=null;class nn{static _createLink(e,t,n,i,r){let o=r-1;do{const n=t.charCodeAt(o);if(2!==e.get(n))break;o--}while(o>i);if(i>0){const e=t.charCodeAt(i-1),n=t.charCodeAt(o);(40===e&&41===n||91===e&&93===n||123===e&&125===n)&&o--}return{range:{startLineNumber:n,startColumn:i+1,endLineNumber:n,endColumn:o+2},url:t.substring(i,o+1)}}static computeLinks(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:(null===en&&(en=new 
Zt([[1,104,2],[1,72,2],[1,102,6],[1,70,6],[2,116,3],[2,84,3],[3,116,4],[3,84,4],[4,112,5],[4,80,5],[5,115,9],[5,83,9],[5,58,10],[6,105,7],[6,73,7],[7,108,8],[7,76,8],[8,101,9],[8,69,9],[9,58,10],[10,47,11],[11,47,12]])),en);const n=function(){if(null===tn){tn=new Yt(0);const e=" \t<>'\"\u3001\u3002\uff61\uff64\uff0c\uff0e\uff1a\uff1b\u2018\u3008\u300c\u300e\u3014\uff08\uff3b\uff5b\uff62\uff63\uff5d\uff3d\uff09\u3015\u300f\u300d\u3009\u2019\uff40\uff5e\u2026";for(let n=0;n=0?(i+=n?1:-1,i<0?i=e.length-1:i%=e.length,e[i]):null}}rn.INSTANCE=new rn;const on=Object.freeze((function(e,t){const n=setTimeout(e.bind(t),0);return{dispose(){clearTimeout(n)}}}));var sn;!function(e){e.isCancellationToken=function(t){return t===e.None||t===e.Cancelled||(t instanceof an||!(!t||"object"!==typeof t)&&("boolean"===typeof t.isCancellationRequested&&"function"===typeof t.onCancellationRequested))},e.None=Object.freeze({isCancellationRequested:!1,onCancellationRequested:E.None}),e.Cancelled=Object.freeze({isCancellationRequested:!0,onCancellationRequested:on})}(sn||(sn={}));class an{constructor(){this._isCancelled=!1,this._emitter=null}cancel(){this._isCancelled||(this._isCancelled=!0,this._emitter&&(this._emitter.fire(void 0),this.dispose()))}get isCancellationRequested(){return this._isCancelled}get onCancellationRequested(){return this._isCancelled?on:(this._emitter||(this._emitter=new A),this._emitter.event)}dispose(){this._emitter&&(this._emitter.dispose(),this._emitter=null)}}class ln{constructor(e){this._token=void 0,this._parentListener=void 0,this._parentListener=e&&e.onCancellationRequested(this.cancel,this)}get token(){return this._token||(this._token=new an),this._token}cancel(){this._token?this._token instanceof an&&this._token.cancel():this._token=sn.Cancelled}dispose(){var e;arguments.length>0&&void 0!==arguments[0]&&arguments[0]&&this.cancel(),null===(e=this._parentListener)||void 0===e||e.dispose(),this._token?this._token instanceof 
an&&this._token.dispose():this._token=sn.None}}class cn{constructor(){this._keyCodeToStr=[],this._strToKeyCode=Object.create(null)}define(e,t){this._keyCodeToStr[e]=t,this._strToKeyCode[t.toLowerCase()]=e}keyCodeToStr(e){return this._keyCodeToStr[e]}strToKeyCode(e){return this._strToKeyCode[e.toLowerCase()]||0}}const hn=new cn,dn=new cn,pn=new cn,un=new Array(230),mn={},fn=[],gn=Object.create(null),bn=Object.create(null),vn=[],yn=[];for(let Tu=0;Tu<=193;Tu++)vn[Tu]=-1;for(let Tu=0;Tu<=132;Tu++)yn[Tu]=-1;var wn;!function(){const e="",t=[[1,0,"None",0,"unknown",0,"VK_UNKNOWN",e,e],[1,1,"Hyper",0,e,0,e,e,e],[1,2,"Super",0,e,0,e,e,e],[1,3,"Fn",0,e,0,e,e,e],[1,4,"FnLock",0,e,0,e,e,e],[1,5,"Suspend",0,e,0,e,e,e],[1,6,"Resume",0,e,0,e,e,e],[1,7,"Turbo",0,e,0,e,e,e],[1,8,"Sleep",0,e,0,"VK_SLEEP",e,e],[1,9,"WakeUp",0,e,0,e,e,e],[0,10,"KeyA",31,"A",65,"VK_A",e,e],[0,11,"KeyB",32,"B",66,"VK_B",e,e],[0,12,"KeyC",33,"C",67,"VK_C",e,e],[0,13,"KeyD",34,"D",68,"VK_D",e,e],[0,14,"KeyE",35,"E",69,"VK_E",e,e],[0,15,"KeyF",36,"F",70,"VK_F",e,e],[0,16,"KeyG",37,"G",71,"VK_G",e,e],[0,17,"KeyH",38,"H",72,"VK_H",e,e],[0,18,"KeyI",39,"I",73,"VK_I",e,e],[0,19,"KeyJ",40,"J",74,"VK_J",e,e],[0,20,"KeyK",41,"K",75,"VK_K",e,e],[0,21,"KeyL",42,"L",76,"VK_L",e,e],[0,22,"KeyM",43,"M",77,"VK_M",e,e],[0,23,"KeyN",44,"N",78,"VK_N",e,e],[0,24,"KeyO",45,"O",79,"VK_O",e,e],[0,25,"KeyP",46,"P",80,"VK_P",e,e],[0,26,"KeyQ",47,"Q",81,"VK_Q",e,e],[0,27,"KeyR",48,"R",82,"VK_R",e,e],[0,28,"KeyS",49,"S",83,"VK_S",e,e],[0,29,"KeyT",50,"T",84,"VK_T",e,e],[0,30,"KeyU",51,"U",85,"VK_U",e,e],[0,31,"KeyV",52,"V",86,"VK_V",e,e],[0,32,"KeyW",53,"W",87,"VK_W",e,e],[0,33,"KeyX",54,"X",88,"VK_X",e,e],[0,34,"KeyY",55,"Y",89,"VK_Y",e,e],[0,35,"KeyZ",56,"Z",90,"VK_Z",e,e],[0,36,"Digit1",22,"1",49,"VK_1",e,e],[0,37,"Digit2",23,"2",50,"VK_2",e,e],[0,38,"Digit3",24,"3",51,"VK_3",e,e],[0,39,"Digit4",25,"4",52,"VK_4",e,e],[0,40,"Digit5",26,"5",53,"VK_5",e,e],[0,41,"Digit6",27,"6",54,"VK_6",e,e],[0,42,"Digit7",28,"7",55,"VK_7",e,e],
[0,43,"Digit8",29,"8",56,"VK_8",e,e],[0,44,"Digit9",30,"9",57,"VK_9",e,e],[0,45,"Digit0",21,"0",48,"VK_0",e,e],[1,46,"Enter",3,"Enter",13,"VK_RETURN",e,e],[1,47,"Escape",9,"Escape",27,"VK_ESCAPE",e,e],[1,48,"Backspace",1,"Backspace",8,"VK_BACK",e,e],[1,49,"Tab",2,"Tab",9,"VK_TAB",e,e],[1,50,"Space",10,"Space",32,"VK_SPACE",e,e],[0,51,"Minus",88,"-",189,"VK_OEM_MINUS","-","OEM_MINUS"],[0,52,"Equal",86,"=",187,"VK_OEM_PLUS","=","OEM_PLUS"],[0,53,"BracketLeft",92,"[",219,"VK_OEM_4","[","OEM_4"],[0,54,"BracketRight",94,"]",221,"VK_OEM_6","]","OEM_6"],[0,55,"Backslash",93,"\\",220,"VK_OEM_5","\\","OEM_5"],[0,56,"IntlHash",0,e,0,e,e,e],[0,57,"Semicolon",85,";",186,"VK_OEM_1",";","OEM_1"],[0,58,"Quote",95,"'",222,"VK_OEM_7","'","OEM_7"],[0,59,"Backquote",91,"`",192,"VK_OEM_3","`","OEM_3"],[0,60,"Comma",87,",",188,"VK_OEM_COMMA",",","OEM_COMMA"],[0,61,"Period",89,".",190,"VK_OEM_PERIOD",".","OEM_PERIOD"],[0,62,"Slash",90,"/",191,"VK_OEM_2","/","OEM_2"],[1,63,"CapsLock",8,"CapsLock",20,"VK_CAPITAL",e,e],[1,64,"F1",59,"F1",112,"VK_F1",e,e],[1,65,"F2",60,"F2",113,"VK_F2",e,e],[1,66,"F3",61,"F3",114,"VK_F3",e,e],[1,67,"F4",62,"F4",115,"VK_F4",e,e],[1,68,"F5",63,"F5",116,"VK_F5",e,e],[1,69,"F6",64,"F6",117,"VK_F6",e,e],[1,70,"F7",65,"F7",118,"VK_F7",e,e],[1,71,"F8",66,"F8",119,"VK_F8",e,e],[1,72,"F9",67,"F9",120,"VK_F9",e,e],[1,73,"F10",68,"F10",121,"VK_F10",e,e],[1,74,"F11",69,"F11",122,"VK_F11",e,e],[1,75,"F12",70,"F12",123,"VK_F12",e,e],[1,76,"PrintScreen",0,e,0,e,e,e],[1,77,"ScrollLock",84,"ScrollLock",145,"VK_SCROLL",e,e],[1,78,"Pause",7,"PauseBreak",19,"VK_PAUSE",e,e],[1,79,"Insert",19,"Insert",45,"VK_INSERT",e,e],[1,80,"Home",14,"Home",36,"VK_HOME",e,e],[1,81,"PageUp",11,"PageUp",33,"VK_PRIOR",e,e],[1,82,"Delete",20,"Delete",46,"VK_DELETE",e,e],[1,83,"End",13,"End",35,"VK_END",e,e],[1,84,"PageDown",12,"PageDown",34,"VK_NEXT",e,e],[1,85,"ArrowRight",17,"RightArrow",39,"VK_RIGHT","Right",e],[1,86,"ArrowLeft",15,"LeftArrow",37,"VK_LEFT","Left",e],[1,87,"ArrowDown",18,"DownAr
row",40,"VK_DOWN","Down",e],[1,88,"ArrowUp",16,"UpArrow",38,"VK_UP","Up",e],[1,89,"NumLock",83,"NumLock",144,"VK_NUMLOCK",e,e],[1,90,"NumpadDivide",113,"NumPad_Divide",111,"VK_DIVIDE",e,e],[1,91,"NumpadMultiply",108,"NumPad_Multiply",106,"VK_MULTIPLY",e,e],[1,92,"NumpadSubtract",111,"NumPad_Subtract",109,"VK_SUBTRACT",e,e],[1,93,"NumpadAdd",109,"NumPad_Add",107,"VK_ADD",e,e],[1,94,"NumpadEnter",3,e,0,e,e,e],[1,95,"Numpad1",99,"NumPad1",97,"VK_NUMPAD1",e,e],[1,96,"Numpad2",100,"NumPad2",98,"VK_NUMPAD2",e,e],[1,97,"Numpad3",101,"NumPad3",99,"VK_NUMPAD3",e,e],[1,98,"Numpad4",102,"NumPad4",100,"VK_NUMPAD4",e,e],[1,99,"Numpad5",103,"NumPad5",101,"VK_NUMPAD5",e,e],[1,100,"Numpad6",104,"NumPad6",102,"VK_NUMPAD6",e,e],[1,101,"Numpad7",105,"NumPad7",103,"VK_NUMPAD7",e,e],[1,102,"Numpad8",106,"NumPad8",104,"VK_NUMPAD8",e,e],[1,103,"Numpad9",107,"NumPad9",105,"VK_NUMPAD9",e,e],[1,104,"Numpad0",98,"NumPad0",96,"VK_NUMPAD0",e,e],[1,105,"NumpadDecimal",112,"NumPad_Decimal",110,"VK_DECIMAL",e,e],[0,106,"IntlBackslash",97,"OEM_102",226,"VK_OEM_102",e,e],[1,107,"ContextMenu",58,"ContextMenu",93,e,e,e],[1,108,"Power",0,e,0,e,e,e],[1,109,"NumpadEqual",0,e,0,e,e,e],[1,110,"F13",71,"F13",124,"VK_F13",e,e],[1,111,"F14",72,"F14",125,"VK_F14",e,e],[1,112,"F15",73,"F15",126,"VK_F15",e,e],[1,113,"F16",74,"F16",127,"VK_F16",e,e],[1,114,"F17",75,"F17",128,"VK_F17",e,e],[1,115,"F18",76,"F18",129,"VK_F18",e,e],[1,116,"F19",77,"F19",130,"VK_F19",e,e],[1,117,"F20",78,"F20",131,"VK_F20",e,e],[1,118,"F21",79,"F21",132,"VK_F21",e,e],[1,119,"F22",80,"F22",133,"VK_F22",e,e],[1,120,"F23",81,"F23",134,"VK_F23",e,e],[1,121,"F24",82,"F24",135,"VK_F24",e,e],[1,122,"Open",0,e,0,e,e,e],[1,123,"Help",0,e,0,e,e,e],[1,124,"Select",0,e,0,e,e,e],[1,125,"Again",0,e,0,e,e,e],[1,126,"Undo",0,e,0,e,e,e],[1,127,"Cut",0,e,0,e,e,e],[1,128,"Copy",0,e,0,e,e,e],[1,129,"Paste",0,e,0,e,e,e],[1,130,"Find",0,e,0,e,e,e],[1,131,"AudioVolumeMute",117,"AudioVolumeMute",173,"VK_VOLUME_MUTE",e,e],[1,132,"AudioVolumeUp",118,"AudioVolu
meUp",175,"VK_VOLUME_UP",e,e],[1,133,"AudioVolumeDown",119,"AudioVolumeDown",174,"VK_VOLUME_DOWN",e,e],[1,134,"NumpadComma",110,"NumPad_Separator",108,"VK_SEPARATOR",e,e],[0,135,"IntlRo",115,"ABNT_C1",193,"VK_ABNT_C1",e,e],[1,136,"KanaMode",0,e,0,e,e,e],[0,137,"IntlYen",0,e,0,e,e,e],[1,138,"Convert",0,e,0,e,e,e],[1,139,"NonConvert",0,e,0,e,e,e],[1,140,"Lang1",0,e,0,e,e,e],[1,141,"Lang2",0,e,0,e,e,e],[1,142,"Lang3",0,e,0,e,e,e],[1,143,"Lang4",0,e,0,e,e,e],[1,144,"Lang5",0,e,0,e,e,e],[1,145,"Abort",0,e,0,e,e,e],[1,146,"Props",0,e,0,e,e,e],[1,147,"NumpadParenLeft",0,e,0,e,e,e],[1,148,"NumpadParenRight",0,e,0,e,e,e],[1,149,"NumpadBackspace",0,e,0,e,e,e],[1,150,"NumpadMemoryStore",0,e,0,e,e,e],[1,151,"NumpadMemoryRecall",0,e,0,e,e,e],[1,152,"NumpadMemoryClear",0,e,0,e,e,e],[1,153,"NumpadMemoryAdd",0,e,0,e,e,e],[1,154,"NumpadMemorySubtract",0,e,0,e,e,e],[1,155,"NumpadClear",131,"Clear",12,"VK_CLEAR",e,e],[1,156,"NumpadClearEntry",0,e,0,e,e,e],[1,0,e,5,"Ctrl",17,"VK_CONTROL",e,e],[1,0,e,4,"Shift",16,"VK_SHIFT",e,e],[1,0,e,6,"Alt",18,"VK_MENU",e,e],[1,0,e,57,"Meta",91,"VK_COMMAND",e,e],[1,157,"ControlLeft",5,e,0,"VK_LCONTROL",e,e],[1,158,"ShiftLeft",4,e,0,"VK_LSHIFT",e,e],[1,159,"AltLeft",6,e,0,"VK_LMENU",e,e],[1,160,"MetaLeft",57,e,0,"VK_LWIN",e,e],[1,161,"ControlRight",5,e,0,"VK_RCONTROL",e,e],[1,162,"ShiftRight",4,e,0,"VK_RSHIFT",e,e],[1,163,"AltRight",6,e,0,"VK_RMENU",e,e],[1,164,"MetaRight",57,e,0,"VK_RWIN",e,e],[1,165,"BrightnessUp",0,e,0,e,e,e],[1,166,"BrightnessDown",0,e,0,e,e,e],[1,167,"MediaPlay",0,e,0,e,e,e],[1,168,"MediaRecord",0,e,0,e,e,e],[1,169,"MediaFastForward",0,e,0,e,e,e],[1,170,"MediaRewind",0,e,0,e,e,e],[1,171,"MediaTrackNext",124,"MediaTrackNext",176,"VK_MEDIA_NEXT_TRACK",e,e],[1,172,"MediaTrackPrevious",125,"MediaTrackPrevious",177,"VK_MEDIA_PREV_TRACK",e,e],[1,173,"MediaStop",126,"MediaStop",178,"VK_MEDIA_STOP",e,e],[1,174,"Eject",0,e,0,e,e,e],[1,175,"MediaPlayPause",127,"MediaPlayPause",179,"VK_MEDIA_PLAY_PAUSE",e,e],[1,176,"MediaSelect",128,"Launch
MediaPlayer",181,"VK_MEDIA_LAUNCH_MEDIA_SELECT",e,e],[1,177,"LaunchMail",129,"LaunchMail",180,"VK_MEDIA_LAUNCH_MAIL",e,e],[1,178,"LaunchApp2",130,"LaunchApp2",183,"VK_MEDIA_LAUNCH_APP2",e,e],[1,179,"LaunchApp1",0,e,0,"VK_MEDIA_LAUNCH_APP1",e,e],[1,180,"SelectTask",0,e,0,e,e,e],[1,181,"LaunchScreenSaver",0,e,0,e,e,e],[1,182,"BrowserSearch",120,"BrowserSearch",170,"VK_BROWSER_SEARCH",e,e],[1,183,"BrowserHome",121,"BrowserHome",172,"VK_BROWSER_HOME",e,e],[1,184,"BrowserBack",122,"BrowserBack",166,"VK_BROWSER_BACK",e,e],[1,185,"BrowserForward",123,"BrowserForward",167,"VK_BROWSER_FORWARD",e,e],[1,186,"BrowserStop",0,e,0,"VK_BROWSER_STOP",e,e],[1,187,"BrowserRefresh",0,e,0,"VK_BROWSER_REFRESH",e,e],[1,188,"BrowserFavorites",0,e,0,"VK_BROWSER_FAVORITES",e,e],[1,189,"ZoomToggle",0,e,0,e,e,e],[1,190,"MailReply",0,e,0,e,e,e],[1,191,"MailForward",0,e,0,e,e,e],[1,192,"MailSend",0,e,0,e,e,e],[1,0,e,114,"KeyInComposition",229,e,e,e],[1,0,e,116,"ABNT_C2",194,"VK_ABNT_C2",e,e],[1,0,e,96,"OEM_8",223,"VK_OEM_8",e,e],[1,0,e,0,e,0,"VK_KANA",e,e],[1,0,e,0,e,0,"VK_HANGUL",e,e],[1,0,e,0,e,0,"VK_JUNJA",e,e],[1,0,e,0,e,0,"VK_FINAL",e,e],[1,0,e,0,e,0,"VK_HANJA",e,e],[1,0,e,0,e,0,"VK_KANJI",e,e],[1,0,e,0,e,0,"VK_CONVERT",e,e],[1,0,e,0,e,0,"VK_NONCONVERT",e,e],[1,0,e,0,e,0,"VK_ACCEPT",e,e],[1,0,e,0,e,0,"VK_MODECHANGE",e,e],[1,0,e,0,e,0,"VK_SELECT",e,e],[1,0,e,0,e,0,"VK_PRINT",e,e],[1,0,e,0,e,0,"VK_EXECUTE",e,e],[1,0,e,0,e,0,"VK_SNAPSHOT",e,e],[1,0,e,0,e,0,"VK_HELP",e,e],[1,0,e,0,e,0,"VK_APPS",e,e],[1,0,e,0,e,0,"VK_PROCESSKEY",e,e],[1,0,e,0,e,0,"VK_PACKET",e,e],[1,0,e,0,e,0,"VK_DBE_SBCSCHAR",e,e],[1,0,e,0,e,0,"VK_DBE_DBCSCHAR",e,e],[1,0,e,0,e,0,"VK_ATTN",e,e],[1,0,e,0,e,0,"VK_CRSEL",e,e],[1,0,e,0,e,0,"VK_EXSEL",e,e],[1,0,e,0,e,0,"VK_EREOF",e,e],[1,0,e,0,e,0,"VK_PLAY",e,e],[1,0,e,0,e,0,"VK_ZOOM",e,e],[1,0,e,0,e,0,"VK_NONAME",e,e],[1,0,e,0,e,0,"VK_PA1",e,e],[1,0,e,0,e,0,"VK_OEM_CLEAR",e,e]],n=[],i=[];for(const r of 
t){const[e,t,o,s,a,l,c,h,d]=r;if(i[t]||(i[t]=!0,fn[t]=o,gn[o]=t,bn[o.toLowerCase()]=t,e&&(vn[t]=s,0!==s&&3!==s&&5!==s&&4!==s&&6!==s&&57!==s&&(yn[s]=t))),!n[s]){if(n[s]=!0,!a)throw new Error(`String representation missing for key code ${s} around scan code ${o}`);hn.define(s,a),dn.define(s,h||a),pn.define(s,d||h||a)}l&&(un[l]=s),c&&(mn[c]=s)}yn[3]=46}(),function(e){e.toString=function(e){return hn.keyCodeToStr(e)},e.fromString=function(e){return hn.strToKeyCode(e)},e.toUserSettingsUS=function(e){return dn.keyCodeToStr(e)},e.toUserSettingsGeneral=function(e){return pn.keyCodeToStr(e)},e.fromUserSettings=function(e){return dn.strToKeyCode(e)||pn.strToKeyCode(e)},e.toElectronAccelerator=function(e){if(e>=98&&e<=113)return null;switch(e){case 16:return"Up";case 18:return"Down";case 15:return"Left";case 17:return"Right"}return hn.keyCodeToStr(e)}}(wn||(wn={}));class xn extends It{constructor(e,t,n,i){super(e,t,n,i),this.selectionStartLineNumber=e,this.selectionStartColumn=t,this.positionLineNumber=n,this.positionColumn=i}toString(){return"["+this.selectionStartLineNumber+","+this.selectionStartColumn+" -> "+this.positionLineNumber+","+this.positionColumn+"]"}equalsSelection(e){return xn.selectionsEqual(this,e)}static selectionsEqual(e,t){return e.selectionStartLineNumber===t.selectionStartLineNumber&&e.selectionStartColumn===t.selectionStartColumn&&e.positionLineNumber===t.positionLineNumber&&e.positionColumn===t.positionColumn}getDirection(){return this.selectionStartLineNumber===this.startLineNumber&&this.selectionStartColumn===this.startColumn?0:1}setEndPosition(e,t){return 0===this.getDirection()?new xn(this.startLineNumber,this.startColumn,e,t):new xn(e,t,this.startLineNumber,this.startColumn)}getPosition(){return new zt(this.positionLineNumber,this.positionColumn)}getSelectionStart(){return new zt(this.selectionStartLineNumber,this.selectionStartColumn)}setStartPosition(e,t){return 0===this.getDirection()?new xn(e,t,this.endLineNumber,this.endColumn):new 
xn(this.endLineNumber,this.endColumn,e,t)}static fromPositions(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:e;return new xn(e.lineNumber,e.column,t.lineNumber,t.column)}static fromRange(e,t){return 0===t?new xn(e.startLineNumber,e.startColumn,e.endLineNumber,e.endColumn):new xn(e.endLineNumber,e.endColumn,e.startLineNumber,e.startColumn)}static liftSelection(e){return new xn(e.selectionStartLineNumber,e.selectionStartColumn,e.positionLineNumber,e.positionColumn)}static selectionsArrEqual(e,t){if(e&&!t||!e&&t)return!1;if(!e&&!t)return!0;if(e.length!==t.length)return!1;for(let n=0,i=e.length;n{this._tokenizationSupports.get(e)===t&&(this._tokenizationSupports.delete(e),this.handleChange([e]))}))}get(e){return this._tokenizationSupports.get(e)||null}registerFactory(e,t){var n;null===(n=this._factories.get(e))||void 0===n||n.dispose();const i=new En(this,e,t);return this._factories.set(e,i),b((()=>{const t=this._factories.get(e);t&&t===i&&(this._factories.delete(e),t.dispose())}))}async getOrCreate(e){const t=this.get(e);if(t)return t;const n=this._factories.get(e);return!n||n.isResolved?null:(await n.resolve(),this.get(e))}isResolved(e){if(this.get(e))return!0;const t=this._factories.get(e);return!(t&&!t.isResolved)}setColorMap(e){this._colorMap=e,this._onDidChange.fire({changedLanguages:Array.from(this._tokenizationSupports.keys()),changedColorMap:!0})}getColorMap(){return this._colorMap}getDefaultBackground(){return this._colorMap&&this._colorMap.length>2?this._colorMap[2]:null}};var 
On,Wn,Vn,Un,Kn,qn,Bn,jn,$n,Hn,Gn,Jn,Xn,Yn,Qn,Zn,ei,ti,ni,ii,ri,oi,si,ai,li,ci,hi,di,pi,ui,mi,fi,gi,bi,vi,yi,wi,xi,Si,Ci,_i,ki,Ei,Ri,Ni,Fi,Di;!function(e){e[e.Invoke=0]="Invoke",e[e.Automatic=1]="Automatic"}(On||(On={})),function(e){e[e.Unknown=0]="Unknown",e[e.Disabled=1]="Disabled",e[e.Enabled=2]="Enabled"}(Wn||(Wn={})),function(e){e[e.Invoke=1]="Invoke",e[e.Auto=2]="Auto"}(Vn||(Vn={})),function(e){e[e.None=0]="None",e[e.KeepWhitespace=1]="KeepWhitespace",e[e.InsertAsSnippet=4]="InsertAsSnippet"}(Un||(Un={})),function(e){e[e.Method=0]="Method",e[e.Function=1]="Function",e[e.Constructor=2]="Constructor",e[e.Field=3]="Field",e[e.Variable=4]="Variable",e[e.Class=5]="Class",e[e.Struct=6]="Struct",e[e.Interface=7]="Interface",e[e.Module=8]="Module",e[e.Property=9]="Property",e[e.Event=10]="Event",e[e.Operator=11]="Operator",e[e.Unit=12]="Unit",e[e.Value=13]="Value",e[e.Constant=14]="Constant",e[e.Enum=15]="Enum",e[e.EnumMember=16]="EnumMember",e[e.Keyword=17]="Keyword",e[e.Text=18]="Text",e[e.Color=19]="Color",e[e.File=20]="File",e[e.Reference=21]="Reference",e[e.Customcolor=22]="Customcolor",e[e.Folder=23]="Folder",e[e.TypeParameter=24]="TypeParameter",e[e.User=25]="User",e[e.Issue=26]="Issue",e[e.Snippet=27]="Snippet"}(Kn||(Kn={})),function(e){e[e.Deprecated=1]="Deprecated"}(qn||(qn={})),function(e){e[e.Invoke=0]="Invoke",e[e.TriggerCharacter=1]="TriggerCharacter",e[e.TriggerForIncompleteCompletions=2]="TriggerForIncompleteCompletions"}(Bn||(Bn={})),function(e){e[e.EXACT=0]="EXACT",e[e.ABOVE=1]="ABOVE",e[e.BELOW=2]="BELOW"}(jn||(jn={})),function(e){e[e.NotSet=0]="NotSet",e[e.ContentFlush=1]="ContentFlush",e[e.RecoverFromMarkers=2]="RecoverFromMarkers",e[e.Explicit=3]="Explicit",e[e.Paste=4]="Paste",e[e.Undo=5]="Undo",e[e.Redo=6]="Redo"}($n||($n={})),function(e){e[e.LF=1]="LF",e[e.CRLF=2]="CRLF"}(Hn||(Hn={})),function(e){e[e.Text=0]="Text",e[e.Read=1]="Read",e[e.Write=2]="Write"}(Gn||(Gn={})),function(e){e[e.None=0]="None",e[e.Keep=1]="Keep",e[e.Brackets=2]="Brackets",
e[e.Advanced=3]="Advanced",e[e.Full=4]="Full"}(Jn||(Jn={})),function(e){e[e.acceptSuggestionOnCommitCharacter=0]="acceptSuggestionOnCommitCharacter",e[e.acceptSuggestionOnEnter=1]="acceptSuggestionOnEnter",e[e.accessibilitySupport=2]="accessibilitySupport",e[e.accessibilityPageSize=3]="accessibilityPageSize",e[e.ariaLabel=4]="ariaLabel",e[e.ariaRequired=5]="ariaRequired",e[e.autoClosingBrackets=6]="autoClosingBrackets",e[e.autoClosingComments=7]="autoClosingComments",e[e.screenReaderAnnounceInlineSuggestion=8]="screenReaderAnnounceInlineSuggestion",e[e.autoClosingDelete=9]="autoClosingDelete",e[e.autoClosingOvertype=10]="autoClosingOvertype",e[e.autoClosingQuotes=11]="autoClosingQuotes",e[e.autoIndent=12]="autoIndent",e[e.automaticLayout=13]="automaticLayout",e[e.autoSurround=14]="autoSurround",e[e.bracketPairColorization=15]="bracketPairColorization",e[e.guides=16]="guides",e[e.codeLens=17]="codeLens",e[e.codeLensFontFamily=18]="codeLensFontFamily",e[e.codeLensFontSize=19]="codeLensFontSize",e[e.colorDecorators=20]="colorDecorators",e[e.colorDecoratorsLimit=21]="colorDecoratorsLimit",e[e.columnSelection=22]="columnSelection",e[e.comments=23]="comments",e[e.contextmenu=24]="contextmenu",e[e.copyWithSyntaxHighlighting=25]="copyWithSyntaxHighlighting",e[e.cursorBlinking=26]="cursorBlinking",e[e.cursorSmoothCaretAnimation=27]="cursorSmoothCaretAnimation",e[e.cursorStyle=28]="cursorStyle",e[e.cursorSurroundingLines=29]="cursorSurroundingLines",e[e.cursorSurroundingLinesStyle=30]="cursorSurroundingLinesStyle",e[e.cursorWidth=31]="cursorWidth",e[e.disableLayerHinting=32]="disableLayerHinting",e[e.disableMonospaceOptimizations=33]="disableMonospaceOptimizations",e[e.domReadOnly=34]="domReadOnly",e[e.dragAndDrop=35]="dragAndDrop",e[e.dropIntoEditor=36]="dropIntoEditor",e[e.emptySelectionClipboard=37]="emptySelectionClipboard",e[e.experimentalWhitespaceRendering=38]="experimentalWhitespaceRendering",e[e.extraEditorClassName=39]="extraEditorClassName",e[e.fastScrollSensitivit
y=40]="fastScrollSensitivity",e[e.find=41]="find",e[e.fixedOverflowWidgets=42]="fixedOverflowWidgets",e[e.folding=43]="folding",e[e.foldingStrategy=44]="foldingStrategy",e[e.foldingHighlight=45]="foldingHighlight",e[e.foldingImportsByDefault=46]="foldingImportsByDefault",e[e.foldingMaximumRegions=47]="foldingMaximumRegions",e[e.unfoldOnClickAfterEndOfLine=48]="unfoldOnClickAfterEndOfLine",e[e.fontFamily=49]="fontFamily",e[e.fontInfo=50]="fontInfo",e[e.fontLigatures=51]="fontLigatures",e[e.fontSize=52]="fontSize",e[e.fontWeight=53]="fontWeight",e[e.fontVariations=54]="fontVariations",e[e.formatOnPaste=55]="formatOnPaste",e[e.formatOnType=56]="formatOnType",e[e.glyphMargin=57]="glyphMargin",e[e.gotoLocation=58]="gotoLocation",e[e.hideCursorInOverviewRuler=59]="hideCursorInOverviewRuler",e[e.hover=60]="hover",e[e.inDiffEditor=61]="inDiffEditor",e[e.inlineSuggest=62]="inlineSuggest",e[e.inlineEdit=63]="inlineEdit",e[e.letterSpacing=64]="letterSpacing",e[e.lightbulb=65]="lightbulb",e[e.lineDecorationsWidth=66]="lineDecorationsWidth",e[e.lineHeight=67]="lineHeight",e[e.lineNumbers=68]="lineNumbers",e[e.lineNumbersMinChars=69]="lineNumbersMinChars",e[e.linkedEditing=70]="linkedEditing",e[e.links=71]="links",e[e.matchBrackets=72]="matchBrackets",e[e.minimap=73]="minimap",e[e.mouseStyle=74]="mouseStyle",e[e.mouseWheelScrollSensitivity=75]="mouseWheelScrollSensitivity",e[e.mouseWheelZoom=76]="mouseWheelZoom",e[e.multiCursorMergeOverlapping=77]="multiCursorMergeOverlapping",e[e.multiCursorModifier=78]="multiCursorModifier",e[e.multiCursorPaste=79]="multiCursorPaste",e[e.multiCursorLimit=80]="multiCursorLimit",e[e.occurrencesHighlight=81]="occurrencesHighlight",e[e.overviewRulerBorder=82]="overviewRulerBorder",e[e.overviewRulerLanes=83]="overviewRulerLanes",e[e.padding=84]="padding",e[e.pasteAs=85]="pasteAs",e[e.parameterHints=86]="parameterHints",e[e.peekWidgetDefaultFocus=87]="peekWidgetDefaultFocus",e[e.definitionLinkOpensInPeek=88]="definitionLinkOpensInPeek",e[e.quickSugge
stions=89]="quickSuggestions",e[e.quickSuggestionsDelay=90]="quickSuggestionsDelay",e[e.readOnly=91]="readOnly",e[e.readOnlyMessage=92]="readOnlyMessage",e[e.renameOnType=93]="renameOnType",e[e.renderControlCharacters=94]="renderControlCharacters",e[e.renderFinalNewline=95]="renderFinalNewline",e[e.renderLineHighlight=96]="renderLineHighlight",e[e.renderLineHighlightOnlyWhenFocus=97]="renderLineHighlightOnlyWhenFocus",e[e.renderValidationDecorations=98]="renderValidationDecorations",e[e.renderWhitespace=99]="renderWhitespace",e[e.revealHorizontalRightPadding=100]="revealHorizontalRightPadding",e[e.roundedSelection=101]="roundedSelection",e[e.rulers=102]="rulers",e[e.scrollbar=103]="scrollbar",e[e.scrollBeyondLastColumn=104]="scrollBeyondLastColumn",e[e.scrollBeyondLastLine=105]="scrollBeyondLastLine",e[e.scrollPredominantAxis=106]="scrollPredominantAxis",e[e.selectionClipboard=107]="selectionClipboard",e[e.selectionHighlight=108]="selectionHighlight",e[e.selectOnLineNumbers=109]="selectOnLineNumbers",e[e.showFoldingControls=110]="showFoldingControls",e[e.showUnused=111]="showUnused",e[e.snippetSuggestions=112]="snippetSuggestions",e[e.smartSelect=113]="smartSelect",e[e.smoothScrolling=114]="smoothScrolling",e[e.stickyScroll=115]="stickyScroll",e[e.stickyTabStops=116]="stickyTabStops",e[e.stopRenderingLineAfter=117]="stopRenderingLineAfter",e[e.suggest=118]="suggest",e[e.suggestFontSize=119]="suggestFontSize",e[e.suggestLineHeight=120]="suggestLineHeight",e[e.suggestOnTriggerCharacters=121]="suggestOnTriggerCharacters",e[e.suggestSelection=122]="suggestSelection",e[e.tabCompletion=123]="tabCompletion",e[e.tabIndex=124]="tabIndex",e[e.unicodeHighlighting=125]="unicodeHighlighting",e[e.unusualLineTerminators=126]="unusualLineTerminators",e[e.useShadowDOM=127]="useShadowDOM",e[e.useTabStops=128]="useTabStops",e[e.wordBreak=129]="wordBreak",e[e.wordSegmenterLocales=130]="wordSegmenterLocales",e[e.wordSeparators=131]="wordSeparators",e[e.wordWrap=132]="wordWrap",e[e.wordW
rapBreakAfterCharacters=133]="wordWrapBreakAfterCharacters",e[e.wordWrapBreakBeforeCharacters=134]="wordWrapBreakBeforeCharacters",e[e.wordWrapColumn=135]="wordWrapColumn",e[e.wordWrapOverride1=136]="wordWrapOverride1",e[e.wordWrapOverride2=137]="wordWrapOverride2",e[e.wrappingIndent=138]="wrappingIndent",e[e.wrappingStrategy=139]="wrappingStrategy",e[e.showDeprecated=140]="showDeprecated",e[e.inlayHints=141]="inlayHints",e[e.editorClassName=142]="editorClassName",e[e.pixelRatio=143]="pixelRatio",e[e.tabFocusMode=144]="tabFocusMode",e[e.layoutInfo=145]="layoutInfo",e[e.wrappingInfo=146]="wrappingInfo",e[e.defaultColorDecorators=147]="defaultColorDecorators",e[e.colorDecoratorsActivatedOn=148]="colorDecoratorsActivatedOn",e[e.inlineCompletionsAccessibilityVerbose=149]="inlineCompletionsAccessibilityVerbose"}(Xn||(Xn={})),function(e){e[e.TextDefined=0]="TextDefined",e[e.LF=1]="LF",e[e.CRLF=2]="CRLF"}(Yn||(Yn={})),function(e){e[e.LF=0]="LF",e[e.CRLF=1]="CRLF"}(Qn||(Qn={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=3]="Right"}(Zn||(Zn={})),function(e){e[e.None=0]="None",e[e.Indent=1]="Indent",e[e.IndentOutdent=2]="IndentOutdent",e[e.Outdent=3]="Outdent"}(ei||(ei={})),function(e){e[e.Both=0]="Both",e[e.Right=1]="Right",e[e.Left=2]="Left",e[e.None=3]="None"}(ti||(ti={})),function(e){e[e.Type=1]="Type",e[e.Parameter=2]="Parameter"}(ni||(ni={})),function(e){e[e.Automatic=0]="Automatic",e[e.Explicit=1]="Explicit"}(ii||(ii={})),function(e){e[e.Invoke=0]="Invoke",e[e.Automatic=1]="Automatic"}(ri||(ri={})),function(e){e[e.DependsOnKbLayout=-1]="DependsOnKbLayout",e[e.Unknown=0]="Unknown",e[e.Backspace=1]="Backspace",e[e.Tab=2]="Tab",e[e.Enter=3]="Enter",e[e.Shift=4]="Shift",e[e.Ctrl=5]="Ctrl",e[e.Alt=6]="Alt",e[e.PauseBreak=7]="PauseBreak",e[e.CapsLock=8]="CapsLock",e[e.Escape=9]="Escape",e[e.Space=10]="Space",e[e.PageUp=11]="PageUp",e[e.PageDown=12]="PageDown",e[e.End=13]="End",e[e.Home=14]="Home",e[e.LeftArrow=15]="LeftArrow",e[e.UpArrow=16]="UpArrow",e[
e.RightArrow=17]="RightArrow",e[e.DownArrow=18]="DownArrow",e[e.Insert=19]="Insert",e[e.Delete=20]="Delete",e[e.Digit0=21]="Digit0",e[e.Digit1=22]="Digit1",e[e.Digit2=23]="Digit2",e[e.Digit3=24]="Digit3",e[e.Digit4=25]="Digit4",e[e.Digit5=26]="Digit5",e[e.Digit6=27]="Digit6",e[e.Digit7=28]="Digit7",e[e.Digit8=29]="Digit8",e[e.Digit9=30]="Digit9",e[e.KeyA=31]="KeyA",e[e.KeyB=32]="KeyB",e[e.KeyC=33]="KeyC",e[e.KeyD=34]="KeyD",e[e.KeyE=35]="KeyE",e[e.KeyF=36]="KeyF",e[e.KeyG=37]="KeyG",e[e.KeyH=38]="KeyH",e[e.KeyI=39]="KeyI",e[e.KeyJ=40]="KeyJ",e[e.KeyK=41]="KeyK",e[e.KeyL=42]="KeyL",e[e.KeyM=43]="KeyM",e[e.KeyN=44]="KeyN",e[e.KeyO=45]="KeyO",e[e.KeyP=46]="KeyP",e[e.KeyQ=47]="KeyQ",e[e.KeyR=48]="KeyR",e[e.KeyS=49]="KeyS",e[e.KeyT=50]="KeyT",e[e.KeyU=51]="KeyU",e[e.KeyV=52]="KeyV",e[e.KeyW=53]="KeyW",e[e.KeyX=54]="KeyX",e[e.KeyY=55]="KeyY",e[e.KeyZ=56]="KeyZ",e[e.Meta=57]="Meta",e[e.ContextMenu=58]="ContextMenu",e[e.F1=59]="F1",e[e.F2=60]="F2",e[e.F3=61]="F3",e[e.F4=62]="F4",e[e.F5=63]="F5",e[e.F6=64]="F6",e[e.F7=65]="F7",e[e.F8=66]="F8",e[e.F9=67]="F9",e[e.F10=68]="F10",e[e.F11=69]="F11",e[e.F12=70]="F12",e[e.F13=71]="F13",e[e.F14=72]="F14",e[e.F15=73]="F15",e[e.F16=74]="F16",e[e.F17=75]="F17",e[e.F18=76]="F18",e[e.F19=77]="F19",e[e.F20=78]="F20",e[e.F21=79]="F21",e[e.F22=80]="F22",e[e.F23=81]="F23",e[e.F24=82]="F24",e[e.NumLock=83]="NumLock",e[e.ScrollLock=84]="ScrollLock",e[e.Semicolon=85]="Semicolon",e[e.Equal=86]="Equal",e[e.Comma=87]="Comma",e[e.Minus=88]="Minus",e[e.Period=89]="Period",e[e.Slash=90]="Slash",e[e.Backquote=91]="Backquote",e[e.BracketLeft=92]="BracketLeft",e[e.Backslash=93]="Backslash",e[e.BracketRight=94]="BracketRight",e[e.Quote=95]="Quote",e[e.OEM_8=96]="OEM_8",e[e.IntlBackslash=97]="IntlBackslash",e[e.Numpad0=98]="Numpad0",e[e.Numpad1=99]="Numpad1",e[e.Numpad2=100]="Numpad2",e[e.Numpad3=101]="Numpad3",e[e.Numpad4=102]="Numpad4",e[e.Numpad5=103]="Numpad5",e[e.Numpad6=104]="Numpad6",e[e.Numpad7=105]="Numpad7",e[e.Numpad8=106]="Numpad8",e[e.Numpad9
=107]="Numpad9",e[e.NumpadMultiply=108]="NumpadMultiply",e[e.NumpadAdd=109]="NumpadAdd",e[e.NUMPAD_SEPARATOR=110]="NUMPAD_SEPARATOR",e[e.NumpadSubtract=111]="NumpadSubtract",e[e.NumpadDecimal=112]="NumpadDecimal",e[e.NumpadDivide=113]="NumpadDivide",e[e.KEY_IN_COMPOSITION=114]="KEY_IN_COMPOSITION",e[e.ABNT_C1=115]="ABNT_C1",e[e.ABNT_C2=116]="ABNT_C2",e[e.AudioVolumeMute=117]="AudioVolumeMute",e[e.AudioVolumeUp=118]="AudioVolumeUp",e[e.AudioVolumeDown=119]="AudioVolumeDown",e[e.BrowserSearch=120]="BrowserSearch",e[e.BrowserHome=121]="BrowserHome",e[e.BrowserBack=122]="BrowserBack",e[e.BrowserForward=123]="BrowserForward",e[e.MediaTrackNext=124]="MediaTrackNext",e[e.MediaTrackPrevious=125]="MediaTrackPrevious",e[e.MediaStop=126]="MediaStop",e[e.MediaPlayPause=127]="MediaPlayPause",e[e.LaunchMediaPlayer=128]="LaunchMediaPlayer",e[e.LaunchMail=129]="LaunchMail",e[e.LaunchApp2=130]="LaunchApp2",e[e.Clear=131]="Clear",e[e.MAX_VALUE=132]="MAX_VALUE"}(oi||(oi={})),function(e){e[e.Hint=1]="Hint",e[e.Info=2]="Info",e[e.Warning=4]="Warning",e[e.Error=8]="Error"}(si||(si={})),function(e){e[e.Unnecessary=1]="Unnecessary",e[e.Deprecated=2]="Deprecated"}(ai||(ai={})),function(e){e[e.Inline=1]="Inline",e[e.Gutter=2]="Gutter"}(li||(li={})),function(e){e[e.Normal=1]="Normal",e[e.Underlined=2]="Underlined"}(ci||(ci={})),function(e){e[e.UNKNOWN=0]="UNKNOWN",e[e.TEXTAREA=1]="TEXTAREA",e[e.GUTTER_GLYPH_MARGIN=2]="GUTTER_GLYPH_MARGIN",e[e.GUTTER_LINE_NUMBERS=3]="GUTTER_LINE_NUMBERS",e[e.GUTTER_LINE_DECORATIONS=4]="GUTTER_LINE_DECORATIONS",e[e.GUTTER_VIEW_ZONE=5]="GUTTER_VIEW_ZONE",e[e.CONTENT_TEXT=6]="CONTENT_TEXT",e[e.CONTENT_EMPTY=7]="CONTENT_EMPTY",e[e.CONTENT_VIEW_ZONE=8]="CONTENT_VIEW_ZONE",e[e.CONTENT_WIDGET=9]="CONTENT_WIDGET",e[e.OVERVIEW_RULER=10]="OVERVIEW_RULER",e[e.SCROLLBAR=11]="SCROLLBAR",e[e.OVERLAY_WIDGET=12]="OVERLAY_WIDGET",e[e.OUTSIDE_EDITOR=13]="OUTSIDE_EDITOR"}(hi||(hi={})),function(e){e[e.AIGenerated=1]="AIGenerated"}(di||(di={})),function(e){e[e.TOP_RIGHT_CORNER=0]=
"TOP_RIGHT_CORNER",e[e.BOTTOM_RIGHT_CORNER=1]="BOTTOM_RIGHT_CORNER",e[e.TOP_CENTER=2]="TOP_CENTER"}(pi||(pi={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=4]="Right",e[e.Full=7]="Full"}(ui||(ui={})),function(e){e[e.Word=0]="Word",e[e.Line=1]="Line",e[e.Suggest=2]="Suggest"}(mi||(mi={})),function(e){e[e.Left=0]="Left",e[e.Right=1]="Right",e[e.None=2]="None",e[e.LeftOfInjectedText=3]="LeftOfInjectedText",e[e.RightOfInjectedText=4]="RightOfInjectedText"}(fi||(fi={})),function(e){e[e.Off=0]="Off",e[e.On=1]="On",e[e.Relative=2]="Relative",e[e.Interval=3]="Interval",e[e.Custom=4]="Custom"}(gi||(gi={})),function(e){e[e.None=0]="None",e[e.Text=1]="Text",e[e.Blocks=2]="Blocks"}(bi||(bi={})),function(e){e[e.Smooth=0]="Smooth",e[e.Immediate=1]="Immediate"}(vi||(vi={})),function(e){e[e.Auto=1]="Auto",e[e.Hidden=2]="Hidden",e[e.Visible=3]="Visible"}(yi||(yi={})),function(e){e[e.LTR=0]="LTR",e[e.RTL=1]="RTL"}(wi||(wi={})),function(e){e.Off="off",e.OnCode="onCode",e.On="on"}(xi||(xi={})),function(e){e[e.Invoke=1]="Invoke",e[e.TriggerCharacter=2]="TriggerCharacter",e[e.ContentChange=3]="ContentChange"}(Si||(Si={})),function(e){e[e.File=0]="File",e[e.Module=1]="Module",e[e.Namespace=2]="Namespace",e[e.Package=3]="Package",e[e.Class=4]="Class",e[e.Method=5]="Method",e[e.Property=6]="Property",e[e.Field=7]="Field",e[e.Constructor=8]="Constructor",e[e.Enum=9]="Enum",e[e.Interface=10]="Interface",e[e.Function=11]="Function",e[e.Variable=12]="Variable",e[e.Constant=13]="Constant",e[e.String=14]="String",e[e.Number=15]="Number",e[e.Boolean=16]="Boolean",e[e.Array=17]="Array",e[e.Object=18]="Object",e[e.Key=19]="Key",e[e.Null=20]="Null",e[e.EnumMember=21]="EnumMember",e[e.Struct=22]="Struct",e[e.Event=23]="Event",e[e.Operator=24]="Operator",e[e.TypeParameter=25]="TypeParameter"}(Ci||(Ci={})),function(e){e[e.Deprecated=1]="Deprecated"}(_i||(_i={})),function(e){e[e.Hidden=0]="Hidden",e[e.Blink=1]="Blink",e[e.Smooth=2]="Smooth",e[e.Phase=3]="Phase",e[e.Expand=4]="Expand"
,e[e.Solid=5]="Solid"}(ki||(ki={})),function(e){e[e.Line=1]="Line",e[e.Block=2]="Block",e[e.Underline=3]="Underline",e[e.LineThin=4]="LineThin",e[e.BlockOutline=5]="BlockOutline",e[e.UnderlineThin=6]="UnderlineThin"}(Ei||(Ei={})),function(e){e[e.AlwaysGrowsWhenTypingAtEdges=0]="AlwaysGrowsWhenTypingAtEdges",e[e.NeverGrowsWhenTypingAtEdges=1]="NeverGrowsWhenTypingAtEdges",e[e.GrowsOnlyWhenTypingBefore=2]="GrowsOnlyWhenTypingBefore",e[e.GrowsOnlyWhenTypingAfter=3]="GrowsOnlyWhenTypingAfter"}(Ri||(Ri={})),function(e){e[e.None=0]="None",e[e.Same=1]="Same",e[e.Indent=2]="Indent",e[e.DeepIndent=3]="DeepIndent"}(Ni||(Ni={}));class Ti{static chord(e,t){return function(e,t){return(e|(65535&t)<<16>>>0)>>>0}(e,t)}}Ti.CtrlCmd=2048,Ti.Shift=1024,Ti.Alt=512,Ti.WinCtrl=256;class Ai{constructor(e,t){this.uri=e,this.value=t}}class Mi{constructor(e,t){if(this[Fi]="ResourceMap",e instanceof Mi)this.map=new Map(e.map),this.toKey=null!==t&&void 0!==t?t:Mi.defaultToKey;else if(function(e){return Array.isArray(e)}(e)){this.map=new Map,this.toKey=null!==t&&void 0!==t?t:Mi.defaultToKey;for(const[t,n]of e)this.set(t,n)}else this.map=new Map,this.toKey=null!==e&&void 0!==e?e:Mi.defaultToKey}set(e,t){return this.map.set(this.toKey(e),new Ai(e,t)),this}get(e){var t;return null===(t=this.map.get(this.toKey(e)))||void 0===t?void 0:t.value}has(e){return this.map.has(this.toKey(e))}get size(){return this.map.size}clear(){this.map.clear()}delete(e){return this.map.delete(this.toKey(e))}forEach(e,t){"undefined"!==typeof t&&(e=e.bind(t));for(const[n,i]of this.map)e(i.value,i.uri,this)}*values(){for(const e of this.map.values())yield e.value}*keys(){for(const e of this.map.values())yield e.uri}*entries(){for(const e of this.map.values())yield[e.uri,e.value]}*[(Fi=Symbol.toStringTag,Symbol.iterator)](){for(const[,e]of this.map)yield[e.uri,e.value]}}Mi.defaultToKey=e=>e.toString();class zi{constructor(){this[Di]="LinkedMap",this._map=new Map,this._head=void 0,this._tail=void 
0,this._size=0,this._state=0}clear(){this._map.clear(),this._head=void 0,this._tail=void 0,this._size=0,this._state++}isEmpty(){return!this._head&&!this._tail}get size(){return this._size}get first(){var e;return null===(e=this._head)||void 0===e?void 0:e.value}get last(){var e;return null===(e=this._tail)||void 0===e?void 0:e.value}has(e){return this._map.has(e)}get(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;const n=this._map.get(e);if(n)return 0!==t&&this.touch(n,t),n.value}set(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0,i=this._map.get(e);if(i)i.value=t,0!==n&&this.touch(i,n);else{switch(i={key:e,value:t,next:void 0,previous:void 0},n){case 0:case 2:default:this.addItemLast(i);break;case 1:this.addItemFirst(i)}this._map.set(e,i),this._size++}return this}delete(e){return!!this.remove(e)}remove(e){const t=this._map.get(e);if(t)return this._map.delete(e),this.removeItem(t),this._size--,t.value}shift(){if(!this._head&&!this._tail)return;if(!this._head||!this._tail)throw new Error("Invalid list");const e=this._head;return this._map.delete(e.key),this.removeItem(e),this._size--,e.value}forEach(e,t){const n=this._state;let i=this._head;for(;i;){if(t?e.bind(t)(i.value,i.key,this):e(i.value,i.key,this),this._state!==n)throw new Error("LinkedMap got modified during iteration.");i=i.next}}keys(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new Error("LinkedMap got modified during iteration.");if(n){const e={value:n.key,done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}values(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new Error("LinkedMap got modified during iteration.");if(n){const e={value:n.value,done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}entries(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new 
Error("LinkedMap got modified during iteration.");if(n){const e={value:[n.key,n.value],done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}[(Di=Symbol.toStringTag,Symbol.iterator)](){return this.entries()}trimOld(e){if(e>=this.size)return;if(0===e)return void this.clear();let t=this._head,n=this.size;for(;t&&n>e;)this._map.delete(t.key),t=t.next,n--;this._head=t,this._size=n,t&&(t.previous=void 0),this._state++}addItemFirst(e){if(this._head||this._tail){if(!this._head)throw new Error("Invalid list");e.next=this._head,this._head.previous=e}else this._tail=e;this._head=e,this._state++}addItemLast(e){if(this._head||this._tail){if(!this._tail)throw new Error("Invalid list");e.previous=this._tail,this._tail.next=e}else this._head=e;this._tail=e,this._state++}removeItem(e){if(e===this._head&&e===this._tail)this._head=void 0,this._tail=void 0;else if(e===this._head){if(!e.next)throw new Error("Invalid list");e.next.previous=void 0,this._head=e.next}else if(e===this._tail){if(!e.previous)throw new Error("Invalid list");e.previous.next=void 0,this._tail=e.previous}else{const t=e.next,n=e.previous;if(!t||!n)throw new Error("Invalid list");t.previous=n,n.next=t}e.next=void 0,e.previous=void 0,this._state++}touch(e,t){if(!this._head||!this._tail)throw new Error("Invalid list");if(1===t||2===t)if(1===t){if(e===this._head)return;const t=e.next,n=e.previous;e===this._tail?(n.next=void 0,this._tail=n):(t.previous=n,n.next=t),e.previous=void 0,e.next=this._head,this._head.previous=e,this._head=e,this._state++}else if(2===t){if(e===this._tail)return;const t=e.next,n=e.previous;e===this._head?(t.previous=void 0,this._head=t):(t.previous=n,n.next=t),e.next=void 0,e.previous=this._tail,this._tail.next=e,this._tail=e,this._state++}}toJSON(){const e=[];return this.forEach(((t,n)=>{e.push([n,t])})),e}fromJSON(e){this.clear();for(const[t,n]of e)this.set(t,n)}}class Ii{constructor(){this.map=new Map}add(e,t){let n=this.map.get(e);n||(n=new 
Set,this.map.set(e,n)),n.add(t)}delete(e,t){const n=this.map.get(e);n&&(n.delete(t),0===n.size&&this.map.delete(e))}forEach(e,t){const n=this.map.get(e);n&&n.forEach(t)}get(e){const t=this.map.get(e);return t||new Set}}new class extends zi{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:1;super(),this._limit=e,this._ratio=Math.min(Math.max(0,t),1)}get limit(){return this._limit}set limit(e){this._limit=e,this.checkTrim()}get(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:2;return super.get(e,t)}peek(e){return super.get(e,0)}set(e,t){return super.set(e,t,2),this.checkTrim(),this}checkTrim(){this.size>this._limit&&this.trimOld(Math.round(this._limit*this._ratio))}}(10);var Li,Pi,Oi;!function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=4]="Right",e[e.Full=7]="Full"}(Li||(Li={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=3]="Right"}(Pi||(Pi={})),function(e){e[e.Both=0]="Both",e[e.Right=1]="Right",e[e.Left=2]="Left",e[e.None=3]="None"}(Oi||(Oi={}));function Wi(e,t,n,i,r){return function(e,t,n,i,r){if(0===i)return!0;const o=t.charCodeAt(i-1);if(0!==e.get(o))return!0;if(13===o||10===o)return!0;if(r>0){const n=t.charCodeAt(i);if(0!==e.get(n))return!0}return!1}(e,t,0,i,r)&&function(e,t,n,i,r){if(i+r===n)return!0;const o=t.charCodeAt(i+r);if(0!==e.get(o))return!0;if(13===o||10===o)return!0;if(r>0){const n=t.charCodeAt(i+r-1);if(0!==e.get(n))return!0}return!1}(e,t,n,i,r)}class Vi{constructor(e,t){this._wordSeparators=e,this._searchRegex=t,this._prevMatchStartIndex=-1,this._prevMatchLength=0}reset(e){this._searchRegex.lastIndex=e,this._prevMatchStartIndex=-1,this._prevMatchLength=0}next(e){const t=e.length;let n;do{if(this._prevMatchStartIndex+this._prevMatchLength===t)return null;if(n=this._searchRegex.exec(e),!n)return null;const 
i=n.index,r=n[0].length;if(i===this._prevMatchStartIndex&&r===this._prevMatchLength){if(0===r){ye(e,t,this._searchRegex.lastIndex)>65535?this._searchRegex.lastIndex+=2:this._searchRegex.lastIndex+=1;continue}return null}if(this._prevMatchStartIndex=i,this._prevMatchLength=r,!this._wordSeparators||Wi(this._wordSeparators,e,t,i,r))return n}while(n);return null}}function Ui(e){throw new Error(arguments.length>1&&void 0!==arguments[1]?arguments[1]:"Unreachable")}function Ki(e){e()||(e(),t(new l("Assertion Failed")))}function qi(e,t){let n=0;for(;nString.fromCodePoint(e))).join(""))}]`,"g");const l=new Vi(null,a),c=[];let h,d=!1,p=0,u=0,m=0;e:for(let f=i,g=r;f<=g;f++){const t=e.getLineContent(f),n=t.length;l.reset(0);do{if(h=l.next(t),h){let e=h.index,i=h.index+h[0].length;if(e>0){ge(t.charCodeAt(e-1))&&e--}if(i+1=t){d=!0;break e}c.push(new It(f,e+1,f,i+1))}}}while(h)}return{ranges:c,hasMore:d,ambiguousCharacterCount:p,invisibleCharacterCount:u,nonBasicAsciiCharacterCount:m}}static computeUnicodeHighlightReason(e,t){const n=new ji(t);switch(n.shouldHighlightNonBasicASCII(e,null)){case 0:return null;case 2:return{kind:1};case 3:{const i=e.codePointAt(0),r=n.ambiguousCharacters.getPrimaryConfusable(i),o=Se.getLocales().filter((e=>!Se.getInstance(new Set([...t.allowedLocales,e])).isAmbiguous(i)));return{kind:0,confusableWith:String.fromCodePoint(r),notAmbiguousInLocales:o}}case 1:return{kind:2}}}}class ji{constructor(e){this.options=e,this.allowedCodePoints=new Set(e.allowedCodePoints),this.ambiguousCharacters=Se.getInstance(new Set(e.allowedLocales))}getCandidateCodePoints(){if(this.options.nonBasicASCII)return"allNonBasicAscii";const e=new Set;if(this.options.invisibleCharacters)for(const t of Ce.codePoints)$i(String.fromCodePoint(t))||e.add(t);if(this.options.ambiguousCharacters)for(const t of this.ambiguousCharacters.getConfusableCodePoints())e.add(t);for(const t of this.allowedCodePoints)e.delete(t);return e}shouldHighlightNonBasicASCII(e,t){const 
n=e.codePointAt(0);if(this.allowedCodePoints.has(n))return 0;if(this.options.nonBasicASCII)return 1;let i=!1,r=!1;if(t)for(const s of t){const e=s.codePointAt(0),t=(o=s,we.test(o));i=i||t,t||this.ambiguousCharacters.isAmbiguous(e)||Ce.isInvisibleCharacter(e)||(r=!0)}var o;return!i&&r?0:this.options.invisibleCharacters&&!$i(e)&&Ce.isInvisibleCharacter(n)?2:this.options.ambiguousCharacters&&this.ambiguousCharacters.isAmbiguous(n)?3:0}}function $i(e){return" "===e||"\n"===e||"\t"===e}class Hi{constructor(e,t,n){this.changes=e,this.moves=t,this.hitTimeout=n}}class Gi{constructor(e,t){this.lineRangeMapping=e,this.changes=t}}class Ji{static addRange(e,t){let n=0;for(;nt))return new Ji(e,t)}static ofLength(e){return new Ji(0,e)}static ofStartAndLength(e,t){return new Ji(e,e+t)}constructor(e,t){if(this.start=e,this.endExclusive=t,e>t)throw new l(`Invalid range: ${this.toString()}`)}get isEmpty(){return this.start===this.endExclusive}delta(e){return new Ji(this.start+e,this.endExclusive+e)}deltaStart(e){return new Ji(this.start+e,this.endExclusive)}deltaEnd(e){return new Ji(this.start,this.endExclusive+e)}get length(){return this.endExclusive-this.start}toString(){return`[${this.start}, ${this.endExclusive})`}contains(e){return this.start<=e&&e=e.endExclusive}slice(e){return e.slice(this.start,this.endExclusive)}substring(e){return e.substring(this.start,this.endExclusive)}clip(e){if(this.isEmpty)throw new l(`Invalid clipping range: ${this.toString()}`);return Math.max(this.start,Math.min(this.endExclusive-1,e))}clipCyclic(e){if(this.isEmpty)throw new l(`Invalid clipping range: ${this.toString()}`);return e=this.endExclusive?this.start+(e-this.start)%this.length:e}forEach(e){for(let t=this.start;t2&&void 0!==arguments[2]?arguments[2]:0,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:e.length;for(;n2&&void 0!==arguments[2]?arguments[2]:0,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:e.length;for(;nt)throw new l(`startLineNumber ${e} cannot be after 
endLineNumberExclusive ${t}`);this.startLineNumber=e,this.endLineNumberExclusive=t}contains(e){return this.startLineNumber<=e&&e0&&void 0!==arguments[0]?arguments[0]:[];this._normalizedRanges=e}get ranges(){return this._normalizedRanges}addRange(e){if(0===e.length)return;const t=Qi(this._normalizedRanges,(t=>t.endLineNumberExclusive>=e.startLineNumber)),n=Yi(this._normalizedRanges,(t=>t.startLineNumber<=e.endLineNumberExclusive))+1;if(t===n)this._normalizedRanges.splice(t,0,e);else if(t===n-1){const n=this._normalizedRanges[t];this._normalizedRanges[t]=n.join(e)}else{const i=this._normalizedRanges[t].join(this._normalizedRanges[n-1]).join(e);this._normalizedRanges.splice(t,n-t,i)}}contains(e){const t=Xi(this._normalizedRanges,(t=>t.startLineNumber<=e));return!!t&&t.endLineNumberExclusive>e}intersects(e){const t=Xi(this._normalizedRanges,(t=>t.startLineNumbere.startLineNumber}getUnion(e){if(0===this._normalizedRanges.length)return e;if(0===e._normalizedRanges.length)return this;const t=[];let n=0,i=0,r=null;for(;n=o.startLineNumber?r=new er(r.startLineNumber,Math.max(r.endLineNumberExclusive,o.endLineNumberExclusive)):(t.push(r),r=o)}return null!==r&&t.push(r),new tr(t)}subtractFrom(e){const t=Qi(this._normalizedRanges,(t=>t.endLineNumberExclusive>=e.startLineNumber)),n=Yi(this._normalizedRanges,(t=>t.startLineNumber<=e.endLineNumberExclusive))+1;if(t===n)return new tr([e]);const i=[];let r=e.startLineNumber;for(let o=t;or&&i.push(new er(r,e.startLineNumber)),r=e.endLineNumberExclusive}return re.toString())).join(", ")}getIntersection(e){const t=[];let n=0,i=0;for(;nt.delta(e))))}}class nr{static inverse(e,t,n){const i=[];let r=1,o=1;for(const a of e){const e=new nr(new er(r,a.original.startLineNumber),new er(o,a.modified.startLineNumber));e.modified.isEmpty||i.push(e),r=a.original.endLineNumberExclusive,o=a.modified.endLineNumberExclusive}const s=new nr(new er(r,t+1),new er(o,n+1));return s.modified.isEmpty||i.push(s),i}static clip(e,t,n){const i=[];for(const r of 
e){const e=r.original.intersect(t),o=r.modified.intersect(n);e&&!e.isEmpty&&o&&!o.isEmpty&&i.push(new nr(e,o))}return i}constructor(e,t){this.original=e,this.modified=t}toString(){return`{${this.original.toString()}->${this.modified.toString()}}`}flip(){return new nr(this.modified,this.original)}join(e){return new nr(this.original.join(e.original),this.modified.join(e.modified))}}class ir extends nr{static fromRangeMappings(e){const t=er.join(e.map((e=>er.fromRangeInclusive(e.originalRange)))),n=er.join(e.map((e=>er.fromRangeInclusive(e.modifiedRange))));return new ir(t,n,e)}constructor(e,t,n){super(e,t),this.innerChanges=n}flip(){var e;return new ir(this.modified,this.original,null===(e=this.innerChanges)||void 0===e?void 0:e.map((e=>e.flip())))}withInnerChangesFromLineRanges(){return new ir(this.original,this.modified,[new rr(this.original.toExclusiveRange(),this.modified.toExclusiveRange())])}}class rr{constructor(e,t){this.originalRange=e,this.modifiedRange=t}toString(){return`{${this.originalRange.toString()}->${this.modifiedRange.toString()}}`}flip(){return new rr(this.modifiedRange,this.originalRange)}}const or=3;class sr{computeDiff(e,t,n){var i;const r=new pr(e,t,{maxComputationTime:n.maxComputationTimeMs,shouldIgnoreTrimWhitespace:n.ignoreTrimWhitespace,shouldComputeCharChanges:!0,shouldMakePrettyDiff:!0,shouldPostProcessCharChanges:!0}).computeDiff(),o=[];let s=null;for(const a of r.changes){let e,t;e=0===a.originalEndLineNumber?new er(a.originalStartLineNumber+1,a.originalStartLineNumber+1):new er(a.originalStartLineNumber,a.originalEndLineNumber+1),t=0===a.modifiedEndLineNumber?new er(a.modifiedStartLineNumber+1,a.modifiedStartLineNumber+1):new er(a.modifiedStartLineNumber,a.modifiedEndLineNumber+1);let n=new ir(e,t,null===(i=a.charChanges)||void 0===i?void 0:i.map((e=>new rr(new It(e.originalStartLineNumber,e.originalStartColumn,e.originalEndLineNumber,e.originalEndColumn),new 
It(e.modifiedStartLineNumber,e.modifiedStartColumn,e.modifiedEndLineNumber,e.modifiedEndColumn)))));s&&(s.modified.endLineNumberExclusive!==n.modified.startLineNumber&&s.original.endLineNumberExclusive!==n.original.startLineNumber||(n=new ir(s.original.join(n.original),s.modified.join(n.modified),s.innerChanges&&n.innerChanges?s.innerChanges.concat(n.innerChanges):void 0),o.pop())),o.push(n),s=n}return Ki((()=>qi(o,((e,t)=>t.original.startLineNumber-e.original.endLineNumberExclusive===t.modified.startLineNumber-e.modified.endLineNumberExclusive&&e.original.endLineNumberExclusive(10===e?"\\n":String.fromCharCode(e))+`-(${this._lineNumbers[t]},${this._columns[t]})`)).join(", ")+"]"}_assertIndex(e,t){if(e<0||e>=t.length)throw new Error("Illegal index")}getElements(){return this._charCodes}getStartLineNumber(e){return e>0&&e===this._lineNumbers.length?this.getEndLineNumber(e-1):(this._assertIndex(e,this._lineNumbers),this._lineNumbers[e])}getEndLineNumber(e){return-1===e?this.getStartLineNumber(e+1):(this._assertIndex(e,this._lineNumbers),10===this._charCodes[e]?this._lineNumbers[e]+1:this._lineNumbers[e])}getStartColumn(e){return e>0&&e===this._columns.length?this.getEndColumn(e-1):(this._assertIndex(e,this._columns),this._columns[e])}getEndColumn(e){return-1===e?this.getStartColumn(e+1):(this._assertIndex(e,this._columns),10===this._charCodes[e]?1:this._columns[e]+1)}}class hr{constructor(e,t,n,i,r,o,s,a){this.originalStartLineNumber=e,this.originalStartColumn=t,this.originalEndLineNumber=n,this.originalEndColumn=i,this.modifiedStartLineNumber=r,this.modifiedStartColumn=o,this.modifiedEndLineNumber=s,this.modifiedEndColumn=a}static createFromDiffChange(e,t,n){const 
i=t.getStartLineNumber(e.originalStart),r=t.getStartColumn(e.originalStart),o=t.getEndLineNumber(e.originalStart+e.originalLength-1),s=t.getEndColumn(e.originalStart+e.originalLength-1),a=n.getStartLineNumber(e.modifiedStart),l=n.getStartColumn(e.modifiedStart),c=n.getEndLineNumber(e.modifiedStart+e.modifiedLength-1),h=n.getEndColumn(e.modifiedStart+e.modifiedLength-1);return new hr(i,r,o,s,a,l,c,h)}}class dr{constructor(e,t,n,i,r){this.originalStartLineNumber=e,this.originalEndLineNumber=t,this.modifiedStartLineNumber=n,this.modifiedEndLineNumber=i,this.charChanges=r}static createFromDiffResult(e,t,n,i,r,o,s){let a,l,c,h,d;if(0===t.originalLength?(a=n.getStartLineNumber(t.originalStart)-1,l=0):(a=n.getStartLineNumber(t.originalStart),l=n.getEndLineNumber(t.originalStart+t.originalLength-1)),0===t.modifiedLength?(c=i.getStartLineNumber(t.modifiedStart)-1,h=0):(c=i.getStartLineNumber(t.modifiedStart),h=i.getEndLineNumber(t.modifiedStart+t.modifiedLength-1)),o&&t.originalLength>0&&t.originalLength<20&&t.modifiedLength>0&&t.modifiedLength<20&&r()){const o=n.createCharSequence(e,t.originalStart,t.originalStart+t.originalLength-1),a=i.createCharSequence(e,t.modifiedStart,t.modifiedStart+t.modifiedLength-1);if(o.getElements().length>0&&a.getElements().length>0){let e=ar(o,a,r,!0).changes;s&&(e=function(e){if(e.length<=1)return e;const t=[e[0]];let n=t[0];for(let i=1,r=e.length;i1&&s>1;){if(e.charCodeAt(n-2)!==t.charCodeAt(s-2))break;n--,s--}(n>1||s>1)&&this._pushTrimWhitespaceCharChange(i,r+1,1,n,o+1,1,s)}{let n=mr(e,1),s=mr(t,1);const a=e.length+1,l=t.length+1;for(;n1&&void 0!==arguments[1]?arguments[1]:e.length-1;t>=0;t--){const n=e.charCodeAt(t);if(32!==n&&9!==n)return t}return-1}(e);return-1===n?t:n+2}function fr(e){if(0===e)return()=>!0;const t=Date.now();return()=>Date.now()-t{n.push(br.fromOffsetPairs(e?e.getEndExclusives():vr.zero,i?i.getStarts():new vr(t,(e?e.seq2Range.endExclusive-e.seq1Range.endExclusive:0)+t)))})),n}static fromOffsetPairs(e,t){return new 
br(new Ji(e.offset1,t.offset1),new Ji(e.offset2,t.offset2))}constructor(e,t){this.seq1Range=e,this.seq2Range=t}swap(){return new br(this.seq2Range,this.seq1Range)}toString(){return`${this.seq1Range} <-> ${this.seq2Range}`}join(e){return new br(this.seq1Range.join(e.seq1Range),this.seq2Range.join(e.seq2Range))}delta(e){return 0===e?this:new br(this.seq1Range.delta(e),this.seq2Range.delta(e))}deltaStart(e){return 0===e?this:new br(this.seq1Range.deltaStart(e),this.seq2Range.deltaStart(e))}deltaEnd(e){return 0===e?this:new br(this.seq1Range.deltaEnd(e),this.seq2Range.deltaEnd(e))}intersect(e){const t=this.seq1Range.intersect(e.seq1Range),n=this.seq2Range.intersect(e.seq2Range);if(t&&n)return new br(t,n)}getStarts(){return new vr(this.seq1Range.start,this.seq2Range.start)}getEndExclusives(){return new vr(this.seq1Range.endExclusive,this.seq2Range.endExclusive)}}class vr{constructor(e,t){this.offset1=e,this.offset2=t}toString(){return`${this.offset1} <-> ${this.offset2}`}delta(e){return 0===e?this:new vr(this.offset1+e,this.offset2+e)}equals(e){return this.offset1===e.offset1&&this.offset2===e.offset2}}vr.zero=new vr(0,0),vr.max=new vr(Number.MAX_SAFE_INTEGER,Number.MAX_SAFE_INTEGER);class yr{isValid(){return!0}}yr.instance=new yr;class wr{constructor(e){if(this.timeout=e,this.startTime=Date.now(),this.valid=!0,e<=0)throw new l("timeout must be positive")}isValid(){return!(Date.now()-this.startTime2&&void 0!==arguments[2]?arguments[2]:yr.instance,i=arguments.length>3?arguments[3]:void 0;if(0===e.length||0===t.length)return gr.trivial(e,t);const r=new xr(e.length,t.length),o=new xr(e.length,t.length),s=new xr(e.length,t.length);for(let u=0;u0&&a>0&&3===o.get(u-1,a-1)&&(h+=s.get(u-1,a-1)),h+=i?i(u,a):1):h=-1;const d=Math.max(l,c,h);if(d===h){const e=u>0&&a>0?s.get(u-1,a-1):0;s.set(u,a,e+1),o.set(u,a,3)}else d===l?(s.set(u,a,0),o.set(u,a,1)):d===c&&(s.set(u,a,0),o.set(u,a,2));r.set(u,a,d)}const a=[];let l=e.length,c=t.length;function h(e,t){e+1===l&&t+1===c||a.push(new 
br(new Ji(e+1,l),new Ji(t+1,c))),l=e,c=t}let d=e.length-1,p=t.length-1;for(;d>=0&&p>=0;)3===o.get(d,p)?(h(d,p),d--,p--):1===o.get(d,p)?d--:p--;return h(-1,-1),a.reverse(),new gr(a,!1)}}class kr{compute(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:yr.instance;if(0===e.length||0===t.length)return gr.trivial(e,t);const i=e,r=t;function o(e,t){for(;ei.length||p>r.length)continue;const u=o(d,p);a.set(c,u);const m=d===s?l.get(c+1):l.get(c-1);if(l.set(c,u!==d?new Er(m,d,p,u-d):m),a.get(c)===i.length&&a.get(c)-c===r.length)break e}}let h=l.get(c);const d=[];let p=i.length,u=r.length;for(;;){const e=h?h.x+h.length:0,t=h?h.y+h.length:0;if(e===p&&t===u||d.push(new br(new Ji(e,p),new Ji(t,u))),!h)break;p=h.x,u=h.y,h=h.prev}return d.reverse(),new gr(d,!1)}}class Er{constructor(e,t,n,i){this.prev=e,this.x=t,this.y=n,this.length=i}}class Rr{constructor(){this.positiveArr=new Int32Array(10),this.negativeArr=new Int32Array(10)}get(e){return e<0?(e=-e-1,this.negativeArr[e]):this.positiveArr[e]}set(e,t){if(e<0){if((e=-e-1)>=this.negativeArr.length){const e=this.negativeArr;this.negativeArr=new Int32Array(2*e.length),this.negativeArr.set(e)}this.negativeArr[e]=t}else{if(e>=this.positiveArr.length){const e=this.positiveArr;this.positiveArr=new Int32Array(2*e.length),this.positiveArr.set(e)}this.positiveArr[e]=t}}}class Nr{constructor(){this.positiveArr=[],this.negativeArr=[]}get(e){return e<0?(e=-e-1,this.negativeArr[e]):this.positiveArr[e]}set(e,t){e<0?(e=-e-1,this.negativeArr[e]=t):this.positiveArr[e]=t}}class Fr{constructor(e,t,n){this.lines=e,this.considerWhitespaceChanges=n,this.elements=[],this.firstCharOffsetByLine=[],this.additionalOffsetByLine=[];let i=!1;t.start>0&&t.endExclusive>=e.length&&(t=new Ji(t.start-1,t.endExclusive),i=!0),this.lineRange=t,this.firstCharOffsetByLine[0]=0;for(let r=this.lineRange.start;rString.fromCharCode(e))).join("")}getElement(e){return this.elements[e]}get length(){return this.elements.length}getBoundaryScore(e){const 
t=Mr(e>0?this.elements[e-1]:-1),n=Mr(et<=e));return new zt(this.lineRange.start+t+1,e-this.firstCharOffsetByLine[t]+this.additionalOffsetByLine[t]+1)}translateRange(e){return It.fromPositions(this.translateOffset(e.start),this.translateOffset(e.endExclusive))}findWordContaining(e){if(e<0||e>=this.elements.length)return;if(!Dr(this.elements[e]))return;let t=e;for(;t>0&&Dr(this.elements[t-1]);)t--;let n=e;for(;nt<=e.start)))&&void 0!==t?t:0,r=null!==(n=function(e,t){const n=Qi(e,t);return n===e.length?void 0:e[n]}(this.firstCharOffsetByLine,(t=>e.endExclusive<=t)))&&void 0!==n?n:this.elements.length;return new Ji(i,r)}}function Dr(e){return e>=97&&e<=122||e>=65&&e<=90||e>=48&&e<=57}const Tr={0:0,1:0,2:0,3:10,4:2,5:30,6:3,7:10,8:10};function Ar(e){return Tr[e]}function Mr(e){return 10===e?8:13===e?7:Sr(e)?6:e>=97&&e<=122?0:e>=65&&e<=90?1:e>=48&&e<=57?2:-1===e?3:44===e||59===e?5:4}function zr(e,t,n,i,r,o){let{moves:s,excludedChanges:a}=function(e,t,n,i){const r=[],o=e.filter((e=>e.modified.isEmpty&&e.original.length>=3)).map((e=>new Cr(e.original,t,e))),s=new Set(e.filter((e=>e.original.isEmpty&&e.modified.length>=3)).map((e=>new Cr(e.modified,n,e)))),a=new Set;for(const l of o){let e,t=-1;for(const n of s){const i=l.computeSimilarity(n);i>t&&(t=i,e=n)}if(t>.9&&e&&(s.delete(e),r.push(new nr(l.range,e.range)),a.add(l.source),a.add(e.source)),!i.isValid())return{moves:r,excludedChanges:a}}return{moves:r,excludedChanges:a}}(e,t,n,o);if(!o.isValid())return[];const l=function(e,t,n,i,r,o){const s=[],a=new Ii;for(const u of e)for(let e=u.original.startLineNumber;ee.modified.startLineNumber),Ot));for(const u of e){let e=[];for(let t=u.modified.startLineNumber;t{let{range:n}=t;for(const s of e)if(s.originalLineRange.endLineNumberExclusive+1===n.endLineNumberExclusive&&s.modifiedLineRange.endLineNumberExclusive+1===r.endLineNumberExclusive)return s.originalLineRange=new er(s.originalLineRange.startLineNumber,n.endLineNumberExclusive),s.modifiedLineRange=new 
er(s.modifiedLineRange.startLineNumber,r.endLineNumberExclusive),void o.push(s);const i={modifiedLineRange:r,originalLineRange:n};l.push(i),o.push(i)})),e=o}if(!o.isValid())return[]}l.sort((c=Pt((e=>e.modifiedLineRange.length),Ot),(e,t)=>-c(e,t)));var c;const h=new tr,d=new tr;for(const u of l){const e=u.modifiedLineRange.startLineNumber-u.originalLineRange.startLineNumber,t=h.subtractFrom(u.modifiedLineRange),n=d.subtractFrom(u.originalLineRange).getWithDelta(e),i=t.getIntersection(n);for(const r of i.ranges){if(r.length<3)continue;const t=r,n=r.delta(-e);s.push(new nr(n,t)),h.addRange(t),d.addRange(n)}}s.sort(Pt((e=>e.original.startLineNumber),Ot));const p=new Zi(e);for(let u=0;ue.original.startLineNumber<=t.original.startLineNumber)),a=Xi(e,(e=>e.modified.startLineNumber<=t.modified.startLineNumber)),l=Math.max(t.original.startLineNumber-n.original.startLineNumber,t.modified.startLineNumber-a.modified.startLineNumber),c=p.findLastMonotonous((e=>e.original.startLineNumbere.modified.startLineNumberi.length||n>r.length)break;if(h.contains(n)||d.contains(e))break;if(!Ir(i[e-1],r[n-1],o))break}for(g>0&&(d.addRange(new er(t.original.startLineNumber-g,t.original.startLineNumber)),h.addRange(new er(t.modified.startLineNumber-g,t.modified.startLineNumber))),b=0;bi.length||n>r.length)break;if(h.contains(n)||d.contains(e))break;if(!Ir(i[e-1],r[n-1],o))break}b>0&&(d.addRange(new er(t.original.endLineNumberExclusive,t.original.endLineNumberExclusive+b)),h.addRange(new er(t.modified.endLineNumberExclusive,t.modified.endLineNumberExclusive+b))),(g>0||b>0)&&(s[u]=new nr(new er(t.original.startLineNumber-g,t.original.endLineNumberExclusive+b),new er(t.modified.startLineNumber-g,t.modified.endLineNumberExclusive+b)))}return s}(e.filter((e=>!a.has(e))),i,r,t,n,o);return function(e,t){for(const n of t)e.push(n)}(s,l),s=function(e){if(0===e.length)return e;e.sort(Pt((e=>e.original.startLineNumber),Ot));const t=[e[0]];for(let 
n=1;n=0&&s>=0&&o+s<=2?t[t.length-1]=i.join(r):t.push(r)}return t}(s),s=s.filter((e=>{const n=e.original.toOffsetRange().slice(t).map((e=>e.trim()));return n.join("\n").length>=15&&function(e,t){let n=0;for(const i of e)t(i)&&n++;return n}(n,(e=>e.length>=2))>=2})),s=function(e,t){const n=new Zi(e);return t=t.filter((t=>(n.findLastMonotonous((e=>e.original.startLineNumbere.modified.startLineNumber300&&t.length>300)return!1;const i=(new kr).compute(new Fr([e],new Ji(0,1),!1),new Fr([t],new Ji(0,1),!1),n);let r=0;const o=br.invert(i.diffs,e.length);for(const a of o)a.seq1Range.forEach((t=>{Sr(e.charCodeAt(t))||r++}));const s=function(t){let n=0;for(let i=0;it.length?e:t);return r/s>.6&&s>10}function Lr(e,t,n){let i=n;return i=Pr(e,t,i),i=Pr(e,t,i),i=function(e,t,n){if(!e.getBoundaryScore||!t.getBoundaryScore)return n;for(let i=0;i0?n[i-1]:void 0,o=n[i],s=i+10&&(s=s.delta(a))}r.push(s)}return i.length>0&&r.push(i[i.length-1]),r}function Or(e,t,n,i,r){let o=1;for(;e.seq1Range.start-o>=i.start&&e.seq2Range.start-o>=r.start&&n.isStronglyEqual(e.seq2Range.start-o,e.seq2Range.endExclusive-o)&&o<100;)o++;o--;let s=0;for(;e.seq1Range.start+sl&&(l=s,a=c)}return e.delta(a)}class Wr{constructor(e,t){this.trimmedHash=e,this.lines=t}getElement(e){return this.trimmedHash[e]}get length(){return this.trimmedHash.length}getBoundaryScore(e){return 1e3-((0===e?0:Vr(this.lines[e-1]))+(e===this.lines.length?0:Vr(this.lines[e])))}getText(e){return this.lines.slice(e.start,e.endExclusive).join("\n")}isStronglyEqual(e,t){return this.lines[e]===this.lines[t]}}function Vr(e){let t=0;for(;t2&&void 0!==arguments[2]?arguments[2]:(e,t)=>e===t;if(e===t)return!0;if(!e||!t)return!1;if(e.length!==t.length)return!1;for(let i=0,r=e.length;ie===t)))return new Hi([],[],!1);if(1===e.length&&0===e[0].length||1===t.length&&0===t[0].length)return new Hi([new ir(new er(1,e.length+1),new er(1,t.length+1),[new rr(new It(1,1,e.length,e[0].length+1),new It(1,1,t.length,t[0].length+1))])],[],!1);const 
i=0===n.maxComputationTimeMs?yr.instance:new wr(n.maxComputationTimeMs),r=!n.ignoreTrimWhitespace,o=new Map;function s(e){let t=o.get(e);return void 0===t&&(t=o.size,o.set(e,t)),t}const a=e.map((e=>s(e.trim()))),l=t.map((e=>s(e.trim()))),c=new Wr(a,e),h=new Wr(l,t),d=(()=>c.length+h.length<1700?this.dynamicProgrammingDiffing.compute(c,h,i,((n,i)=>e[n]===t[i]?0===t[i].length?.1:1+Math.log(1+t[i].length):.99)):this.myersDiffingAlgorithm.compute(c,h))();let p=d.diffs,u=d.hitTimeout;p=Lr(c,h,p),p=function(e,t,n){let i=n;if(0===i.length)return i;let r,o=0;do{r=!1;const s=[i[0]];for(let a=1;a5||n.seq1Range.length+n.seq2Range.length>5)}h(c,l)?(r=!0,s[s.length-1]=s[s.length-1].join(l)):s.push(l)}i=s}while(o++<10&&r);return i}(c,0,p);const m=[],f=n=>{if(r)for(let o=0;ow.seq1Range.start-g===w.seq2Range.start-b));f(w.seq1Range.start-g),g=w.seq1Range.endExclusive,b=w.seq2Range.endExclusive;const n=this.refineDiff(e,t,w,i,r);n.hitTimeout&&(u=!0);for(const e of n.mappings)m.push(e)}f(e.length-g);const v=Kr(m,e,t);let y=[];return n.computeMoves&&(y=this.computeMoves(v,e,t,a,l,i,r)),Ki((()=>{function n(e,t){if(e.lineNumber<1||e.lineNumber>t.length)return!1;const n=t[e.lineNumber-1];return!(e.column<1||e.column>n.length+1)}function i(e,t){return!(e.startLineNumber<1||e.startLineNumber>t.length+1)&&!(e.endLineNumberExclusive<1||e.endLineNumberExclusive>t.length+1)}for(const r of v){if(!r.innerChanges)return!1;for(const i of r.innerChanges){if(!(n(i.modifiedRange.getStartPosition(),t)&&n(i.modifiedRange.getEndPosition(),t)&&n(i.originalRange.getStartPosition(),e)&&n(i.originalRange.getEndPosition(),e)))return!1}if(!i(r.modified,t)||!i(r.original,e))return!1}return!0})),new Hi(v,y,u)}computeMoves(e,t,n,i,r,o,s){return zr(e,t,n,i,r,o).map((e=>{const i=Kr(this.refineDiff(t,n,new br(e.original.toOffsetRange(),e.modified.toOffsetRange()),o,s).mappings,t,n,!0);return new Gi(e,i)}))}refineDiff(e,t,n,i,r){const o=new Fr(e,n.seq1Range,r),s=new 
Fr(t,n.seq2Range,r),a=o.length+s.length<500?this.dynamicProgrammingDiffing.compute(o,s,i):this.myersDiffingAlgorithm.compute(o,s,i);let l=a.diffs;l=Lr(o,s,l),l=function(e,t,n){const i=br.invert(n,e.length),r=[];let o=new vr(0,0);function s(n,s){if(n.offset10;){const n=i[0];if(!n.seq1Range.intersects(c.seq1Range)&&!n.seq2Range.intersects(c.seq2Range))break;const r=e.findWordContaining(n.seq1Range.start),o=t.findWordContaining(n.seq2Range.start),s=new br(r,o),a=s.intersect(n);if(d+=a.seq1Range.length,p+=a.seq2Range.length,c=c.join(s),!(c.seq1Range.endExclusive>=n.seq1Range.endExclusive))break;i.shift()}d+p<2*(c.seq1Range.length+c.seq2Range.length)/3&&r.push(c),o=c.getEndExclusives()}for(;i.length>0;){const e=i.shift();e.seq1Range.isEmpty||(s(e.getStarts(),e),s(e.getEndExclusives().delta(-1),e))}return function(e,t){const n=[];for(;e.length>0||t.length>0;){const i=e[0],r=t[0];let o;o=i&&(!r||i.seq1Range.start0&&n[n.length-1].seq1Range.endExclusive>=o.seq1Range.start?n[n.length-1]=n[n.length-1].join(o):n.push(o)}return n}(n,r)}(o,s,l),l=function(e,t,n){const i=[];for(const r of n){const e=i[i.length-1];e&&(r.seq1Range.start-e.seq1Range.endExclusive<=2||r.seq2Range.start-e.seq2Range.endExclusive<=2)?i[i.length-1]=new br(e.seq1Range.join(r.seq1Range),e.seq2Range.join(r.seq2Range)):i.push(r)}return i}(0,0,l),l=function(e,t,n){let i=n;if(0===i.length)return i;let r,o=0;do{r=!1;const a=[i[0]];for(let l=1;l5||r.length>500)return!1;const o=e.getText(r).trim();if(o.length>20||o.split(/\r\n|\r|\n/).length>1)return!1;const s=e.countLinesIn(n.seq1Range),a=n.seq1Range.length,l=t.countLinesIn(n.seq2Range),d=n.seq2Range.length,p=e.countLinesIn(i.seq1Range),u=i.seq1Range.length,m=t.countLinesIn(i.seq2Range),f=i.seq2Range.length,g=130;function b(e){return Math.min(e,g)}return 
Math.pow(Math.pow(b(40*s+a),1.5)+Math.pow(b(40*l+d),1.5),1.5)+Math.pow(Math.pow(b(40*p+u),1.5)+Math.pow(b(40*m+f),1.5),1.5)>(g**1.5)**1.5*1.3}d(h,c)?(r=!0,a[a.length-1]=a[a.length-1].join(c)):a.push(c)}i=a}while(o++<10&&r);const s=[];return function(e,t){for(let n=0;n{let r=n;function o(e){return e.length>0&&e.trim().length<=3&&n.seq1Range.length+n.seq2Range.length>100}const a=e.extendToFullLines(n.seq1Range),l=e.getText(new Ji(a.start,n.seq1Range.start));o(l)&&(r=r.deltaStart(-l.length));const c=e.getText(new Ji(n.seq1Range.endExclusive,a.endExclusive));o(c)&&(r=r.deltaEnd(c.length));const h=br.fromOffsetPairs(t?t.getEndExclusives():vr.zero,i?i.getStarts():vr.max),d=r.intersect(h);s.length>0&&d.getStarts().equals(s[s.length-1].getEndExclusives())?s[s.length-1]=s[s.length-1].join(d):s.push(d)})),s}(o,s,l);return{mappings:l.map((e=>new rr(o.translateRange(e.seq1Range),s.translateRange(e.seq2Range)))),hitTimeout:a.hitTimeout}}}function Kr(e,t,n){let i=arguments.length>3&&void 0!==arguments[3]&&arguments[3];const r=[];for(const o of function*(e,t){let n,i;for(const r of e)void 0!==i&&t(i,r)?n.push(r):(n&&(yield n),n=[r]),i=r;n&&(yield n)}(e.map((e=>function(e,t,n){let i=0,r=0;1===e.modifiedRange.endColumn&&1===e.originalRange.endColumn&&e.originalRange.startLineNumber+i<=e.originalRange.endLineNumber&&e.modifiedRange.startLineNumber+i<=e.modifiedRange.endLineNumber&&(r=-1);e.modifiedRange.startColumn-1>=n[e.modifiedRange.startLineNumber-1].length&&e.originalRange.startColumn-1>=t[e.originalRange.startLineNumber-1].length&&e.originalRange.startLineNumber<=e.originalRange.endLineNumber+r&&e.modifiedRange.startLineNumber<=e.modifiedRange.endLineNumber+r&&(i=1);const o=new er(e.originalRange.startLineNumber+i,e.originalRange.endLineNumber+1+r),s=new er(e.modifiedRange.startLineNumber+i,e.modifiedRange.endLineNumber+1+r);return new ir(o,s,[e])}(e,t,n))),((e,t)=>e.original.overlapOrTouch(t.original)||e.modified.overlapOrTouch(t.modified)))){const 
e=o[0],t=o[o.length-1];r.push(new ir(e.original.join(t.original),e.modified.join(t.modified),o.map((e=>e.innerChanges[0]))))}return Ki((()=>{if(!i&&r.length>0){if(r[0].modified.startLineNumber!==r[0].original.startLineNumber)return!1;if(n.length-r[r.length-1].modified.endLineNumberExclusive!==t.length-r[r.length-1].original.endLineNumberExclusive)return!1}return qi(r,((e,t)=>t.original.startLineNumber-e.original.endLineNumberExclusive===t.modified.startLineNumber-e.modified.endLineNumberExclusive&&e.original.endLineNumberExclusivenew sr,Br=()=>new Ur;function jr(e,t){const n=Math.pow(10,t);return Math.round(e*n)/n}class $r{constructor(e,t,n){let i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:1;this._rgbaBrand=void 0,this.r=0|Math.min(255,Math.max(0,e)),this.g=0|Math.min(255,Math.max(0,t)),this.b=0|Math.min(255,Math.max(0,n)),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return e.r===t.r&&e.g===t.g&&e.b===t.b&&e.a===t.a}}class Hr{constructor(e,t,n,i){this._hslaBrand=void 0,this.h=0|Math.max(Math.min(360,e),0),this.s=jr(Math.max(Math.min(1,t),0),3),this.l=jr(Math.max(Math.min(1,n),0),3),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return e.h===t.h&&e.s===t.s&&e.l===t.l&&e.a===t.a}static fromRGBA(e){const t=e.r/255,n=e.g/255,i=e.b/255,r=e.a,o=Math.max(t,n,i),s=Math.min(t,n,i);let a=0,l=0;const c=(s+o)/2,h=o-s;if(h>0){switch(l=Math.min(c<=.5?h/(2*c):h/(2-2*c),1),o){case t:a=(n-i)/h+(n1&&(n-=1),n<1/6?e+6*(t-e)*n:n<.5?t:n<2/3?e+(t-e)*(2/3-n)*6:e}static toRGBA(e){const t=e.h/360,{s:n,l:i,a:r}=e;let o,s,a;if(0===n)o=s=a=i;else{const e=i<.5?i*(1+n):i+n-i*n,r=2*i-e;o=Hr._hue2rgb(r,e,t+1/3),s=Hr._hue2rgb(r,e,t),a=Hr._hue2rgb(r,e,t-1/3)}return new $r(Math.round(255*o),Math.round(255*s),Math.round(255*a),r)}}class Gr{constructor(e,t,n,i){this._hsvaBrand=void 0,this.h=0|Math.max(Math.min(360,e),0),this.s=jr(Math.max(Math.min(1,t),0),3),this.v=jr(Math.max(Math.min(1,n),0),3),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return 
e.h===t.h&&e.s===t.s&&e.v===t.v&&e.a===t.a}static fromRGBA(e){const t=e.r/255,n=e.g/255,i=e.b/255,r=Math.max(t,n,i),o=r-Math.min(t,n,i),s=0===r?0:o/r;let a;return a=0===o?0:r===t?((n-i)/o%6+6)%6:r===n?(i-t)/o+2:(t-n)/o+4,new Gr(Math.round(60*a),s,r,e.a)}static toRGBA(e){const{h:t,s:n,v:i,a:r}=e,o=i*n,s=o*(1-Math.abs(t/60%2-1)),a=i-o;let[l,c,h]=[0,0,0];return t<60?(l=o,c=s):t<120?(l=s,c=o):t<180?(c=o,h=s):t<240?(c=s,h=o):t<300?(l=s,h=o):t<=360&&(l=o,h=s),l=Math.round(255*(l+a)),c=Math.round(255*(c+a)),h=Math.round(255*(h+a)),new $r(l,c,h,r)}}class Jr{static fromHex(e){return Jr.Format.CSS.parseHex(e)||Jr.red}static equals(e,t){return!e&&!t||!(!e||!t)&&e.equals(t)}get hsla(){return this._hsla?this._hsla:Hr.fromRGBA(this.rgba)}get hsva(){return this._hsva?this._hsva:Gr.fromRGBA(this.rgba)}constructor(e){if(!e)throw new Error("Color needs a value");if(e instanceof $r)this.rgba=e;else if(e instanceof Hr)this._hsla=e,this.rgba=Hr.toRGBA(e);else{if(!(e instanceof Gr))throw new Error("Invalid color ctor argument");this._hsva=e,this.rgba=Gr.toRGBA(e)}}equals(e){return!!e&&$r.equals(this.rgba,e.rgba)&&Hr.equals(this.hsla,e.hsla)&&Gr.equals(this.hsva,e.hsva)}getRelativeLuminance(){return jr(.2126*Jr._relativeLuminanceForComponent(this.rgba.r)+.7152*Jr._relativeLuminanceForComponent(this.rgba.g)+.0722*Jr._relativeLuminanceForComponent(this.rgba.b),4)}static _relativeLuminanceForComponent(e){const t=e/255;return t<=.03928?t/12.92:Math.pow((t+.055)/1.055,2.4)}isLighter(){return(299*this.rgba.r+587*this.rgba.g+114*this.rgba.b)/1e3>=128}isLighterThan(e){return this.getRelativeLuminance()>e.getRelativeLuminance()}isDarkerThan(e){return this.getRelativeLuminance()0)for(const i of n){const n=i.filter((e=>void 0!==e)),r=n[1],o=n[2];if(!o)continue;let s;if("rgb"===r){const t=/^\(\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*\)$/gm;s=eo(Qr(e,i),no(o,t),!1)}else 
if("rgba"===r){const t=/^\(\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(0[.][0-9]+|[.][0-9]+|[01][.]|[01])\s*\)$/gm;s=eo(Qr(e,i),no(o,t),!0)}else if("hsl"===r){const t=/^\(\s*(36[0]|3[0-5][0-9]|[12][0-9][0-9]|[1-9]?[0-9])\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*\)$/gm;s=to(Qr(e,i),no(o,t),!1)}else if("hsla"===r){const t=/^\(\s*(36[0]|3[0-5][0-9]|[12][0-9][0-9]|[1-9]?[0-9])\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(0[.][0-9]+|[.][0-9]+|[01][.]|[01])\s*\)$/gm;s=to(Qr(e,i),no(o,t),!0)}else"#"===r&&(s=Zr(Qr(e,i),r+o));s&&t.push(s)}return t}(e):[]}Jr.white=new Jr(new $r(255,255,255,1)),Jr.black=new Jr(new $r(0,0,0,1)),Jr.red=new Jr(new $r(255,0,0,1)),Jr.blue=new Jr(new $r(0,0,255,1)),Jr.green=new Jr(new $r(0,255,0,1)),Jr.cyan=new Jr(new $r(0,255,255,1)),Jr.lightgrey=new Jr(new $r(211,211,211,1)),Jr.transparent=new Jr(new $r(0,0,0,0)),function(e){let t;!function(t){let n;!function(t){function n(e){const t=e.toString(16);return 2!==t.length?"0"+t:t}function i(e){switch(e){case 48:return 0;case 49:return 1;case 50:return 2;case 51:return 3;case 52:return 4;case 53:return 5;case 54:return 6;case 55:return 7;case 56:return 8;case 57:return 9;case 97:case 65:return 10;case 98:case 66:return 11;case 99:case 67:return 12;case 100:case 68:return 13;case 101:case 69:return 14;case 102:case 70:return 15}return 0}t.formatRGB=function(t){return 1===t.rgba.a?`rgb(${t.rgba.r}, ${t.rgba.g}, ${t.rgba.b})`:e.Format.CSS.formatRGBA(t)},t.formatRGBA=function(e){return`rgba(${e.rgba.r}, ${e.rgba.g}, ${e.rgba.b}, ${+e.rgba.a.toFixed(2)})`},t.formatHSL=function(t){return 1===t.hsla.a?`hsl(${t.hsla.h}, ${(100*t.hsla.s).toFixed(2)}%, ${(100*t.hsla.l).toFixed(2)}%)`:e.Format.CSS.formatHSLA(t)},t.formatHSLA=function(e){return`hsla(${e.hsla.h}, ${(100*e.hsla.s).toFixed(2)}%, ${(100*e.hsla.l).toFixed(2)}%, 
${e.hsla.a.toFixed(2)})`},t.formatHex=function(e){return`#${n(e.rgba.r)}${n(e.rgba.g)}${n(e.rgba.b)}`},t.formatHexA=function(t){return arguments.length>1&&void 0!==arguments[1]&&arguments[1]&&1===t.rgba.a?e.Format.CSS.formatHex(t):`#${n(t.rgba.r)}${n(t.rgba.g)}${n(t.rgba.b)}${n(Math.round(255*t.rgba.a))}`},t.format=function(t){return t.isOpaque()?e.Format.CSS.formatHex(t):e.Format.CSS.formatRGBA(t)},t.parseHex=function(t){const n=t.length;if(0===n)return null;if(35!==t.charCodeAt(0))return null;if(7===n){const n=16*i(t.charCodeAt(1))+i(t.charCodeAt(2)),r=16*i(t.charCodeAt(3))+i(t.charCodeAt(4)),o=16*i(t.charCodeAt(5))+i(t.charCodeAt(6));return new e(new $r(n,r,o,1))}if(9===n){const n=16*i(t.charCodeAt(1))+i(t.charCodeAt(2)),r=16*i(t.charCodeAt(3))+i(t.charCodeAt(4)),o=16*i(t.charCodeAt(5))+i(t.charCodeAt(6)),s=16*i(t.charCodeAt(7))+i(t.charCodeAt(8));return new e(new $r(n,r,o,s/255))}if(4===n){const n=i(t.charCodeAt(1)),r=i(t.charCodeAt(2)),o=i(t.charCodeAt(3));return new e(new $r(16*n+n,16*r+r,16*o+o))}if(5===n){const n=i(t.charCodeAt(1)),r=i(t.charCodeAt(2)),o=i(t.charCodeAt(3)),s=i(t.charCodeAt(4));return new e(new $r(16*n+n,16*r+r,16*o+o,(16*s+s)/255))}return null}}(n=t.CSS||(t.CSS={}))}(t=e.Format||(e.Format={}))}(Jr||(Jr={}));const ro=/\bMARK:\s*(.*)$/d,oo=/^-+|-+$/g;function so(e,t){var n;let i=[];if(t.findRegionSectionHeaders&&(null===(n=t.foldingRules)||void 0===n?void 0:n.markers)){const n=function(e,t){const n=[],i=e.getLineCount();for(let r=1;r<=i;r++){const i=e.getLineContent(r),o=i.match(t.foldingRules.markers.start);if(o){const e={startLineNumber:r,startColumn:o[0].length+1,endLineNumber:r,endColumn:i.length+1};if(e.endColumn>e.startColumn){const t={range:e,...lo(i.substring(o[0].length)),shouldBeInComments:!1};(t.text||t.hasSeparatorLine)&&n.push(t)}}}return n}(e,t);i=i.concat(n)}if(t.findMarkSectionHeaders){const t=function(e){const t=[],n=e.getLineCount();for(let i=1;i<=n;i++){ao(e.getLineContent(i),i,t)}return t}(e);i=i.concat(t)}return 
i}function ao(e,t,n){ro.lastIndex=0;const i=ro.exec(e);if(i){const e={startLineNumber:t,startColumn:i.indices[1][0]+1,endLineNumber:t,endColumn:i.indices[1][1]+1};if(e.endColumn>e.startColumn){const t={range:e,...lo(i[1]),shouldBeInComments:!0};(t.text||t.hasSeparatorLine)&&n.push(t)}}}function lo(e){const t=(e=e.trim()).startsWith("-");return{text:e=e.replace(oo,""),hasSeparatorLine:t}}class co extends Bt{get uri(){return this._uri}get eol(){return this._eol}getValue(){return this.getText()}findMatches(e){const t=[];for(let n=0;nthis._lines.length)t=this._lines.length,n=this._lines[t-1].length+1,i=!0;else{const e=this._lines[t-1].length+1;n<1?(n=1,i=!0):n>e&&(n=e,i=!0)}return i?{lineNumber:t,column:n}:e}}class ho{constructor(e,t){this._host=e,this._models=Object.create(null),this._foreignModuleFactory=t,this._foreignModule=null}dispose(){this._models=Object.create(null)}_getModel(e){return this._models[e]}_getModels(){const e=[];return Object.keys(this._models).forEach((t=>e.push(this._models[t]))),e}acceptNewModel(e){this._models[e.url]=new co(Ct.parse(e.url),e.lines,e.EOL,e.versionId)}acceptModelChanged(e,t){if(!this._models[e])return;this._models[e].onEvents(t)}acceptRemovedModel(e){this._models[e]&&delete this._models[e]}async computeUnicodeHighlights(e,t,n){const i=this._getModel(e);return i?Bi.computeUnicodeHighlights(i,t,n):{ranges:[],hasMore:!1,ambiguousCharacterCount:0,invisibleCharacterCount:0,nonBasicAsciiCharacterCount:0}}async findSectionHeaders(e,t){const n=this._getModel(e);return n?so(n,t):[]}async computeDiff(e,t,n,i){const r=this._getModel(e),o=this._getModel(t);if(!r||!o)return null;return ho.computeDiff(r,o,n,i)}static computeDiff(e,t,n,i){const r="advanced"===i?Br():qr(),o=e.getLinesContent(),s=t.getLinesContent(),a=r.computeDiff(o,s,n);function l(e){return e.map((e=>{var t;return[e.original.startLineNumber,e.original.endLineNumberExclusive,e.modified.startLineNumber,e.modified.endLineNumberExclusive,null===(t=e.innerChanges)||void 0===t?void 
0:t.map((e=>[e.originalRange.startLineNumber,e.originalRange.startColumn,e.originalRange.endLineNumber,e.originalRange.endColumn,e.modifiedRange.startLineNumber,e.modifiedRange.startColumn,e.modifiedRange.endLineNumber,e.modifiedRange.endColumn]))]}))}return{identical:!(a.changes.length>0)&&this._modelsAreIdentical(e,t),quitEarly:a.hitTimeout,changes:l(a.changes),moves:a.moves.map((e=>[e.lineRangeMapping.original.startLineNumber,e.lineRangeMapping.original.endLineNumberExclusive,e.lineRangeMapping.modified.startLineNumber,e.lineRangeMapping.modified.endLineNumberExclusive,l(e.changes)]))}}static _modelsAreIdentical(e,t){const n=e.getLineCount();if(n!==t.getLineCount())return!1;for(let i=1;i<=n;i++){if(e.getLineContent(i)!==t.getLineContent(i))return!1}return!0}async computeMoreMinimalEdits(e,t,n){const i=this._getModel(e);if(!i)return t;const r=[];let o;t=t.slice(0).sort(((e,t)=>{if(e.range&&t.range)return It.compareRangesUsingStarts(e.range,t.range);return(e.range?0:1)-(t.range?0:1)}));let s=0;for(let a=1;aho._diffLimit){r.push({range:a,text:l});continue}const t=Be(e,l,n),s=i.offsetAt(It.lift(a).getStartPosition());for(const n of t){const e=i.positionAt(s+n.originalStart),t=i.positionAt(s+n.originalStart+n.originalLength),o={text:l.substr(n.modifiedStart,n.modifiedLength),range:{startLineNumber:e.lineNumber,startColumn:e.column,endLineNumber:t.lineNumber,endColumn:t.column}};i.getValueInRange(o.range)!==o.text&&r.push(o)}}return"number"===typeof o&&r.push({eol:o,text:"",range:{startLineNumber:0,startColumn:0,endLineNumber:0,endColumn:0}}),r}async computeLinks(e){const t=this._getModel(e);return t?function(e){return e&&"function"===typeof e.getLineCount&&"function"===typeof e.getLineContent?nn.computeLinks(e):[]}(t):null}async computeDefaultDocumentColors(e){const t=this._getModel(e);return t?io(t):null}async textualSuggest(e,t,n,i){const r=new _,o=new RegExp(n,i),s=new Set;e:for(const a of e){const e=this._getModel(a);if(e)for(const n of 
e.words(o))if(n!==t&&isNaN(Number(n))&&(s.add(n),s.size>ho._suggestionsLimit))break e}return{words:Array.from(s),duration:r.elapsed()}}async computeWordRanges(e,t,n,i){const r=this._getModel(e);if(!r)return Object.create(null);const o=new RegExp(n,i),s=Object.create(null);for(let a=t.startLineNumber;afunction(){const n=Array.prototype.slice.call(arguments,0);return t(e,n)},i={};for(const r of e)i[r]=n(r);return i}(n,((e,t)=>this._host.fhr(e,t))),r={host:i,getMirrorModels:()=>this._getModels()};return this._foreignModuleFactory?(this._foreignModule=this._foreignModuleFactory(r,t),Promise.resolve(z(this._foreignModule))):Promise.reject(new Error("Unexpected usage"))}fmr(e,t){if(!this._foreignModule||"function"!==typeof this._foreignModule[e])return Promise.reject(new Error("Missing requestHandler or method: "+e));try{return Promise.resolve(this._foreignModule[e].apply(this._foreignModule,t))}catch(Du){return Promise.reject(Du)}}}ho._diffLimit=1e5,ho._suggestionsLimit=1e4,"function"===typeof importScripts&&(globalThis.monaco={editor:void 0,languages:void 0,CancellationTokenSource:ln,Emitter:A,KeyCode:oi,KeyMod:Ti,Position:zt,Range:It,Selection:xn,SelectionDirection:wi,MarkerSeverity:si,MarkerTag:ai,Uri:Ct,Token:Rn});let po=!1;function uo(e){if(po)return;po=!0;const t=new Ie((e=>{globalThis.postMessage(e)}),(t=>new ho(t,e)));globalThis.onmessage=e=>{t.onmessage(e.data)}}var 
mo,fo;globalThis.onmessage=e=>{po||uo(null)},(fo=mo||(mo={}))[fo.Ident=0]="Ident",fo[fo.AtKeyword=1]="AtKeyword",fo[fo.String=2]="String",fo[fo.BadString=3]="BadString",fo[fo.UnquotedString=4]="UnquotedString",fo[fo.Hash=5]="Hash",fo[fo.Num=6]="Num",fo[fo.Percentage=7]="Percentage",fo[fo.Dimension=8]="Dimension",fo[fo.UnicodeRange=9]="UnicodeRange",fo[fo.CDO=10]="CDO",fo[fo.CDC=11]="CDC",fo[fo.Colon=12]="Colon",fo[fo.SemiColon=13]="SemiColon",fo[fo.CurlyL=14]="CurlyL",fo[fo.CurlyR=15]="CurlyR",fo[fo.ParenthesisL=16]="ParenthesisL",fo[fo.ParenthesisR=17]="ParenthesisR",fo[fo.BracketL=18]="BracketL",fo[fo.BracketR=19]="BracketR",fo[fo.Whitespace=20]="Whitespace",fo[fo.Includes=21]="Includes",fo[fo.Dashmatch=22]="Dashmatch",fo[fo.SubstringOperator=23]="SubstringOperator",fo[fo.PrefixOperator=24]="PrefixOperator",fo[fo.SuffixOperator=25]="SuffixOperator",fo[fo.Delim=26]="Delim",fo[fo.EMS=27]="EMS",fo[fo.EXS=28]="EXS",fo[fo.Length=29]="Length",fo[fo.Angle=30]="Angle",fo[fo.Time=31]="Time",fo[fo.Freq=32]="Freq",fo[fo.Exclamation=33]="Exclamation",fo[fo.Resolution=34]="Resolution",fo[fo.Comma=35]="Comma",fo[fo.Charset=36]="Charset",fo[fo.EscapedJavaScript=37]="EscapedJavaScript",fo[fo.BadEscapedJavaScript=38]="BadEscapedJavaScript",fo[fo.Comment=39]="Comment",fo[fo.SingleLineComment=40]="SingleLineComment",fo[fo.EOF=41]="EOF",fo[fo.CustomToken=42]="CustomToken";var go=function(){function e(e){this.source=e,this.len=e.length,this.position=0}return e.prototype.substring=function(e,t){return void 0===t&&(t=this.position),this.source.substring(e,t)},e.prototype.eos=function(){return this.len<=this.position},e.prototype.pos=function(){return this.position},e.prototype.goBackTo=function(e){this.position=e},e.prototype.goBack=function(e){this.position-=e},e.prototype.advance=function(e){this.position+=e},e.prototype.nextChar=function(){return this.source.charCodeAt(this.position++)||0},e.prototype.peekChar=function(e){return void 
0===e&&(e=0),this.source.charCodeAt(this.position+e)||0},e.prototype.lookbackChar=function(e){return void 0===e&&(e=0),this.source.charCodeAt(this.position-e)||0},e.prototype.advanceIfChar=function(e){return e===this.source.charCodeAt(this.position)&&(this.position++,!0)},e.prototype.advanceIfChars=function(e){if(this.position+e.length>this.source.length)return!1;for(var t=0;t".charCodeAt(0),Po="@".charCodeAt(0),Oo="#".charCodeAt(0),Wo="$".charCodeAt(0),Vo="\\".charCodeAt(0),Uo="/".charCodeAt(0),Ko="\n".charCodeAt(0),qo="\r".charCodeAt(0),Bo="\f".charCodeAt(0),jo='"'.charCodeAt(0),$o="'".charCodeAt(0),Ho=" ".charCodeAt(0),Go="\t".charCodeAt(0),Jo=";".charCodeAt(0),Xo=":".charCodeAt(0),Yo="{".charCodeAt(0),Qo="}".charCodeAt(0),Zo="[".charCodeAt(0),es="]".charCodeAt(0),ts=",".charCodeAt(0),ns=".".charCodeAt(0),is="!".charCodeAt(0),rs="?".charCodeAt(0),os="+".charCodeAt(0),ss={};ss[Jo]=mo.SemiColon,ss[Xo]=mo.Colon,ss[Yo]=mo.CurlyL,ss[Qo]=mo.CurlyR,ss[es]=mo.BracketR,ss[Zo]=mo.BracketL,ss[Mo]=mo.ParenthesisL,ss[zo]=mo.ParenthesisR,ss[ts]=mo.Comma;var as={};as.em=mo.EMS,as.ex=mo.EXS,as.px=mo.Length,as.cm=mo.Length,as.mm=mo.Length,as.in=mo.Length,as.pt=mo.Length,as.pc=mo.Length,as.deg=mo.Angle,as.rad=mo.Angle,as.grad=mo.Angle,as.ms=mo.Time,as.s=mo.Time,as.hz=mo.Freq,as.khz=mo.Freq,as["%"]=mo.Percentage,as.fr=mo.Percentage,as.dpi=mo.Resolution,as.dpcm=mo.Resolution;var ls=function(){function e(){this.stream=new go(""),this.ignoreComment=!0,this.ignoreWhitespace=!0,this.inURL=!1}return e.prototype.setSource=function(e){this.stream=new go(e)},e.prototype.finishToken=function(e,t,n){return{offset:e,len:this.stream.pos()-e,type:t,text:n||this.stream.substring(e)}},e.prototype.substring=function(e,t){return this.stream.substring(e,e+t)},e.prototype.pos=function(){return this.stream.pos()},e.prototype.goBackTo=function(e){this.stream.goBackTo(e)},e.prototype.scanUnquotedString=function(){var e=this.stream.pos(),t=[];return 
this._unquotedString(t)?this.finishToken(e,mo.UnquotedString,t.join("")):null},e.prototype.scan=function(){var e=this.trivia();if(null!==e)return e;var t=this.stream.pos();return this.stream.eos()?this.finishToken(t,mo.EOF):this.scanNext(t)},e.prototype.tryScanUnicode=function(){var e=this.stream.pos();if(!this.stream.eos()&&this._unicodeRange())return this.finishToken(e,mo.UnicodeRange);this.stream.goBackTo(e)},e.prototype.scanNext=function(e){if(this.stream.advanceIfChars([Io,is,Fo,Fo]))return this.finishToken(e,mo.CDO);if(this.stream.advanceIfChars([Fo,Fo,Lo]))return this.finishToken(e,mo.CDC);var t=[];if(this.ident(t))return this.finishToken(e,mo.Ident,t.join(""));if(this.stream.advanceIfChar(Po)){if(t=["@"],this._name(t)){var n=t.join("");return"@charset"===n?this.finishToken(e,mo.Charset,n):this.finishToken(e,mo.AtKeyword,n)}return this.finishToken(e,mo.Delim)}if(this.stream.advanceIfChar(Oo))return t=["#"],this._name(t)?this.finishToken(e,mo.Hash,t.join("")):this.finishToken(e,mo.Delim);if(this.stream.advanceIfChar(is))return this.finishToken(e,mo.Exclamation);if(this._number()){var i=this.stream.pos();if(t=[this.stream.substring(e,i)],this.stream.advanceIfChar(To))return this.finishToken(e,mo.Percentage);if(this.ident(t)){var r=this.stream.substring(i).toLowerCase(),o=as[r];return"undefined"!==typeof o?this.finishToken(e,o,t.join("")):this.finishToken(e,mo.Dimension,t.join(""))}return this.finishToken(e,mo.Num)}t=[];var s=this._string(t);return 
null!==s?this.finishToken(e,s,t.join("")):"undefined"!==typeof(s=ss[this.stream.peekChar()])?(this.stream.advance(1),this.finishToken(e,s)):this.stream.peekChar(0)===ko&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.Includes)):this.stream.peekChar(0)===No&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.Dashmatch)):this.stream.peekChar(0)===Ao&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.SubstringOperator)):this.stream.peekChar(0)===Eo&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.PrefixOperator)):this.stream.peekChar(0)===Wo&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.SuffixOperator)):(this.stream.nextChar(),this.finishToken(e,mo.Delim))},e.prototype.trivia=function(){for(;;){var e=this.stream.pos();if(this._whitespace()){if(!this.ignoreWhitespace)return this.finishToken(e,mo.Whitespace)}else{if(!this.comment())return null;if(!this.ignoreComment)return this.finishToken(e,mo.Comment)}}},e.prototype.comment=function(){if(this.stream.advanceIfChars([Uo,Ao])){var e=!1,t=!1;return this.stream.advanceWhileChar((function(n){return t&&n===Uo?(e=!0,!1):(t=n===Ao,!0)})),e&&this.stream.advance(1),!0}return!1},e.prototype._number=function(){var e,t=0;return this.stream.peekChar()===ns&&(t=1),(e=this.stream.peekChar(t))>=Co&&e<=_o&&(this.stream.advance(t+1),this.stream.advanceWhileChar((function(e){return e>=Co&&e<=_o||0===t&&e===ns})),!0)},e.prototype._newline=function(e){var t=this.stream.peekChar();switch(t){case qo:case Bo:case Ko:return this.stream.advance(1),e.push(String.fromCharCode(t)),t===qo&&this.stream.advanceIfChar(Ko)&&e.push("\n"),!0}return!1},e.prototype._escape=function(e,t){var n=this.stream.peekChar();if(n===Vo){this.stream.advance(1),n=this.stream.peekChar();for(var i=0;i<6&&(n>=Co&&n<=_o||n>=bo&&n<=vo||n>=wo&&n<=xo);)this.stream.advance(1),n=this.stream.peekChar(),i++;if(i>0){try{var 
r=parseInt(this.stream.substring(this.stream.pos()-i),16);r&&e.push(String.fromCharCode(r))}catch(Du){}return n===Ho||n===Go?this.stream.advance(1):this._newline([]),!0}if(n!==qo&&n!==Bo&&n!==Ko)return this.stream.advance(1),e.push(String.fromCharCode(n)),!0;if(t)return this._newline(e)}return!1},e.prototype._stringChar=function(e,t){var n=this.stream.peekChar();return 0!==n&&n!==e&&n!==Vo&&n!==qo&&n!==Bo&&n!==Ko&&(this.stream.advance(1),t.push(String.fromCharCode(n)),!0)},e.prototype._string=function(e){if(this.stream.peekChar()===$o||this.stream.peekChar()===jo){var t=this.stream.nextChar();for(e.push(String.fromCharCode(t));this._stringChar(t,e)||this._escape(e,!0););return this.stream.peekChar()===t?(this.stream.nextChar(),e.push(String.fromCharCode(t)),mo.String):mo.BadString}return null},e.prototype._unquotedChar=function(e){var t=this.stream.peekChar();return 0!==t&&t!==Vo&&t!==$o&&t!==jo&&t!==Mo&&t!==zo&&t!==Ho&&t!==Go&&t!==Ko&&t!==Bo&&t!==qo&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._unquotedString=function(e){for(var t=!1;this._unquotedChar(e)||this._escape(e);)t=!0;return t},e.prototype._whitespace=function(){return this.stream.advanceWhileChar((function(e){return e===Ho||e===Go||e===Ko||e===Bo||e===qo}))>0},e.prototype._name=function(e){for(var t=!1;this._identChar(e)||this._escape(e);)t=!0;return t},e.prototype.ident=function(e){var t=this.stream.pos();if(this._minus(e)){if(this._minus(e)||this._identFirstChar(e)||this._escape(e)){for(;this._identChar(e)||this._escape(e););return!0}}else if(this._identFirstChar(e)||this._escape(e)){for(;this._identChar(e)||this._escape(e););return!0}return this.stream.goBackTo(t),!1},e.prototype._identFirstChar=function(e){var t=this.stream.peekChar();return(t===Do||t>=bo&&t<=yo||t>=wo&&t<=So||t>=128&&t<=65535)&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._minus=function(e){var t=this.stream.peekChar();return 
t===Fo&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._identChar=function(e){var t=this.stream.peekChar();return(t===Do||t===Fo||t>=bo&&t<=yo||t>=wo&&t<=So||t>=Co&&t<=_o||t>=128&&t<=65535)&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._unicodeRange=function(){if(this.stream.advanceIfChar(os)){var e=function(e){return e>=Co&&e<=_o||e>=bo&&e<=vo||e>=wo&&e<=xo},t=this.stream.advanceWhileChar(e)+this.stream.advanceWhileChar((function(e){return e===rs}));if(t>=1&&t<=6){if(!this.stream.advanceIfChar(Fo))return!0;var n=this.stream.advanceWhileChar(e);if(n>=1&&n<=6)return!0}}return!1},e}();function cs(e,t){if(e.length0?e.lastIndexOf(t)===n:0===n&&e===t}function ds(e,t){return void 0===t&&(t=!0),e?e.length<140?e:e.slice(0,140)+(t?"\u2026":""):""}function ps(e,t){for(var n="";t>0;)1===(1&t)&&(n+=e),e+=e,t>>>=1;return n}var us,ms,fs,gs,bs=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}();function vs(e,t){var n=null;return!e||te.end?null:(e.accept((function(e){return-1===e.offset&&-1===e.length||e.offset<=t&&e.end>=t&&(n?e.length<=n.length&&(n=e):n=e,!0)})),n)}function ys(e,t){for(var n=vs(e,t),i=[];n;)i.unshift(n),n=n.parent;return 
i}(ms=us||(us={}))[ms.Undefined=0]="Undefined",ms[ms.Identifier=1]="Identifier",ms[ms.Stylesheet=2]="Stylesheet",ms[ms.Ruleset=3]="Ruleset",ms[ms.Selector=4]="Selector",ms[ms.SimpleSelector=5]="SimpleSelector",ms[ms.SelectorInterpolation=6]="SelectorInterpolation",ms[ms.SelectorCombinator=7]="SelectorCombinator",ms[ms.SelectorCombinatorParent=8]="SelectorCombinatorParent",ms[ms.SelectorCombinatorSibling=9]="SelectorCombinatorSibling",ms[ms.SelectorCombinatorAllSiblings=10]="SelectorCombinatorAllSiblings",ms[ms.SelectorCombinatorShadowPiercingDescendant=11]="SelectorCombinatorShadowPiercingDescendant",ms[ms.Page=12]="Page",ms[ms.PageBoxMarginBox=13]="PageBoxMarginBox",ms[ms.ClassSelector=14]="ClassSelector",ms[ms.IdentifierSelector=15]="IdentifierSelector",ms[ms.ElementNameSelector=16]="ElementNameSelector",ms[ms.PseudoSelector=17]="PseudoSelector",ms[ms.AttributeSelector=18]="AttributeSelector",ms[ms.Declaration=19]="Declaration",ms[ms.Declarations=20]="Declarations",ms[ms.Property=21]="Property",ms[ms.Expression=22]="Expression",ms[ms.BinaryExpression=23]="BinaryExpression",ms[ms.Term=24]="Term",ms[ms.Operator=25]="Operator",ms[ms.Value=26]="Value",ms[ms.StringLiteral=27]="StringLiteral",ms[ms.URILiteral=28]="URILiteral",ms[ms.EscapedValue=29]="EscapedValue",ms[ms.Function=30]="Function",ms[ms.NumericValue=31]="NumericValue",ms[ms.HexColorValue=32]="HexColorValue",ms[ms.RatioValue=33]="RatioValue",ms[ms.MixinDeclaration=34]="MixinDeclaration",ms[ms.MixinReference=35]="MixinReference",ms[ms.VariableName=36]="VariableName",ms[ms.VariableDeclaration=37]="VariableDeclaration",ms[ms.Prio=38]="Prio",ms[ms.Interpolation=39]="Interpolation",ms[ms.NestedProperties=40]="NestedProperties",ms[ms.ExtendsReference=41]="ExtendsReference",ms[ms.SelectorPlaceholder=42]="SelectorPlaceholder",ms[ms.Debug=43]="Debug",ms[ms.If=44]="If",ms[ms.Else=45]="Else",ms[ms.For=46]="For",ms[ms.Each=47]="Each",ms[ms.While=48]="While",ms[ms.MixinContentReference=49]="MixinContentReference",ms[ms.Mi
xinContentDeclaration=50]="MixinContentDeclaration",ms[ms.Media=51]="Media",ms[ms.Keyframe=52]="Keyframe",ms[ms.FontFace=53]="FontFace",ms[ms.Import=54]="Import",ms[ms.Namespace=55]="Namespace",ms[ms.Invocation=56]="Invocation",ms[ms.FunctionDeclaration=57]="FunctionDeclaration",ms[ms.ReturnStatement=58]="ReturnStatement",ms[ms.MediaQuery=59]="MediaQuery",ms[ms.MediaCondition=60]="MediaCondition",ms[ms.MediaFeature=61]="MediaFeature",ms[ms.FunctionParameter=62]="FunctionParameter",ms[ms.FunctionArgument=63]="FunctionArgument",ms[ms.KeyframeSelector=64]="KeyframeSelector",ms[ms.ViewPort=65]="ViewPort",ms[ms.Document=66]="Document",ms[ms.AtApplyRule=67]="AtApplyRule",ms[ms.CustomPropertyDeclaration=68]="CustomPropertyDeclaration",ms[ms.CustomPropertySet=69]="CustomPropertySet",ms[ms.ListEntry=70]="ListEntry",ms[ms.Supports=71]="Supports",ms[ms.SupportsCondition=72]="SupportsCondition",ms[ms.NamespacePrefix=73]="NamespacePrefix",ms[ms.GridLine=74]="GridLine",ms[ms.Plugin=75]="Plugin",ms[ms.UnknownAtRule=76]="UnknownAtRule",ms[ms.Use=77]="Use",ms[ms.ModuleConfiguration=78]="ModuleConfiguration",ms[ms.Forward=79]="Forward",ms[ms.ForwardVisibility=80]="ForwardVisibility",ms[ms.Module=81]="Module",ms[ms.UnicodeRange=82]="UnicodeRange",(gs=fs||(fs={}))[gs.Mixin=0]="Mixin",gs[gs.Rule=1]="Rule",gs[gs.Variable=2]="Variable",gs[gs.Function=3]="Function",gs[gs.Keyframe=4]="Keyframe",gs[gs.Unknown=5]="Unknown",gs[gs.Module=6]="Module",gs[gs.Forward=7]="Forward",gs[gs.ForwardVisibility=8]="ForwardVisibility";var ws,xs,Ss=function(){function e(e,t,n){void 0===e&&(e=-1),void 0===t&&(t=-1),this.parent=null,this.offset=e,this.length=t,n&&(this.nodeType=n)}return Object.defineProperty(e.prototype,"end",{get:function(){return this.offset+this.length},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"type",{get:function(){return this.nodeType||us.Undefined},set:function(e){this.nodeType=e},enumerable:!1,configurable:!0}),e.prototype.getTextProvider=function(){for(var 
e=this;e&&!e.textProvider;)e=e.parent;return e?e.textProvider:function(){return"unknown"}},e.prototype.getText=function(){return this.getTextProvider()(this.offset,this.length)},e.prototype.matches=function(e){return this.length===e.length&&this.getTextProvider()(this.offset,this.length)===e},e.prototype.startsWith=function(e){return this.length>=e.length&&this.getTextProvider()(this.offset,e.length)===e},e.prototype.endsWith=function(e){return this.length>=e.length&&this.getTextProvider()(this.end-e.length,e.length)===e},e.prototype.accept=function(e){if(e(this)&&this.children)for(var t=0,n=this.children;t=0&&e.parent.children.splice(n,1)}e.parent=this;var i=this.children;return i||(i=this.children=[]),-1!==t?i.splice(t,0,e):i.push(e),e},e.prototype.attachTo=function(e,t){return void 0===t&&(t=-1),e&&e.adoptChild(this,t),this},e.prototype.collectIssues=function(e){this.issues&&e.push.apply(e,this.issues)},e.prototype.addIssue=function(e){this.issues||(this.issues=[]),this.issues.push(e)},e.prototype.hasIssue=function(e){return Array.isArray(this.issues)&&this.issues.some((function(t){return t.getRule()===e}))},e.prototype.isErroneous=function(e){return void 0===e&&(e=!1),!!(this.issues&&this.issues.length>0)||e&&Array.isArray(this.children)&&this.children.some((function(e){return e.isErroneous(!0)}))},e.prototype.setNode=function(e,t,n){return void 0===n&&(n=-1),!!t&&(t.attachTo(this,n),this[e]=t,!0)},e.prototype.addChild=function(e){return!!e&&(this.children||(this.children=[]),e.attachTo(this),this.updateOffsetAndLength(e),!0)},e.prototype.updateOffsetAndLength=function(e){(e.offsetthis.end||-1===this.length)&&(this.length=t-this.offset)},e.prototype.hasChildren=function(){return!!this.children&&this.children.length>0},e.prototype.getChildren=function(){return this.children?this.children.slice(0):[]},e.prototype.getChild=function(e){return this.children&&e=0;n--)if((t=this.children[n]).offset<=e)return t;return 
null},e.prototype.findChildAtOffset=function(e,t){var n=this.findFirstChildBeforeOffset(e);return n&&n.end>=e?t&&n.findChildAtOffset(e,!0)||n:null},e.prototype.encloses=function(e){return this.offset<=e.offset&&this.offset+this.length>=e.offset+e.length},e.prototype.getParent=function(){for(var e=this.parent;e instanceof Cs;)e=e.parent;return e},e.prototype.findParent=function(e){for(var t=this;t&&t.type!==e;)t=t.parent;return t},e.prototype.findAParent=function(){for(var e=[],t=0;t2?n-2:0),r=2;r{let i=n[0];return"undefined"!==typeof t[i]?t[i]:e})),n}(t,i)}function Wa(e){return Oa}var Va,Ua,Ka,qa,Ba,ja,$a,Ha,Ga,Ja,Xa,Ya,Qa,Za,el,tl,nl,il,rl,ol,sl,al,ll,cl,hl,dl,pl,ul,ml,fl,gl,bl,vl,yl,wl,xl,Sl,Cl,_l,kl,El,Rl,Nl,Fl,Dl,Tl,Al,Ml,zl,Il=Wa(),Ll=function(){return function(e,t){this.id=e,this.message=t}}(),Pl={NumberExpected:new Ll("css-numberexpected",Il("expected.number","number expected")),ConditionExpected:new Ll("css-conditionexpected",Il("expected.condt","condition expected")),RuleOrSelectorExpected:new Ll("css-ruleorselectorexpected",Il("expected.ruleorselector","at-rule or selector expected")),DotExpected:new Ll("css-dotexpected",Il("expected.dot","dot expected")),ColonExpected:new Ll("css-colonexpected",Il("expected.colon","colon expected")),SemiColonExpected:new Ll("css-semicolonexpected",Il("expected.semicolon","semi-colon expected")),TermExpected:new Ll("css-termexpected",Il("expected.term","term expected")),ExpressionExpected:new Ll("css-expressionexpected",Il("expected.expression","expression expected")),OperatorExpected:new Ll("css-operatorexpected",Il("expected.operator","operator expected")),IdentifierExpected:new Ll("css-identifierexpected",Il("expected.ident","identifier expected")),PercentageExpected:new Ll("css-percentageexpected",Il("expected.percentage","percentage expected")),URIOrStringExpected:new Ll("css-uriorstringexpected",Il("expected.uriorstring","uri or string expected")),URIExpected:new Ll("css-uriexpected",Il("expected.uri","URI 
expected")),VariableNameExpected:new Ll("css-varnameexpected",Il("expected.varname","variable name expected")),VariableValueExpected:new Ll("css-varvalueexpected",Il("expected.varvalue","variable value expected")),PropertyValueExpected:new Ll("css-propertyvalueexpected",Il("expected.propvalue","property value expected")),LeftCurlyExpected:new Ll("css-lcurlyexpected",Il("expected.lcurly","{ expected")),RightCurlyExpected:new Ll("css-rcurlyexpected",Il("expected.rcurly","} expected")),LeftSquareBracketExpected:new Ll("css-rbracketexpected",Il("expected.lsquare","[ expected")),RightSquareBracketExpected:new Ll("css-lbracketexpected",Il("expected.rsquare","] expected")),LeftParenthesisExpected:new Ll("css-lparentexpected",Il("expected.lparen","( expected")),RightParenthesisExpected:new Ll("css-rparentexpected",Il("expected.rparent",") expected")),CommaExpected:new Ll("css-commaexpected",Il("expected.comma","comma expected")),PageDirectiveOrDeclarationExpected:new Ll("css-pagedirordeclexpected",Il("expected.pagedirordecl","page directive or declaraton expected")),UnknownAtRule:new Ll("css-unknownatrule",Il("unknown.atrule","at-rule unknown")),UnknownKeyword:new Ll("css-unknownkeyword",Il("unknown.keyword","unknown keyword")),SelectorExpected:new Ll("css-selectorexpected",Il("expected.selector","selector expected")),StringLiteralExpected:new Ll("css-stringliteralexpected",Il("expected.stringliteral","string literal expected")),WhitespaceExpected:new Ll("css-whitespaceexpected",Il("expected.whitespace","whitespace expected")),MediaQueryExpected:new Ll("css-mediaqueryexpected",Il("expected.mediaquery","media query expected")),IdentifierOrWildcardExpected:new Ll("css-idorwildcardexpected",Il("expected.idorwildcard","identifier or wildcard expected")),WildcardExpected:new Ll("css-wildcardexpected",Il("expected.wildcard","wildcard expected")),IdentifierOrVariableExpected:new Ll("css-idorvarexpected",Il("expected.idorvar","identifier or variable 
expected"))};(Ua=Va||(Va={})).MIN_VALUE=-2147483648,Ua.MAX_VALUE=2147483647,(qa=Ka||(Ka={})).MIN_VALUE=0,qa.MAX_VALUE=2147483647,(ja=Ba||(Ba={})).create=function(e,t){return e===Number.MAX_VALUE&&(e=Ka.MAX_VALUE),t===Number.MAX_VALUE&&(t=Ka.MAX_VALUE),{line:e,character:t}},ja.is=function(e){var t=e;return Pc.objectLiteral(t)&&Pc.uinteger(t.line)&&Pc.uinteger(t.character)},(Ha=$a||($a={})).create=function(e,t,n,i){if(Pc.uinteger(e)&&Pc.uinteger(t)&&Pc.uinteger(n)&&Pc.uinteger(i))return{start:Ba.create(e,t),end:Ba.create(n,i)};if(Ba.is(e)&&Ba.is(t))return{start:e,end:t};throw new Error("Range#create called with invalid arguments["+e+", "+t+", "+n+", "+i+"]")},Ha.is=function(e){var t=e;return Pc.objectLiteral(t)&&Ba.is(t.start)&&Ba.is(t.end)},(Ja=Ga||(Ga={})).create=function(e,t){return{uri:e,range:t}},Ja.is=function(e){var t=e;return Pc.defined(t)&&$a.is(t.range)&&(Pc.string(t.uri)||Pc.undefined(t.uri))},(Ya=Xa||(Xa={})).create=function(e,t,n,i){return{targetUri:e,targetRange:t,targetSelectionRange:n,originSelectionRange:i}},Ya.is=function(e){var t=e;return Pc.defined(t)&&$a.is(t.targetRange)&&Pc.string(t.targetUri)&&($a.is(t.targetSelectionRange)||Pc.undefined(t.targetSelectionRange))&&($a.is(t.originSelectionRange)||Pc.undefined(t.originSelectionRange))},(Za=Qa||(Qa={})).create=function(e,t,n,i){return{red:e,green:t,blue:n,alpha:i}},Za.is=function(e){var t=e;return Pc.numberRange(t.red,0,1)&&Pc.numberRange(t.green,0,1)&&Pc.numberRange(t.blue,0,1)&&Pc.numberRange(t.alpha,0,1)},(tl=el||(el={})).create=function(e,t){return{range:e,color:t}},tl.is=function(e){var t=e;return $a.is(t.range)&&Qa.is(t.color)},(il=nl||(nl={})).create=function(e,t,n){return{label:e,textEdit:t,additionalTextEdits:n}},il.is=function(e){var t=e;return 
Pc.string(t.label)&&(Pc.undefined(t.textEdit)||yl.is(t))&&(Pc.undefined(t.additionalTextEdits)||Pc.typedArray(t.additionalTextEdits,yl.is))},(ol=rl||(rl={})).Comment="comment",ol.Imports="imports",ol.Region="region",(al=sl||(sl={})).create=function(e,t,n,i,r){var o={startLine:e,endLine:t};return Pc.defined(n)&&(o.startCharacter=n),Pc.defined(i)&&(o.endCharacter=i),Pc.defined(r)&&(o.kind=r),o},al.is=function(e){var t=e;return Pc.uinteger(t.startLine)&&Pc.uinteger(t.startLine)&&(Pc.undefined(t.startCharacter)||Pc.uinteger(t.startCharacter))&&(Pc.undefined(t.endCharacter)||Pc.uinteger(t.endCharacter))&&(Pc.undefined(t.kind)||Pc.string(t.kind))},(cl=ll||(ll={})).create=function(e,t){return{location:e,message:t}},cl.is=function(e){var t=e;return Pc.defined(t)&&Ga.is(t.location)&&Pc.string(t.message)},(dl=hl||(hl={})).Error=1,dl.Warning=2,dl.Information=3,dl.Hint=4,(ul=pl||(pl={})).Unnecessary=1,ul.Deprecated=2,(ml||(ml={})).is=function(e){var t=e;return void 0!==t&&null!==t&&Pc.string(t.href)},(gl=fl||(fl={})).create=function(e,t,n,i,r,o){var s={range:e,message:t};return Pc.defined(n)&&(s.severity=n),Pc.defined(i)&&(s.code=i),Pc.defined(r)&&(s.source=r),Pc.defined(o)&&(s.relatedInformation=o),s},gl.is=function(e){var t,n=e;return Pc.defined(n)&&$a.is(n.range)&&Pc.string(n.message)&&(Pc.number(n.severity)||Pc.undefined(n.severity))&&(Pc.integer(n.code)||Pc.string(n.code)||Pc.undefined(n.code))&&(Pc.undefined(n.codeDescription)||Pc.string(null===(t=n.codeDescription)||void 0===t?void 0:t.href))&&(Pc.string(n.source)||Pc.undefined(n.source))&&(Pc.undefined(n.relatedInformation)||Pc.typedArray(n.relatedInformation,ll.is))},(vl=bl||(bl={})).create=function(e,t){for(var n=[],i=2;i0&&(r.arguments=n),r},vl.is=function(e){var t=e;return 
Pc.defined(t)&&Pc.string(t.title)&&Pc.string(t.command)},(wl=yl||(yl={})).replace=function(e,t){return{range:e,newText:t}},wl.insert=function(e,t){return{range:{start:e,end:e},newText:t}},wl.del=function(e){return{range:e,newText:""}},wl.is=function(e){var t=e;return Pc.objectLiteral(t)&&Pc.string(t.newText)&&$a.is(t.range)},(Sl=xl||(xl={})).create=function(e,t,n){var i={label:e};return void 0!==t&&(i.needsConfirmation=t),void 0!==n&&(i.description=n),i},Sl.is=function(e){var t=e;return void 0!==t&&Pc.objectLiteral(t)&&Pc.string(t.label)&&(Pc.boolean(t.needsConfirmation)||void 0===t.needsConfirmation)&&(Pc.string(t.description)||void 0===t.description)},(Cl||(Cl={})).is=function(e){return"string"===typeof e},(kl=_l||(_l={})).replace=function(e,t,n){return{range:e,newText:t,annotationId:n}},kl.insert=function(e,t,n){return{range:{start:e,end:e},newText:t,annotationId:n}},kl.del=function(e,t){return{range:e,newText:"",annotationId:t}},kl.is=function(e){var t=e;return yl.is(t)&&(xl.is(t.annotationId)||Cl.is(t.annotationId))},(Rl=El||(El={})).create=function(e,t){return{textDocument:e,edits:t}},Rl.is=function(e){var t=e;return Pc.defined(t)&&Kl.is(t.textDocument)&&Array.isArray(t.edits)},(Fl=Nl||(Nl={})).create=function(e,t,n){var i={kind:"create",uri:e};return void 0===t||void 0===t.overwrite&&void 0===t.ignoreIfExists||(i.options=t),void 0!==n&&(i.annotationId=n),i},Fl.is=function(e){var t=e;return t&&"create"===t.kind&&Pc.string(t.uri)&&(void 0===t.options||(void 0===t.options.overwrite||Pc.boolean(t.options.overwrite))&&(void 0===t.options.ignoreIfExists||Pc.boolean(t.options.ignoreIfExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(Tl=Dl||(Dl={})).create=function(e,t,n,i){var r={kind:"rename",oldUri:e,newUri:t};return void 0===n||void 0===n.overwrite&&void 0===n.ignoreIfExists||(r.options=n),void 0!==i&&(r.annotationId=i),r},Tl.is=function(e){var t=e;return t&&"rename"===t.kind&&Pc.string(t.oldUri)&&Pc.string(t.newUri)&&(void 0===t.options||(void 
0===t.options.overwrite||Pc.boolean(t.options.overwrite))&&(void 0===t.options.ignoreIfExists||Pc.boolean(t.options.ignoreIfExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(Ml=Al||(Al={})).create=function(e,t,n){var i={kind:"delete",uri:e};return void 0===t||void 0===t.recursive&&void 0===t.ignoreIfNotExists||(i.options=t),void 0!==n&&(i.annotationId=n),i},Ml.is=function(e){var t=e;return t&&"delete"===t.kind&&Pc.string(t.uri)&&(void 0===t.options||(void 0===t.options.recursive||Pc.boolean(t.options.recursive))&&(void 0===t.options.ignoreIfNotExists||Pc.boolean(t.options.ignoreIfNotExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(zl||(zl={})).is=function(e){var t=e;return t&&(void 0!==t.changes||void 0!==t.documentChanges)&&(void 0===t.documentChanges||t.documentChanges.every((function(e){return Pc.string(e.kind)?Nl.is(e)||Dl.is(e)||Al.is(e):El.is(e)})))};var Ol,Wl,Vl,Ul,Kl,ql,Bl,jl,$l,Hl,Gl,Jl,Xl,Yl,Ql,Zl,ec,tc,nc,ic,rc,oc,sc,ac,lc,cc,hc,dc,pc,uc,mc,fc,gc,bc,vc,yc,wc,xc,Sc,Cc,_c,kc,Ec,Rc,Nc,Fc,Dc,Tc,Ac,Mc,zc,Ic=function(){function e(e,t){this.edits=e,this.changeAnnotations=t}return e.prototype.insert=function(e,t,n){var i,r;if(void 0===n?i=yl.insert(e,t):Cl.is(n)?(r=n,i=_l.insert(e,t,n)):(this.assertChangeAnnotations(this.changeAnnotations),r=this.changeAnnotations.manage(n),i=_l.insert(e,t,r)),this.edits.push(i),void 0!==r)return r},e.prototype.replace=function(e,t,n){var i,r;if(void 0===n?i=yl.replace(e,t):Cl.is(n)?(r=n,i=_l.replace(e,t,n)):(this.assertChangeAnnotations(this.changeAnnotations),r=this.changeAnnotations.manage(n),i=_l.replace(e,t,r)),this.edits.push(i),void 0!==r)return r},e.prototype.delete=function(e,t){var n,i;if(void 0===t?n=yl.del(e):Cl.is(t)?(i=t,n=_l.del(e,t)):(this.assertChangeAnnotations(this.changeAnnotations),i=this.changeAnnotations.manage(t),n=_l.del(e,i)),this.edits.push(n),void 0!==i)return i},e.prototype.add=function(e){this.edits.push(e)},e.prototype.all=function(){return 
this.edits},e.prototype.clear=function(){this.edits.splice(0,this.edits.length)},e.prototype.assertChangeAnnotations=function(e){if(void 0===e)throw new Error("Text edit change is not configured to manage change annotations.")},e}(),Lc=function(){function e(e){this._annotations=void 0===e?Object.create(null):e,this._counter=0,this._size=0}return e.prototype.all=function(){return this._annotations},Object.defineProperty(e.prototype,"size",{get:function(){return this._size},enumerable:!1,configurable:!0}),e.prototype.manage=function(e,t){var n;if(Cl.is(e)?n=e:(n=this.nextId(),t=e),void 0!==this._annotations[n])throw new Error("Id "+n+" is already in use.");if(void 0===t)throw new Error("No annotation provided for id "+n);return this._annotations[n]=t,this._size++,n},e.prototype.nextId=function(){return this._counter++,this._counter.toString()},e}();!function(){function e(e){var t=this;this._textEditChanges=Object.create(null),void 0!==e?(this._workspaceEdit=e,e.documentChanges?(this._changeAnnotations=new Lc(e.changeAnnotations),e.changeAnnotations=this._changeAnnotations.all(),e.documentChanges.forEach((function(e){if(El.is(e)){var n=new Ic(e.edits,t._changeAnnotations);t._textEditChanges[e.textDocument.uri]=n}}))):e.changes&&Object.keys(e.changes).forEach((function(n){var i=new Ic(e.changes[n]);t._textEditChanges[n]=i}))):this._workspaceEdit={}}Object.defineProperty(e.prototype,"edit",{get:function(){return this.initDocumentChanges(),void 0!==this._changeAnnotations&&(0===this._changeAnnotations.size?this._workspaceEdit.changeAnnotations=void 0:this._workspaceEdit.changeAnnotations=this._changeAnnotations.all()),this._workspaceEdit},enumerable:!1,configurable:!0}),e.prototype.getTextEditChange=function(e){if(Kl.is(e)){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var t={uri:e.uri,version:e.version};if(!(i=this._textEditChanges[t.uri])){var 
n={textDocument:t,edits:r=[]};this._workspaceEdit.documentChanges.push(n),i=new Ic(r,this._changeAnnotations),this._textEditChanges[t.uri]=i}return i}if(this.initChanges(),void 0===this._workspaceEdit.changes)throw new Error("Workspace edit is not configured for normal text edit changes.");var i;if(!(i=this._textEditChanges[e])){var r=[];this._workspaceEdit.changes[e]=r,i=new Ic(r),this._textEditChanges[e]=i}return i},e.prototype.initDocumentChanges=function(){void 0===this._workspaceEdit.documentChanges&&void 0===this._workspaceEdit.changes&&(this._changeAnnotations=new Lc,this._workspaceEdit.documentChanges=[],this._workspaceEdit.changeAnnotations=this._changeAnnotations.all())},e.prototype.initChanges=function(){void 0===this._workspaceEdit.documentChanges&&void 0===this._workspaceEdit.changes&&(this._workspaceEdit.changes=Object.create(null))},e.prototype.createFile=function(e,t,n){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var i,r,o;if(xl.is(t)||Cl.is(t)?i=t:n=t,void 0===i?r=Nl.create(e,n):(o=Cl.is(i)?i:this._changeAnnotations.manage(i),r=Nl.create(e,n,o)),this._workspaceEdit.documentChanges.push(r),void 0!==o)return o},e.prototype.renameFile=function(e,t,n,i){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var r,o,s;if(xl.is(n)||Cl.is(n)?r=n:i=n,void 0===r?o=Dl.create(e,t,i):(s=Cl.is(r)?r:this._changeAnnotations.manage(r),o=Dl.create(e,t,i,s)),this._workspaceEdit.documentChanges.push(o),void 0!==s)return s},e.prototype.deleteFile=function(e,t,n){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var i,r,o;if(xl.is(t)||Cl.is(t)?i=t:n=t,void 
0===i?r=Al.create(e,n):(o=Cl.is(i)?i:this._changeAnnotations.manage(i),r=Al.create(e,n,o)),this._workspaceEdit.documentChanges.push(r),void 0!==o)return o}}();(Wl=Ol||(Ol={})).create=function(e){return{uri:e}},Wl.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)},(Ul=Vl||(Vl={})).create=function(e,t){return{uri:e,version:t}},Ul.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&Pc.integer(t.version)},(ql=Kl||(Kl={})).create=function(e,t){return{uri:e,version:t}},ql.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&(null===t.version||Pc.integer(t.version))},(jl=Bl||(Bl={})).create=function(e,t,n,i){return{uri:e,languageId:t,version:n,text:i}},jl.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&Pc.string(t.languageId)&&Pc.integer(t.version)&&Pc.string(t.text)},(Hl=$l||($l={})).PlainText="plaintext",Hl.Markdown="markdown",function(e){e.is=function(t){var n=t;return n===e.PlainText||n===e.Markdown}}($l||($l={})),(Gl||(Gl={})).is=function(e){var t=e;return Pc.objectLiteral(e)&&$l.is(t.kind)&&Pc.string(t.value)},(Xl=Jl||(Jl={})).Text=1,Xl.Method=2,Xl.Function=3,Xl.Constructor=4,Xl.Field=5,Xl.Variable=6,Xl.Class=7,Xl.Interface=8,Xl.Module=9,Xl.Property=10,Xl.Unit=11,Xl.Value=12,Xl.Enum=13,Xl.Keyword=14,Xl.Snippet=15,Xl.Color=16,Xl.File=17,Xl.Reference=18,Xl.Folder=19,Xl.EnumMember=20,Xl.Constant=21,Xl.Struct=22,Xl.Event=23,Xl.Operator=24,Xl.TypeParameter=25,(Ql=Yl||(Yl={})).PlainText=1,Ql.Snippet=2,(Zl||(Zl={})).Deprecated=1,(tc=ec||(ec={})).create=function(e,t,n){return{newText:e,insert:t,replace:n}},tc.is=function(e){var t=e;return t&&Pc.string(t.newText)&&$a.is(t.insert)&&$a.is(t.replace)},(ic=nc||(nc={})).asIs=1,ic.adjustIndentation=2,(rc||(rc={})).create=function(e){return{label:e}},(oc||(oc={})).create=function(e,t){return{items:e||[],isIncomplete:!!t}},(ac=sc||(sc={})).fromPlainText=function(e){return e.replace(/[\\`*_{}[\]()#+\-.!]/g,"\\$&")},ac.is=function(e){var t=e;return 
Pc.string(t)||Pc.objectLiteral(t)&&Pc.string(t.language)&&Pc.string(t.value)},(lc||(lc={})).is=function(e){var t=e;return!!t&&Pc.objectLiteral(t)&&(Gl.is(t.contents)||sc.is(t.contents)||Pc.typedArray(t.contents,sc.is))&&(void 0===e.range||$a.is(e.range))},(cc||(cc={})).create=function(e,t){return t?{label:e,documentation:t}:{label:e}},(hc||(hc={})).create=function(e,t){for(var n=[],i=2;i=0;s--){var a=r[s],l=e.offsetAt(a.range.start),c=e.offsetAt(a.range.end);if(!(c<=o))throw new Error("Overlapping edit");i=i.substring(0,l)+a.newText+i.substring(c,i.length),o=l}return i}}(zc||(zc={}));var Pc,Oc=function(){function e(e,t,n,i){this._uri=e,this._languageId=t,this._version=n,this._content=i,this._lineOffsets=void 0}return Object.defineProperty(e.prototype,"uri",{get:function(){return this._uri},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"languageId",{get:function(){return this._languageId},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"version",{get:function(){return this._version},enumerable:!1,configurable:!0}),e.prototype.getText=function(e){if(e){var t=this.offsetAt(e.start),n=this.offsetAt(e.end);return this._content.substring(t,n)}return this._content},e.prototype.update=function(e,t){this._content=e.text,this._version=t,this._lineOffsets=void 0},e.prototype.getLineOffsets=function(){if(void 0===this._lineOffsets){for(var e=[],t=this._content,n=!0,i=0;i0&&e.push(t.length),this._lineOffsets=e}return this._lineOffsets},e.prototype.positionAt=function(e){e=Math.max(Math.min(e,this._content.length),0);var t=this.getLineOffsets(),n=0,i=t.length;if(0===i)return Ba.create(0,e);for(;ne?i=r:n=r+1}var o=n-1;return Ba.create(o,e-t[o])},e.prototype.offsetAt=function(e){var t=this.getLineOffsets();if(e.line>=t.length)return this._content.length;if(e.line<0)return 0;var n=t[e.line],i=e.line+1e?i=r:n=r+1}let r=n-1;return{line:r,character:e-t[r]}}offsetAt(e){let t=this.getLineOffsets();if(e.line>=t.length)return 
this._content.length;if(e.line<0)return 0;let n=t[e.line],i=e.line+12&&void 0!==arguments[2]?arguments[2]:0;const i=t?[n]:[];for(let r=0;rn.line||t.line===n.line&&t.character>n.character?{start:n,end:t}:e}function Gc(e){const t=Hc(e.range);return t!==e.range?{newText:e.newText,range:t}:e}(Vc=Wc||(Wc={})).create=function(e,t,n,i){return new Bc(e,t,n,i)},Vc.update=function(e,t,n){if(e instanceof Bc)return e.update(t,n),e;throw new Error("TextDocument.update: document must be created by TextDocument.create")},Vc.applyEdits=function(e,t){let n=e.getText(),i=jc(t.map(Gc),((e,t)=>{let n=e.range.start.line-t.range.start.line;return 0===n?e.range.start.character-t.range.start.character:n})),r=0;const o=[];for(const s of i){let t=e.offsetAt(s.range.start);if(tr&&o.push(n.substring(r,t)),s.newText.length&&o.push(s.newText),r=e.offsetAt(s.range.end)}return o.push(n.substr(r)),o.join("")},(Uc||(Uc={})).LATEST={textDocument:{completion:{completionItem:{documentationFormat:[$l.Markdown,$l.PlainText]}},hover:{contentFormat:[$l.Markdown,$l.PlainText]}}},(qc=Kc||(Kc={}))[qc.Unknown=0]="Unknown",qc[qc.File=1]="File",qc[qc.Directory=2]="Directory",qc[qc.SymbolicLink=64]="SymbolicLink";var Jc={E:"Edge",FF:"Firefox",S:"Safari",C:"Chrome",IE:"IE",O:"Opera"};function Xc(e){switch(e){case"experimental":return"\u26a0\ufe0f Property is experimental. Be cautious when using it.\ufe0f\n\n";case"nonstandard":return"\ud83d\udea8\ufe0f Property is nonstandard. Avoid using it.\n\n";case"obsolete":return"\ud83d\udea8\ufe0f\ufe0f\ufe0f Property is obsolete. 
Avoid using it.\n\n";default:return""}}function Yc(e,t,n){var i;if(""!==(i=t?{kind:"markdown",value:eh(e,n)}:{kind:"plaintext",value:Zc(e,n)}).value)return i}function Qc(e){return(e=e.replace(/[\\`*_{}[\]()#+\-.!]/g,"\\$&")).replace(//g,">")}function Zc(e,t){if(!e.description||""===e.description)return"";if("string"!==typeof e.description)return e.description.value;var n="";if(!1!==(null===t||void 0===t?void 0:t.documentation)){e.status&&(n+=Xc(e.status)),n+=e.description;var i=th(e.browsers);i&&(n+="\n("+i+")"),"syntax"in e&&(n+="\n\nSyntax: ".concat(e.syntax))}return e.references&&e.references.length>0&&!1!==(null===t||void 0===t?void 0:t.references)&&(n.length>0&&(n+="\n\n"),n+=e.references.map((function(e){return"".concat(e.name,": ").concat(e.url)})).join(" | ")),n}function eh(e,t){if(!e.description||""===e.description)return"";var n="";if(!1!==(null===t||void 0===t?void 0:t.documentation)){e.status&&(n+=Xc(e.status)),"string"===typeof e.description?n+=Qc(e.description):n+=e.description.kind===$l.Markdown?e.description.value:Qc(e.description.value);var i=th(e.browsers);i&&(n+="\n\n("+Qc(i)+")"),"syntax"in e&&e.syntax&&(n+="\n\nSyntax: ".concat(Qc(e.syntax)))}return e.references&&e.references.length>0&&!1!==(null===t||void 0===t?void 0:t.references)&&(n.length>0&&(n+="\n\n"),n+=e.references.map((function(e){return"[".concat(e.name,"](").concat(e.url,")")})).join(" | ")),n}function th(e){return void 0===e&&(e=[]),0===e.length?null:e.map((function(e){var t="",n=e.match(/([A-Z]+)(\d+)?/),i=n[1],r=n[2];return i in Jc&&(t+=Jc[i]),r&&(t+=" "+r),t})).join(", ")}var nh=Wa(),ih=[{func:"rgb($red, $green, $blue)",desc:nh("css.builtin.rgb","Creates a Color from red, green, and blue values.")},{func:"rgba($red, $green, $blue, $alpha)",desc:nh("css.builtin.rgba","Creates a Color from red, green, blue, and alpha values.")},{func:"hsl($hue, $saturation, $lightness)",desc:nh("css.builtin.hsl","Creates a Color from hue, saturation, and lightness values.")},{func:"hsla($hue, 
$saturation, $lightness, $alpha)",desc:nh("css.builtin.hsla","Creates a Color from hue, saturation, lightness, and alpha values.")},{func:"hwb($hue $white $black)",desc:nh("css.builtin.hwb","Creates a Color from hue, white and black.")}],rh={aliceblue:"#f0f8ff",antiquewhite:"#faebd7",aqua:"#00ffff",aquamarine:"#7fffd4",azure:"#f0ffff",beige:"#f5f5dc",bisque:"#ffe4c4",black:"#000000",blanchedalmond:"#ffebcd",blue:"#0000ff",blueviolet:"#8a2be2",brown:"#a52a2a",burlywood:"#deb887",cadetblue:"#5f9ea0",chartreuse:"#7fff00",chocolate:"#d2691e",coral:"#ff7f50",cornflowerblue:"#6495ed",cornsilk:"#fff8dc",crimson:"#dc143c",cyan:"#00ffff",darkblue:"#00008b",darkcyan:"#008b8b",darkgoldenrod:"#b8860b",darkgray:"#a9a9a9",darkgrey:"#a9a9a9",darkgreen:"#006400",darkkhaki:"#bdb76b",darkmagenta:"#8b008b",darkolivegreen:"#556b2f",darkorange:"#ff8c00",darkorchid:"#9932cc",darkred:"#8b0000",darksalmon:"#e9967a",darkseagreen:"#8fbc8f",darkslateblue:"#483d8b",darkslategray:"#2f4f4f",darkslategrey:"#2f4f4f",darkturquoise:"#00ced1",darkviolet:"#9400d3",deeppink:"#ff1493",deepskyblue:"#00bfff",dimgray:"#696969",dimgrey:"#696969",dodgerblue:"#1e90ff",firebrick:"#b22222",floralwhite:"#fffaf0",forestgreen:"#228b22",fuchsia:"#ff00ff",gainsboro:"#dcdcdc",ghostwhite:"#f8f8ff",gold:"#ffd700",goldenrod:"#daa520",gray:"#808080",grey:"#808080",green:"#008000",greenyellow:"#adff2f",honeydew:"#f0fff0",hotpink:"#ff69b4",indianred:"#cd5c5c",indigo:"#4b0082",ivory:"#fffff0",khaki:"#f0e68c",lavender:"#e6e6fa",lavenderblush:"#fff0f5",lawngreen:"#7cfc00",lemonchiffon:"#fffacd",lightblue:"#add8e6",lightcoral:"#f08080",lightcyan:"#e0ffff",lightgoldenrodyellow:"#fafad2",lightgray:"#d3d3d3",lightgrey:"#d3d3d3",lightgreen:"#90ee90",lightpink:"#ffb6c1",lightsalmon:"#ffa07a",lightseagreen:"#20b2aa",lightskyblue:"#87cefa",lightslategray:"#778899",lightslategrey:"#778899",lightsteelblue:"#b0c4de",lightyellow:"#ffffe0",lime:"#00ff00",limegreen:"#32cd32",linen:"#faf0e6",magenta:"#ff00ff",maroon:"#800000",mediumaquamari
ne:"#66cdaa",mediumblue:"#0000cd",mediumorchid:"#ba55d3",mediumpurple:"#9370d8",mediumseagreen:"#3cb371",mediumslateblue:"#7b68ee",mediumspringgreen:"#00fa9a",mediumturquoise:"#48d1cc",mediumvioletred:"#c71585",midnightblue:"#191970",mintcream:"#f5fffa",mistyrose:"#ffe4e1",moccasin:"#ffe4b5",navajowhite:"#ffdead",navy:"#000080",oldlace:"#fdf5e6",olive:"#808000",olivedrab:"#6b8e23",orange:"#ffa500",orangered:"#ff4500",orchid:"#da70d6",palegoldenrod:"#eee8aa",palegreen:"#98fb98",paleturquoise:"#afeeee",palevioletred:"#d87093",papayawhip:"#ffefd5",peachpuff:"#ffdab9",peru:"#cd853f",pink:"#ffc0cb",plum:"#dda0dd",powderblue:"#b0e0e6",purple:"#800080",red:"#ff0000",rebeccapurple:"#663399",rosybrown:"#bc8f8f",royalblue:"#4169e1",saddlebrown:"#8b4513",salmon:"#fa8072",sandybrown:"#f4a460",seagreen:"#2e8b57",seashell:"#fff5ee",sienna:"#a0522d",silver:"#c0c0c0",skyblue:"#87ceeb",slateblue:"#6a5acd",slategray:"#708090",slategrey:"#708090",snow:"#fffafa",springgreen:"#00ff7f",steelblue:"#4682b4",tan:"#d2b48c",teal:"#008080",thistle:"#d8bfd8",tomato:"#ff6347",turquoise:"#40e0d0",violet:"#ee82ee",wheat:"#f5deb3",white:"#ffffff",whitesmoke:"#f5f5f5",yellow:"#ffff00",yellowgreen:"#9acd32"},oh={currentColor:"The value of the 'color' property. The computed value of the 'currentColor' keyword is the computed value of the 'color' property. If the 'currentColor' keyword is set on the 'color' property itself, it is treated as 'color:inherit' at parse time.",transparent:"Fully transparent. 
This keyword can be considered a shorthand for rgba(0,0,0,0) which is its computed value."};function sh(e,t){var n=e.getText().match(/^([-+]?[0-9]*\.?[0-9]+)(%?)$/);if(n){n[2]&&(t=100);var i=parseFloat(n[1])/t;if(i>=0&&i<=1)return i}throw new Error}function ah(e){var t=e.getText(),n=t.match(/^([-+]?[0-9]*\.?[0-9]+)(deg|rad|grad|turn)?$/);if(n)switch(n[2]){case"deg":return parseFloat(t)%360;case"rad":return 180*parseFloat(t)/Math.PI%360;case"grad":return.9*parseFloat(t)%360;case"turn":return 360*parseFloat(t)%360;default:if("undefined"===typeof n[2])return parseFloat(t)%360}throw new Error}var lh=48,ch=57,hh=65,dh=97,ph=102;function uh(e){return e=dh&&e<=ph?e-dh+10:0)}function mh(e){if("#"!==e[0])return null;switch(e.length){case 4:return{red:17*uh(e.charCodeAt(1))/255,green:17*uh(e.charCodeAt(2))/255,blue:17*uh(e.charCodeAt(3))/255,alpha:1};case 5:return{red:17*uh(e.charCodeAt(1))/255,green:17*uh(e.charCodeAt(2))/255,blue:17*uh(e.charCodeAt(3))/255,alpha:17*uh(e.charCodeAt(4))/255};case 7:return{red:(16*uh(e.charCodeAt(1))+uh(e.charCodeAt(2)))/255,green:(16*uh(e.charCodeAt(3))+uh(e.charCodeAt(4)))/255,blue:(16*uh(e.charCodeAt(5))+uh(e.charCodeAt(6)))/255,alpha:1};case 9:return{red:(16*uh(e.charCodeAt(1))+uh(e.charCodeAt(2)))/255,green:(16*uh(e.charCodeAt(3))+uh(e.charCodeAt(4)))/255,blue:(16*uh(e.charCodeAt(5))+uh(e.charCodeAt(6)))/255,alpha:(16*uh(e.charCodeAt(7))+uh(e.charCodeAt(8)))/255}}return null}function fh(e,t,n,i){if(void 0===i&&(i=1),0===t)return{red:n,green:n,blue:n,alpha:i};var r=function(e,t,n){for(;n<0;)n+=6;for(;n>=6;)n-=6;return n<1?(t-e)*n+e:n<3?t:n<4?(t-e)*(4-n)+e:e},o=n<=.5?n*(t+1):n+t-n*t,s=2*n-o;return{red:r(s,o,(e/=60)+2),green:r(s,o,e),blue:r(s,o,e-2),alpha:i}}function gh(e){var t=e.red,n=e.green,i=e.blue,r=e.alpha,o=Math.max(t,n,i),s=Math.min(t,n,i),a=0,l=0,c=(s+o)/2,h=o-s;if(h>0){switch(l=Math.min(c<=.5?h/(2*c):h/(2-2*c),1),o){case t:a=(n-i)/h+(n4)return null;try{var 
c=4===i.length?sh(i[3],1):1;if("rgb"===n||"rgba"===n)return{red:sh(i[0],255),green:sh(i[1],255),blue:sh(i[2],255),alpha:c};if("hsl"===n||"hsla"===n)return fh(ah(i[0]),sh(i[1],100),sh(i[2],100),c);if("hwb"===n)return function(e,t,n,i){if(void 0===i&&(i=1),t+n>=1){var r=t/(t+n);return{red:r,green:r,blue:r,alpha:i}}var o=fh(e,1,.5,i),s=o.red;s*=1-t-n,s+=t;var a=o.green;a*=1-t-n,a+=t;var l=o.blue;return l*=1-t-n,{red:s,green:a,blue:l+=t,alpha:i}}(ah(i[0]),sh(i[1],100),sh(i[2],100),c)}catch(Du){return null}}else if(e.type===us.Identifier){if(e.parent&&e.parent.type!==us.Term)return null;var h=e.parent;if(h&&h.parent&&h.parent.type===us.BinaryExpression){var d=h.parent;if(d.parent&&d.parent.type===us.ListEntry&&d.parent.key===d)return null}var p=e.getText().toLowerCase();if("none"===p)return null;var u=rh[p];if(u)return mh(u)}return null}var vh={bottom:"Computes to \u2018100%\u2019 for the vertical position if one or two values are given, otherwise specifies the bottom edge as the origin for the next offset.",center:"Computes to \u201850%\u2019 (\u2018left 50%\u2019) for the horizontal position if the horizontal position is not otherwise specified, or \u201850%\u2019 (\u2018top 50%\u2019) for the vertical position if it is.",left:"Computes to \u20180%\u2019 for the horizontal position if one or two values are given, otherwise specifies the left edge as the origin for the next offset.",right:"Computes to \u2018100%\u2019 for the horizontal position if one or two values are given, otherwise specifies the right edge as the origin for the next offset.",top:"Computes to \u20180%\u2019 for the vertical position if one or two values are given, otherwise specifies the top edge as the origin for the next offset."},yh={"no-repeat":"Placed once and not repeated in this direction.",repeat:"Repeated in this direction as often as needed to cover the background painting area.","repeat-x":"Computes to \u2018repeat no-repeat\u2019.","repeat-y":"Computes to \u2018no-repeat 
repeat\u2019.",round:"Repeated as often as will fit within the background positioning area. If it doesn\u2019t fit a whole number of times, it is rescaled so that it does.",space:"Repeated as often as will fit within the background positioning area without being clipped and then the images are spaced out to fill the area."},wh={dashed:"A series of square-ended dashes.",dotted:"A series of round dots.",double:"Two parallel solid lines with some space between them.",groove:"Looks as if it were carved in the canvas.",hidden:"Same as \u2018none\u2019, but has different behavior in the border conflict resolution rules for border-collapsed tables.",inset:"Looks as if the content on the inside of the border is sunken into the canvas.",none:"No border. Color and width are ignored.",outset:"Looks as if the content on the inside of the border is coming out of the canvas.",ridge:"Looks as if it were coming out of the canvas.",solid:"A single line segment."},xh=["medium","thick","thin"],Sh={"border-box":"The background is painted within (clipped to) the border box.","content-box":"The background is painted within (clipped to) the content box.","padding-box":"The background is painted within (clipped to) the padding box."},Ch={"margin-box":"Uses the margin box as reference box.","fill-box":"Uses the object bounding box as reference box.","stroke-box":"Uses the stroke bounding box as reference box.","view-box":"Uses the nearest SVG viewport as reference box."},_h={initial:"Represents the value specified as the property\u2019s initial value.",inherit:"Represents the computed value of the property on the element\u2019s parent.",unset:"Acts as either `inherit` or `initial`, depending on whether the property is inherited or not."},kh={"var()":"Evaluates the value of a custom variable.","calc()":"Evaluates an mathematical expression. 
The following operators can be used: + - * /."},Eh={"url()":"Reference an image file by URL","image()":"Provide image fallbacks and annotations.","-webkit-image-set()":"Provide multiple resolutions. Remember to use unprefixed image-set() in addition.","image-set()":"Provide multiple resolutions of an image and const the UA decide which is most appropriate in a given situation.","-moz-element()":"Use an element in the document as an image. Remember to use unprefixed element() in addition.","element()":"Use an element in the document as an image.","cross-fade()":"Indicates the two images to be combined and how far along in the transition the combination is.","-webkit-gradient()":"Deprecated. Use modern linear-gradient() or radial-gradient() instead.","-webkit-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","-moz-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","-o-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","linear-gradient()":"A linear gradient is created by specifying a straight gradient line, and then several colors placed along that line.","-webkit-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","-moz-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","-o-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","repeating-linear-gradient()":"Same as linear-gradient, except the color-stops are repeated infinitely in both directions, with their positions shifted by multiples of the difference between the last specified color-stop\u2019s position and the first specified color-stop\u2019s position.","-webkit-radial-gradient()":"Radial gradient. Remember to use unprefixed version in addition.","-moz-radial-gradient()":"Radial gradient. 
Remember to use unprefixed version in addition.","radial-gradient()":"Colors emerge from a single point and smoothly spread outward in a circular or elliptical shape.","-webkit-repeating-radial-gradient()":"Repeating radial gradient. Remember to use unprefixed version in addition.","-moz-repeating-radial-gradient()":"Repeating radial gradient. Remember to use unprefixed version in addition.","repeating-radial-gradient()":"Same as radial-gradient, except the color-stops are repeated infinitely in both directions, with their positions shifted by multiples of the difference between the last specified color-stop\u2019s position and the first specified color-stop\u2019s position."},Rh={ease:"Equivalent to cubic-bezier(0.25, 0.1, 0.25, 1.0).","ease-in":"Equivalent to cubic-bezier(0.42, 0, 1.0, 1.0).","ease-in-out":"Equivalent to cubic-bezier(0.42, 0, 0.58, 1.0).","ease-out":"Equivalent to cubic-bezier(0, 0, 0.58, 1.0).",linear:"Equivalent to cubic-bezier(0.0, 0.0, 1.0, 1.0).","step-end":"Equivalent to steps(1, end).","step-start":"Equivalent to steps(1, start).","steps()":"The first parameter specifies the number of intervals in the function. The second parameter, which is optional, is either the value \u201cstart\u201d or \u201cend\u201d.","cubic-bezier()":"Specifies a cubic-bezier curve. The four values specify points P1 and P2 of the curve as (x1, y1, x2, y2).","cubic-bezier(0.6, -0.28, 0.735, 0.045)":"Ease-in Back. Overshoots.","cubic-bezier(0.68, -0.55, 0.265, 1.55)":"Ease-in-out Back. Overshoots.","cubic-bezier(0.175, 0.885, 0.32, 1.275)":"Ease-out Back. Overshoots.","cubic-bezier(0.6, 0.04, 0.98, 0.335)":"Ease-in Circular. Based on half circle.","cubic-bezier(0.785, 0.135, 0.15, 0.86)":"Ease-in-out Circular. Based on half circle.","cubic-bezier(0.075, 0.82, 0.165, 1)":"Ease-out Circular. Based on half circle.","cubic-bezier(0.55, 0.055, 0.675, 0.19)":"Ease-in Cubic. Based on power of three.","cubic-bezier(0.645, 0.045, 0.355, 1)":"Ease-in-out Cubic. 
Based on power of three.","cubic-bezier(0.215, 0.610, 0.355, 1)":"Ease-out Cubic. Based on power of three.","cubic-bezier(0.95, 0.05, 0.795, 0.035)":"Ease-in Exponential. Based on two to the power ten.","cubic-bezier(1, 0, 0, 1)":"Ease-in-out Exponential. Based on two to the power ten.","cubic-bezier(0.19, 1, 0.22, 1)":"Ease-out Exponential. Based on two to the power ten.","cubic-bezier(0.47, 0, 0.745, 0.715)":"Ease-in Sine.","cubic-bezier(0.445, 0.05, 0.55, 0.95)":"Ease-in-out Sine.","cubic-bezier(0.39, 0.575, 0.565, 1)":"Ease-out Sine.","cubic-bezier(0.55, 0.085, 0.68, 0.53)":"Ease-in Quadratic. Based on power of two.","cubic-bezier(0.455, 0.03, 0.515, 0.955)":"Ease-in-out Quadratic. Based on power of two.","cubic-bezier(0.25, 0.46, 0.45, 0.94)":"Ease-out Quadratic. Based on power of two.","cubic-bezier(0.895, 0.03, 0.685, 0.22)":"Ease-in Quartic. Based on power of four.","cubic-bezier(0.77, 0, 0.175, 1)":"Ease-in-out Quartic. Based on power of four.","cubic-bezier(0.165, 0.84, 0.44, 1)":"Ease-out Quartic. Based on power of four.","cubic-bezier(0.755, 0.05, 0.855, 0.06)":"Ease-in Quintic. Based on power of five.","cubic-bezier(0.86, 0, 0.07, 1)":"Ease-in-out Quintic. Based on power of five.","cubic-bezier(0.23, 1, 0.320, 1)":"Ease-out Quintic. 
Based on power of five."},Nh={"circle()":"Defines a circle.","ellipse()":"Defines an ellipse.","inset()":"Defines an inset rectangle.","polygon()":"Defines a polygon."},Fh={length:["em","rem","ex","px","cm","mm","in","pt","pc","ch","vw","vh","vmin","vmax"],angle:["deg","rad","grad","turn"],time:["ms","s"],frequency:["Hz","kHz"],resolution:["dpi","dpcm","dppx"],percentage:["%","fr"]},Dh=["a","abbr","address","area","article","aside","audio","b","base","bdi","bdo","blockquote","body","br","button","canvas","caption","cite","code","col","colgroup","data","datalist","dd","del","details","dfn","dialog","div","dl","dt","em","embed","fieldset","figcaption","figure","footer","form","h1","h2","h3","h4","h5","h6","head","header","hgroup","hr","html","i","iframe","img","input","ins","kbd","keygen","label","legend","li","link","main","map","mark","menu","menuitem","meta","meter","nav","noscript","object","ol","optgroup","option","output","p","param","picture","pre","progress","q","rb","rp","rt","rtc","ruby","s","samp","script","section","select","small","source","span","strong","style","sub","summary","sup","table","tbody","td","template","textarea","tfoot","th","thead","time","title","tr","track","u","ul","const","video","wbr"],Th=["circle","clipPath","cursor","defs","desc","ellipse","feBlend","feColorMatrix","feComponentTransfer","feComposite","feConvolveMatrix","feDiffuseLighting","feDisplacementMap","feDistantLight","feDropShadow","feFlood","feFuncA","feFuncB","feFuncG","feFuncR","feGaussianBlur","feImage","feMerge","feMergeNode","feMorphology","feOffset","fePointLight","feSpecularLighting","feSpotLight","feTile","feTurbulence","filter","foreignObject","g","hatch","hatchpath","image","line","linearGradient","marker","mask","mesh","meshpatch","meshrow","metadata","mpath","path","pattern","polygon","polyline","radialGradient","rect","set","solidcolor","stop","svg","switch","symbol","text","textPath","tspan","use","view"],Ah=["@bottom-center","@bottom-left","@bottom-left-corne
r","@bottom-right","@bottom-right-corner","@left-bottom","@left-middle","@left-top","@right-bottom","@right-middle","@right-top","@top-center","@top-left","@top-left-corner","@top-right","@top-right-corner"];function Mh(e){return Object.keys(e).map((function(t){return e[t]}))}function zh(e){return"undefined"!==typeof e}var Ih=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;re.offset?r-e.offset:0}return e},e.prototype.markError=function(e,t,n,i){this.token!==this.lastErrorToken&&(e.addIssue(new La(e,t,ws.Error,void 0,this.token.offset,this.token.len)),this.lastErrorToken=this.token),(n||i)&&this.resync(n,i)},e.prototype.parseStylesheet=function(e){var t=e.version,n=e.getText();return this.internalParse(n,this._parseStylesheet,(function(i,r){if(e.version!==t)throw new Error("Underlying model has changed, AST is no longer valid");return n.substr(i,r)}))},e.prototype.internalParse=function(e,t,n){this.scanner.setSource(e),this.token=this.scanner.scan();var i=t.bind(this)();return i&&(i.textProvider=n||function(t,n){return e.substr(t,n)}),i},e.prototype._parseStylesheet=function(){for(var e=this.create(Es);e.addChild(this._parseStylesheetStart()););var t=!1;do{var n=!1;do{n=!1;var i=this._parseStylesheetStatement();for(i&&(e.addChild(i),n=!0,t=!1,this.peek(mo.EOF)||!this._needsSemicolonAfter(i)||this.accept(mo.SemiColon)||this.markError(e,Pl.SemiColonExpected));this.accept(mo.SemiColon)||this.accept(mo.CDO)||this.accept(mo.CDC);)n=!0,t=!1}while(n);if(this.peek(mo.EOF))break;t||(this.peek(mo.AtKeyword)?this.markError(e,Pl.UnknownAtRule):this.markError(e,Pl.RuleOrSelectorExpected),t=!0),this.consumeToken()}while(!this.peek(mo.EOF));return this.finish(e)},e.prototype._parseStylesheetStart=function(){return this._parseCharset()},e.prototype._parseStylesheetStatement=function(e){return void 0===e&&(e=!1),this.peek(mo.AtKeyword)?this._parseStylesheetAtStatement(e):this._parseRuleset(e)},e.prototype._parseStylesheetAtStatement=function(e){return void 
0===e&&(e=!1),this._parseImport()||this._parseMedia(e)||this._parsePage()||this._parseFontFace()||this._parseKeyframe()||this._parseSupports(e)||this._parseViewPort()||this._parseNamespace()||this._parseDocument()||this._parseUnknownAtRule()},e.prototype._tryParseRuleset=function(e){var t=this.mark();if(this._parseSelector(e)){for(;this.accept(mo.Comma)&&this._parseSelector(e););if(this.accept(mo.CurlyL))return this.restoreAtMark(t),this._parseRuleset(e)}return this.restoreAtMark(t),null},e.prototype._parseRuleset=function(e){void 0===e&&(e=!1);var t=this.create(Fs),n=t.getSelectors();if(!n.addChild(this._parseSelector(e)))return null;for(;this.accept(mo.Comma);)if(!n.addChild(this._parseSelector(e)))return this.finish(t,Pl.SelectorExpected);return this._parseBody(t,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseRuleSetDeclarationAtStatement=function(){return this._parseUnknownAtRule()},e.prototype._parseRuleSetDeclaration=function(){return this.peek(mo.AtKeyword)?this._parseRuleSetDeclarationAtStatement():this._parseDeclaration()},e.prototype._needsSemicolonAfter=function(e){switch(e.type){case us.Keyframe:case us.ViewPort:case us.Media:case us.Ruleset:case us.Namespace:case us.If:case us.For:case us.Each:case us.While:case us.MixinDeclaration:case us.FunctionDeclaration:case us.MixinContentDeclaration:return!1;case us.ExtendsReference:case us.MixinContentReference:case us.ReturnStatement:case us.MediaQuery:case us.Debug:case us.Import:case us.AtApplyRule:case us.CustomPropertyDeclaration:return!0;case us.VariableDeclaration:return e.needsSemicolon;case us.MixinReference:return!e.getContent();case us.Declaration:return!e.getNestedProperties()}return!1},e.prototype._parseDeclarations=function(e){var t=this.create(Rs);if(!this.accept(mo.CurlyL))return null;for(var n=e();t.addChild(n)&&!this.peek(mo.CurlyR);){if(this._needsSemicolonAfter(n)&&!this.accept(mo.SemiColon))return 
this.finish(t,Pl.SemiColonExpected,[mo.SemiColon,mo.CurlyR]);for(n&&this.prevToken&&this.prevToken.type===mo.SemiColon&&(n.semicolonPosition=this.prevToken.offset);this.accept(mo.SemiColon););n=e()}return this.accept(mo.CurlyR)?this.finish(t):this.finish(t,Pl.RightCurlyExpected,[mo.CurlyR,mo.SemiColon])},e.prototype._parseBody=function(e,t){return e.setDeclarations(this._parseDeclarations(t))?this.finish(e):this.finish(e,Pl.LeftCurlyExpected,[mo.CurlyR,mo.SemiColon])},e.prototype._parseSelector=function(e){var t=this.create(Ds),n=!1;for(e&&(n=t.addChild(this._parseCombinator()));t.addChild(this._parseSimpleSelector());)n=!0,t.addChild(this._parseCombinator());return n?this.finish(t):null},e.prototype._parseDeclaration=function(e){var t=this._tryParseCustomPropertyDeclaration(e);if(t)return t;var n=this.create(zs);return n.setProperty(this._parseProperty())?this.accept(mo.Colon)?(this.prevToken&&(n.colonPosition=this.prevToken.offset),n.setValue(this._parseExpr())?(n.addChild(this._parsePrio()),this.peek(mo.SemiColon)&&(n.semicolonPosition=this.token.offset),this.finish(n)):this.finish(n,Pl.PropertyValueExpected)):this.finish(n,Pl.ColonExpected,[mo.Colon],e||[mo.SemiColon]):null},e.prototype._tryParseCustomPropertyDeclaration=function(e){if(!this.peekRegExp(mo.Ident,/^--/))return null;var t=this.create(Is);if(!t.setProperty(this._parseProperty()))return null;if(!this.accept(mo.Colon))return this.finish(t,Pl.ColonExpected,[mo.Colon]);this.prevToken&&(t.colonPosition=this.prevToken.offset);var n=this.mark();if(this.peek(mo.CurlyL)){var i=this.create(Ms),r=this._parseDeclarations(this._parseRuleSetDeclaration.bind(this));if(i.setDeclarations(r)&&!r.isErroneous(!0)&&(i.addChild(this._parsePrio()),this.peek(mo.SemiColon)))return this.finish(i),t.setPropertySet(i),t.semicolonPosition=this.token.offset,this.finish(t);this.restoreAtMark(n)}var o=this._parseExpr();return 
o&&!o.isErroneous(!0)&&(this._parsePrio(),this.peekOne.apply(this,Ih(Ih([],e||[],!1),[mo.SemiColon,mo.EOF],!1)))?(t.setValue(o),this.peek(mo.SemiColon)&&(t.semicolonPosition=this.token.offset),this.finish(t)):(this.restoreAtMark(n),t.addChild(this._parseCustomPropertyValue(e)),t.addChild(this._parsePrio()),zh(t.colonPosition)&&this.token.offset===t.colonPosition+1?this.finish(t,Pl.PropertyValueExpected):this.finish(t))},e.prototype._parseCustomPropertyValue=function(e){var t=this;void 0===e&&(e=[mo.CurlyR]);var n=this.create(Ss),i=function(){return 0===o&&0===s&&0===a},r=function(){return-1!==e.indexOf(t.token.type)},o=0,s=0,a=0;e:for(;;){switch(this.token.type){case mo.SemiColon:case mo.Exclamation:if(i())break e;break;case mo.CurlyL:o++;break;case mo.CurlyR:if(--o<0){if(r()&&0===s&&0===a)break e;return this.finish(n,Pl.LeftCurlyExpected)}break;case mo.ParenthesisL:s++;break;case mo.ParenthesisR:if(--s<0){if(r()&&0===a&&0===o)break e;return this.finish(n,Pl.LeftParenthesisExpected)}break;case mo.BracketL:a++;break;case mo.BracketR:if(--a<0)return this.finish(n,Pl.LeftSquareBracketExpected);break;case mo.BadString:break e;case mo.EOF:var l=Pl.RightCurlyExpected;return a>0?l=Pl.RightSquareBracketExpected:s>0&&(l=Pl.RightParenthesisExpected),this.finish(n,l)}this.consumeToken()}return this.finish(n)},e.prototype._tryToParseDeclaration=function(e){var t=this.mark();return this._parseProperty()&&this.accept(mo.Colon)?(this.restoreAtMark(t),this._parseDeclaration(e)):(this.restoreAtMark(t),null)},e.prototype._parseProperty=function(){var e=this.create(Ls),t=this.mark();return(this.acceptDelim("*")||this.acceptDelim("_"))&&this.hasWhitespace()?(this.restoreAtMark(t),null):e.setIdentifier(this._parsePropertyIdentifier())?this.finish(e):null},e.prototype._parsePropertyIdentifier=function(){return this._parseIdent()},e.prototype._parseCharset=function(){if(!this.peek(mo.Charset))return null;var e=this.create(Ss);return 
this.consumeToken(),this.accept(mo.String)?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.IdentifierExpected)},e.prototype._parseImport=function(){if(!this.peekKeyword("@import"))return null;var e=this.create(Ys);return this.consumeToken(),e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral())?(this.peek(mo.SemiColon)||this.peek(mo.EOF)||e.setMedialist(this._parseMediaQueryList()),this.finish(e)):this.finish(e,Pl.URIOrStringExpected)},e.prototype._parseNamespace=function(){if(!this.peekKeyword("@namespace"))return null;var e=this.create(na);return this.consumeToken(),e.addChild(this._parseURILiteral())||(e.addChild(this._parseIdent()),e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral()))?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.URIExpected,[mo.SemiColon])},e.prototype._parseFontFace=function(){if(!this.peekKeyword("@font-face"))return null;var e=this.create(Hs);return this.consumeToken(),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseViewPort=function(){if(!this.peekKeyword("@-ms-viewport")&&!this.peekKeyword("@-o-viewport")&&!this.peekKeyword("@viewport"))return null;var e=this.create($s);return this.consumeToken(),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseKeyframe=function(){if(!this.peekRegExp(mo.AtKeyword,this.keyframeRegex))return null;var e=this.create(Js),t=this.create(Ss);return this.consumeToken(),e.setKeyword(this.finish(t)),t.matches("@-ms-keyframes")&&this.markError(t,Pl.UnknownKeyword),e.setIdentifier(this._parseKeyframeIdent())?this._parseBody(e,this._parseKeyframeSelector.bind(this)):this.finish(e,Pl.IdentifierExpected,[mo.CurlyR])},e.prototype._parseKeyframeIdent=function(){return this._parseIdent([fs.Keyframe])},e.prototype._parseKeyframeSelector=function(){var 
e=this.create(Xs);if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return null;for(;this.accept(mo.Comma);)if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return this.finish(e,Pl.PercentageExpected);return this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._tryParseKeyframeSelector=function(){var e=this.create(Xs),t=this.mark();if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return null;for(;this.accept(mo.Comma);)if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return this.restoreAtMark(t),null;return this.peek(mo.CurlyL)?this._parseBody(e,this._parseRuleSetDeclaration.bind(this)):(this.restoreAtMark(t),null)},e.prototype._parseSupports=function(e){if(void 0===e&&(e=!1),!this.peekKeyword("@supports"))return null;var t=this.create(ra);return this.consumeToken(),t.addChild(this._parseSupportsCondition()),this._parseBody(t,this._parseSupportsDeclaration.bind(this,e))},e.prototype._parseSupportsDeclaration=function(e){return void 0===e&&(e=!1),e?this._tryParseRuleset(!0)||this._tryToParseDeclaration()||this._parseStylesheetStatement(!0):this._parseStylesheetStatement(!1)},e.prototype._parseSupportsCondition=function(){var e=this.create(ha);if(this.acceptIdent("not"))e.addChild(this._parseSupportsConditionInParens());else if(e.addChild(this._parseSupportsConditionInParens()),this.peekRegExp(mo.Ident,/^(and|or)$/i))for(var t=this.token.text.toLowerCase();this.acceptIdent(t);)e.addChild(this._parseSupportsConditionInParens());return this.finish(e)},e.prototype._parseSupportsConditionInParens=function(){var e=this.create(ha);if(this.accept(mo.ParenthesisL))return 
this.prevToken&&(e.lParent=this.prevToken.offset),e.addChild(this._tryToParseDeclaration([mo.ParenthesisR]))||this._parseSupportsCondition()?this.accept(mo.ParenthesisR)?(this.prevToken&&(e.rParent=this.prevToken.offset),this.finish(e)):this.finish(e,Pl.RightParenthesisExpected,[mo.ParenthesisR],[]):this.finish(e,Pl.ConditionExpected);if(this.peek(mo.Ident)){var t=this.mark();if(this.consumeToken(),!this.hasWhitespace()&&this.accept(mo.ParenthesisL)){for(var n=1;this.token.type!==mo.EOF&&0!==n;)this.token.type===mo.ParenthesisL?n++:this.token.type===mo.ParenthesisR&&n--,this.consumeToken();return this.finish(e)}this.restoreAtMark(t)}return this.finish(e,Pl.LeftParenthesisExpected,[],[mo.ParenthesisL])},e.prototype._parseMediaDeclaration=function(e){return void 0===e&&(e=!1),e?this._tryParseRuleset(!0)||this._tryToParseDeclaration()||this._parseStylesheetStatement(!0):this._parseStylesheetStatement(!1)},e.prototype._parseMedia=function(e){if(void 0===e&&(e=!1),!this.peekKeyword("@media"))return null;var t=this.create(ia);return this.consumeToken(),t.addChild(this._parseMediaQueryList())?this._parseBody(t,this._parseMediaDeclaration.bind(this,e)):this.finish(t,Pl.MediaQueryExpected)},e.prototype._parseMediaQueryList=function(){var e=this.create(sa);if(!e.addChild(this._parseMediaQuery()))return this.finish(e,Pl.MediaQueryExpected);for(;this.accept(mo.Comma);)if(!e.addChild(this._parseMediaQuery()))return this.finish(e,Pl.MediaQueryExpected);return this.finish(e)},e.prototype._parseMediaQuery=function(){var e=this.create(aa),t=this.mark();if(this.acceptIdent("not"),this.peek(mo.ParenthesisL))this.restoreAtMark(t),e.addChild(this._parseMediaCondition());else{if(this.acceptIdent("only"),!e.addChild(this._parseIdent()))return null;this.acceptIdent("and")&&e.addChild(this._parseMediaCondition())}return this.finish(e)},e.prototype._parseRatio=function(){var e=this.mark(),t=this.create(va);return 
this._parseNumeric()?this.acceptDelim("/")?this._parseNumeric()?this.finish(t):this.finish(t,Pl.NumberExpected):(this.restoreAtMark(e),null):null},e.prototype._parseMediaCondition=function(){var e=this.create(la);this.acceptIdent("not");for(var t=!0;t;){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[],[mo.CurlyL]);if(this.peek(mo.ParenthesisL)||this.peekIdent("not")?e.addChild(this._parseMediaCondition()):e.addChild(this._parseMediaFeature()),!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[],[mo.CurlyL]);t=this.acceptIdent("and")||this.acceptIdent("or")}return this.finish(e)},e.prototype._parseMediaFeature=function(){var e=this,t=[mo.ParenthesisR],n=this.create(ca),i=function(){return e.acceptDelim("<")||e.acceptDelim(">")?(e.hasWhitespace()||e.acceptDelim("="),!0):!!e.acceptDelim("=")};if(n.addChild(this._parseMediaFeatureName())){if(this.accept(mo.Colon)){if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}else if(i()){if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t);if(i()&&!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}}else{if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.IdentifierExpected,[],t);if(!i())return this.finish(n,Pl.OperatorExpected,[],t);if(!n.addChild(this._parseMediaFeatureName()))return this.finish(n,Pl.IdentifierExpected,[],t);if(i()&&!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}return this.finish(n)},e.prototype._parseMediaFeatureName=function(){return this._parseIdent()},e.prototype._parseMediaFeatureValue=function(){return this._parseRatio()||this._parseTermExpression()},e.prototype._parseMedium=function(){var e=this.create(Ss);return e.addChild(this._parseIdent())?this.finish(e):null},e.prototype._parsePageDeclaration=function(){return 
this._parsePageMarginBox()||this._parseRuleSetDeclaration()},e.prototype._parsePage=function(){if(!this.peekKeyword("@page"))return null;var e=this.create(da);if(this.consumeToken(),e.addChild(this._parsePageSelector()))for(;this.accept(mo.Comma);)if(!e.addChild(this._parsePageSelector()))return this.finish(e,Pl.IdentifierExpected);return this._parseBody(e,this._parsePageDeclaration.bind(this))},e.prototype._parsePageMarginBox=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.create(pa);return this.acceptOneKeyword(Ah)||this.markError(e,Pl.UnknownAtRule,[],[mo.CurlyL]),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parsePageSelector=function(){if(!this.peek(mo.Ident)&&!this.peek(mo.Colon))return null;var e=this.create(Ss);return e.addChild(this._parseIdent()),this.accept(mo.Colon)&&!e.addChild(this._parseIdent())?this.finish(e,Pl.IdentifierExpected):this.finish(e)},e.prototype._parseDocument=function(){if(!this.peekKeyword("@-moz-document"))return null;var e=this.create(oa);return this.consumeToken(),this.resync([],[mo.CurlyL]),this._parseBody(e,this._parseStylesheetStatement.bind(this))},e.prototype._parseUnknownAtRule=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.create(Ta);e.addChild(this._parseUnknownAtRuleName());var t=0,n=0,i=0,r=0;e:for(;;){switch(this.token.type){case mo.SemiColon:if(0===n&&0===i&&0===r)break e;break;case mo.EOF:return n>0?this.finish(e,Pl.RightCurlyExpected):r>0?this.finish(e,Pl.RightSquareBracketExpected):i>0?this.finish(e,Pl.RightParenthesisExpected):this.finish(e);case mo.CurlyL:t++,n++;break;case mo.CurlyR:if(n--,t>0&&0===n){if(this.consumeToken(),r>0)return this.finish(e,Pl.RightSquareBracketExpected);if(i>0)return this.finish(e,Pl.RightParenthesisExpected);break e}if(n<0){if(0===i&&0===r)break e;return this.finish(e,Pl.LeftCurlyExpected)}break;case mo.ParenthesisL:i++;break;case mo.ParenthesisR:if(--i<0)return this.finish(e,Pl.LeftParenthesisExpected);break;case 
mo.BracketL:r++;break;case mo.BracketR:if(--r<0)return this.finish(e,Pl.LeftSquareBracketExpected)}this.consumeToken()}return e},e.prototype._parseUnknownAtRuleName=function(){var e=this.create(Ss);return this.accept(mo.AtKeyword)?this.finish(e):e},e.prototype._parseOperator=function(){if(this.peekDelim("/")||this.peekDelim("*")||this.peekDelim("+")||this.peekDelim("-")||this.peek(mo.Dashmatch)||this.peek(mo.Includes)||this.peek(mo.SubstringOperator)||this.peek(mo.PrefixOperator)||this.peek(mo.SuffixOperator)||this.peekDelim("=")){var e=this.createNode(us.Operator);return this.consumeToken(),this.finish(e)}return null},e.prototype._parseUnaryOperator=function(){if(!this.peekDelim("+")&&!this.peekDelim("-"))return null;var e=this.create(Ss);return this.consumeToken(),this.finish(e)},e.prototype._parseCombinator=function(){if(this.peekDelim(">")){var e=this.create(Ss);this.consumeToken();var t=this.mark();if(!this.hasWhitespace()&&this.acceptDelim(">")){if(!this.hasWhitespace()&&this.acceptDelim(">"))return e.type=us.SelectorCombinatorShadowPiercingDescendant,this.finish(e);this.restoreAtMark(t)}return e.type=us.SelectorCombinatorParent,this.finish(e)}if(this.peekDelim("+")){e=this.create(Ss);return this.consumeToken(),e.type=us.SelectorCombinatorSibling,this.finish(e)}if(this.peekDelim("~")){e=this.create(Ss);return this.consumeToken(),e.type=us.SelectorCombinatorAllSiblings,this.finish(e)}if(this.peekDelim("/")){e=this.create(Ss);this.consumeToken();t=this.mark();if(!this.hasWhitespace()&&this.acceptIdent("deep")&&!this.hasWhitespace()&&this.acceptDelim("/"))return e.type=us.SelectorCombinatorShadowPiercingDescendant,this.finish(e);this.restoreAtMark(t)}return null},e.prototype._parseSimpleSelector=function(){var e=this.create(Ts),t=0;for(e.addChild(this._parseElementName())&&t++;(0===t||!this.hasWhitespace())&&e.addChild(this._parseSimpleSelectorBody());)t++;return t>0?this.finish(e):null},e.prototype._parseSimpleSelectorBody=function(){return 
this._parsePseudo()||this._parseHash()||this._parseClass()||this._parseAttrib()},e.prototype._parseSelectorIdent=function(){return this._parseIdent()},e.prototype._parseHash=function(){if(!this.peek(mo.Hash)&&!this.peekDelim("#"))return null;var e=this.createNode(us.IdentifierSelector);if(this.acceptDelim("#")){if(this.hasWhitespace()||!e.addChild(this._parseSelectorIdent()))return this.finish(e,Pl.IdentifierExpected)}else this.consumeToken();return this.finish(e)},e.prototype._parseClass=function(){if(!this.peekDelim("."))return null;var e=this.createNode(us.ClassSelector);return this.consumeToken(),this.hasWhitespace()||!e.addChild(this._parseSelectorIdent())?this.finish(e,Pl.IdentifierExpected):this.finish(e)},e.prototype._parseElementName=function(){var e=this.mark(),t=this.createNode(us.ElementNameSelector);return t.addChild(this._parseNamespacePrefix()),t.addChild(this._parseSelectorIdent())||this.acceptDelim("*")?this.finish(t):(this.restoreAtMark(e),null)},e.prototype._parseNamespacePrefix=function(){var e=this.mark(),t=this.createNode(us.NamespacePrefix);return!t.addChild(this._parseIdent())&&this.acceptDelim("*"),this.acceptDelim("|")?this.finish(t):(this.restoreAtMark(e),null)},e.prototype._parseAttrib=function(){if(!this.peek(mo.BracketL))return null;var e=this.create(ga);return this.consumeToken(),e.setNamespacePrefix(this._parseNamespacePrefix()),e.setIdentifier(this._parseIdent())?(e.setOperator(this._parseOperator())&&(e.setValue(this._parseBinaryExpr()),this.acceptIdent("i"),this.acceptIdent("s")),this.accept(mo.BracketR)?this.finish(e):this.finish(e,Pl.RightSquareBracketExpected)):this.finish(e,Pl.IdentifierExpected)},e.prototype._parsePseudo=function(){var e=this,t=this._tryParsePseudoIdentifier();if(t){if(!this.hasWhitespace()&&this.accept(mo.ParenthesisL)){if(t.addChild(this.try((function(){var t=e.create(Ss);if(!t.addChild(e._parseSelector(!1)))return null;for(;e.accept(mo.Comma)&&t.addChild(e._parseSelector(!1)););return 
e.peek(mo.ParenthesisR)?e.finish(t):null}))||this._parseBinaryExpr()),!this.accept(mo.ParenthesisR))return this.finish(t,Pl.RightParenthesisExpected)}return this.finish(t)}return null},e.prototype._tryParsePseudoIdentifier=function(){if(!this.peek(mo.Colon))return null;var e=this.mark(),t=this.createNode(us.PseudoSelector);return this.consumeToken(),this.hasWhitespace()?(this.restoreAtMark(e),null):(this.accept(mo.Colon),this.hasWhitespace()||!t.addChild(this._parseIdent())?this.finish(t,Pl.IdentifierExpected):this.finish(t))},e.prototype._tryParsePrio=function(){var e=this.mark(),t=this._parsePrio();return t||(this.restoreAtMark(e),null)},e.prototype._parsePrio=function(){if(!this.peek(mo.Exclamation))return null;var e=this.createNode(us.Prio);return this.accept(mo.Exclamation)&&this.acceptIdent("important")?this.finish(e):null},e.prototype._parseExpr=function(e){void 0===e&&(e=!1);var t=this.create(ua);if(!t.addChild(this._parseBinaryExpr()))return null;for(;;){if(this.peek(mo.Comma)){if(e)return this.finish(t);this.consumeToken()}else if(!this.hasWhitespace())break;if(!t.addChild(this._parseBinaryExpr()))break}return this.finish(t)},e.prototype._parseUnicodeRange=function(){if(!this.peekIdent("u"))return null;var e=this.create(_s);return this.acceptUnicodeRange()?this.finish(e):null},e.prototype._parseNamedLine=function(){if(!this.peek(mo.BracketL))return null;var e=this.createNode(us.GridLine);for(this.consumeToken();e.addChild(this._parseIdent()););return this.accept(mo.BracketR)?this.finish(e):this.finish(e,Pl.RightSquareBracketExpected)},e.prototype._parseBinaryExpr=function(e,t){var n=this.create(ma);if(!n.setLeft(e||this._parseTerm()))return null;if(!n.setOperator(t||this._parseOperator()))return this.finish(n);if(!n.setRight(this._parseTerm()))return this.finish(n,Pl.TermExpected);n=this.finish(n);var i=this._parseOperator();return i&&(n=this._parseBinaryExpr(n,i)),this.finish(n)},e.prototype._parseTerm=function(){var e=this.create(fa);return 
e.setOperator(this._parseUnaryOperator()),e.setExpression(this._parseTermExpression())?this.finish(e):null},e.prototype._parseTermExpression=function(){return this._parseURILiteral()||this._parseUnicodeRange()||this._parseFunction()||this._parseIdent()||this._parseStringLiteral()||this._parseNumeric()||this._parseHexColor()||this._parseOperation()||this._parseNamedLine()},e.prototype._parseOperation=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(Ss);return this.consumeToken(),e.addChild(this._parseExpr()),this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},e.prototype._parseNumeric=function(){if(this.peek(mo.Num)||this.peek(mo.Percentage)||this.peek(mo.Resolution)||this.peek(mo.Length)||this.peek(mo.EMS)||this.peek(mo.EXS)||this.peek(mo.Angle)||this.peek(mo.Time)||this.peek(mo.Dimension)||this.peek(mo.Freq)){var e=this.create(Sa);return this.consumeToken(),this.finish(e)}return null},e.prototype._parseStringLiteral=function(){if(!this.peek(mo.String)&&!this.peek(mo.BadString))return null;var e=this.createNode(us.StringLiteral);return this.consumeToken(),this.finish(e)},e.prototype._parseURILiteral=function(){if(!this.peekRegExp(mo.Ident,/^url(-prefix)?$/i))return null;var e=this.mark(),t=this.createNode(us.URILiteral);return this.accept(mo.Ident),this.hasWhitespace()||!this.peek(mo.ParenthesisL)?(this.restoreAtMark(e),null):(this.scanner.inURL=!0,this.consumeToken(),t.addChild(this._parseURLArgument()),this.scanner.inURL=!1,this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected))},e.prototype._parseURLArgument=function(){var e=this.create(Ss);return this.accept(mo.String)||this.accept(mo.BadString)||this.acceptUnquotedString()?this.finish(e):null},e.prototype._parseIdent=function(e){if(!this.peek(mo.Ident))return null;var t=this.create(ks);return 
e&&(t.referenceTypes=e),t.isCustomProperty=this.peekRegExp(mo.Ident,/^--/),this.consumeToken(),this.finish(t)},e.prototype._parseFunction=function(){var e=this.mark(),t=this.create(Ps);if(!t.setIdentifier(this._parseFunctionIdentifier()))return null;if(this.hasWhitespace()||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;if(t.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)t.getArguments().addChild(this._parseFunctionArgument())||this.markError(t,Pl.ExpressionExpected);return this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected)},e.prototype._parseFunctionIdentifier=function(){if(!this.peek(mo.Ident))return null;var e=this.create(ks);if(e.referenceTypes=[fs.Function],this.acceptIdent("progid")){if(this.accept(mo.Colon))for(;this.accept(mo.Ident)&&this.acceptDelim("."););return this.finish(e)}return this.consumeToken(),this.finish(e)},e.prototype._parseFunctionArgument=function(){var e=this.create(Ws);return e.setValue(this._parseExpr(!0))?this.finish(e):null},e.prototype._parseHexColor=function(){if(this.peekRegExp(mo.Hash,/^#([A-Fa-f0-9]{3}|[A-Fa-f0-9]{4}|[A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$/g)){var e=this.create(ba);return this.consumeToken(),this.finish(e)}return null},e}();function Ph(e,t){return-1!==e.indexOf(t)}function Oh(){for(var e=[],t=0;te+t||this.offset===e&&this.length===t?this.findInScope(e,t):null},e.prototype.findInScope=function(e,t){void 0===t&&(t=0);var n=e+t,i=function(e,t){var n=0,i=e.length;if(0===i)return 0;for(;nn}));if(0===i)return this;var r=this.children[i-1];return r.offset<=e&&r.offset+r.length>=e+t?r.findInScope(e,t):this},e.prototype.addSymbol=function(e){this.symbols.push(e)},e.prototype.getSymbol=function(e,t){for(var n=0;n{var e={470:e=>{function t(e){if("string"!=typeof e)throw new TypeError("Path must be a string. 
Received "+JSON.stringify(e))}function n(e,t){for(var n,i="",r=0,o=-1,s=0,a=0;a<=e.length;++a){if(a2){var l=i.lastIndexOf("/");if(l!==i.length-1){-1===l?(i="",r=0):r=(i=i.slice(0,l)).length-1-i.lastIndexOf("/"),o=a,s=0;continue}}else if(2===i.length||1===i.length){i="",r=0,o=a,s=0;continue}t&&(i.length>0?i+="/..":i="..",r=2)}else i.length>0?i+="/"+e.slice(o+1,a):i=e.slice(o+1,a),r=a-o-1;o=a,s=0}else 46===n&&-1!==s?++s:s=-1}return i}var i={resolve:function(){for(var e,i="",r=!1,o=arguments.length-1;o>=-1&&!r;o--){var s;o>=0?s=arguments[o]:(void 0===e&&(e=process.cwd()),s=e),t(s),0!==s.length&&(i=s+"/"+i,r=47===s.charCodeAt(0))}return i=n(i,!r),r?i.length>0?"/"+i:"/":i.length>0?i:"."},normalize:function(e){if(t(e),0===e.length)return".";var i=47===e.charCodeAt(0),r=47===e.charCodeAt(e.length-1);return 0!==(e=n(e,!i)).length||i||(e="."),e.length>0&&r&&(e+="/"),i?"/"+e:e},isAbsolute:function(e){return t(e),e.length>0&&47===e.charCodeAt(0)},join:function(){if(0===arguments.length)return".";for(var e,n=0;n0&&(void 0===e?e=r:e+="/"+r)}return void 0===e?".":i.normalize(e)},relative:function(e,n){if(t(e),t(n),e===n)return"";if((e=i.resolve(e))===(n=i.resolve(n)))return"";for(var r=1;rc){if(47===n.charCodeAt(a+d))return n.slice(a+d+1);if(0===d)return n.slice(a+d)}else s>c&&(47===e.charCodeAt(r+d)?h=d:0===d&&(h=0));break}var p=e.charCodeAt(r+d);if(p!==n.charCodeAt(a+d))break;47===p&&(h=d)}var u="";for(d=r+h+1;d<=o;++d)d!==o&&47!==e.charCodeAt(d)||(0===u.length?u+="..":u+="/..");return u.length>0?u+n.slice(a+h):(a+=h,47===n.charCodeAt(a)&&++a,n.slice(a))},_makeLong:function(e){return e},dirname:function(e){if(t(e),0===e.length)return".";for(var n=e.charCodeAt(0),i=47===n,r=-1,o=!0,s=e.length-1;s>=1;--s)if(47===(n=e.charCodeAt(s))){if(!o){r=s;break}}else o=!1;return-1===r?i?"/":".":i&&1===r?"//":e.slice(0,r)},basename:function(e,n){if(void 0!==n&&"string"!=typeof n)throw new TypeError('"ext" argument must be a string');t(e);var i,r=0,o=-1,s=!0;if(void 
0!==n&&n.length>0&&n.length<=e.length){if(n.length===e.length&&n===e)return"";var a=n.length-1,l=-1;for(i=e.length-1;i>=0;--i){var c=e.charCodeAt(i);if(47===c){if(!s){r=i+1;break}}else-1===l&&(s=!1,l=i+1),a>=0&&(c===n.charCodeAt(a)?-1==--a&&(o=i):(a=-1,o=l))}return r===o?o=l:-1===o&&(o=e.length),e.slice(r,o)}for(i=e.length-1;i>=0;--i)if(47===e.charCodeAt(i)){if(!s){r=i+1;break}}else-1===o&&(s=!1,o=i+1);return-1===o?"":e.slice(r,o)},extname:function(e){t(e);for(var n=-1,i=0,r=-1,o=!0,s=0,a=e.length-1;a>=0;--a){var l=e.charCodeAt(a);if(47!==l)-1===r&&(o=!1,r=a+1),46===l?-1===n?n=a:1!==s&&(s=1):-1!==n&&(s=-1);else if(!o){i=a+1;break}}return-1===n||-1===r||0===s||1===s&&n===r-1&&n===i+1?"":e.slice(n,r)},format:function(e){if(null===e||"object"!=typeof e)throw new TypeError('The "pathObject" argument must be of type Object. Received type '+typeof e);return function(e,t){var n=t.dir||t.root,i=t.base||(t.name||"")+(t.ext||"");return n?n===t.root?n+i:n+"/"+i:i}(0,e)},parse:function(e){t(e);var n={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return n;var i,r=e.charCodeAt(0),o=47===r;o?(n.root="/",i=1):i=0;for(var s=-1,a=0,l=-1,c=!0,h=e.length-1,d=0;h>=i;--h)if(47!==(r=e.charCodeAt(h)))-1===l&&(c=!1,l=h+1),46===r?-1===s?s=h:1!==d&&(d=1):-1!==s&&(d=-1);else if(!c){a=h+1;break}return-1===s||-1===l||0===d||1===d&&s===l-1&&s===a+1?-1!==l&&(n.base=n.name=0===a&&o?e.slice(1,l):e.slice(a,l)):(0===a&&o?(n.name=e.slice(1,s),n.base=e.slice(1,l)):(n.name=e.slice(a,s),n.base=e.slice(a,l)),n.ext=e.slice(s,l)),a>0?n.dir=e.slice(0,a-1):o&&(n.dir="/"),n},sep:"/",delimiter:":",win32:null,posix:null};i.posix=i,e.exports=i},447:(e,t,n)=>{var i;if(n.r(t),n.d(t,{URI:()=>f,Utils:()=>E}),"object"==typeof process)i="win32"===process.platform;else if("object"==typeof navigator){var r=navigator.userAgent;i=r.indexOf("Windows")>=0}var o,s,a=(o=function(e,t){return(o=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in 
t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(e,t)},function(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");function n(){this.constructor=e}o(e,t),e.prototype=null===t?Object.create(t):(n.prototype=t.prototype,new n)}),l=/^\w[\w\d+.-]*$/,c=/^\//,h=/^\/\//;function d(e,t){if(!e.scheme&&t)throw new Error('[UriError]: Scheme is missing: {scheme: "", authority: "'.concat(e.authority,'", path: "').concat(e.path,'", query: "').concat(e.query,'", fragment: "').concat(e.fragment,'"}'));if(e.scheme&&!l.test(e.scheme))throw new Error("[UriError]: Scheme contains illegal characters.");if(e.path)if(e.authority){if(!c.test(e.path))throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character')}else if(h.test(e.path))throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")')}var p="",u="/",m=/^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/,f=function(){function e(e,t,n,i,r,o){void 0===o&&(o=!1),"object"==typeof e?(this.scheme=e.scheme||p,this.authority=e.authority||p,this.path=e.path||p,this.query=e.query||p,this.fragment=e.fragment||p):(this.scheme=function(e,t){return e||t?e:"file"}(e,o),this.authority=t||p,this.path=function(e,t){switch(e){case"https":case"http":case"file":t?t[0]!==u&&(t=u+t):t=u}return t}(this.scheme,n||p),this.query=i||p,this.fragment=r||p,d(this,o))}return e.isUri=function(t){return t instanceof e||!!t&&"string"==typeof t.authority&&"string"==typeof t.fragment&&"string"==typeof t.path&&"string"==typeof t.query&&"string"==typeof t.scheme&&"string"==typeof t.fsPath&&"function"==typeof t.with&&"function"==typeof t.toString},Object.defineProperty(e.prototype,"fsPath",{get:function(){return x(this,!1)},enumerable:!1,configurable:!0}),e.prototype.with=function(e){if(!e)return this;var 
t=e.scheme,n=e.authority,i=e.path,r=e.query,o=e.fragment;return void 0===t?t=this.scheme:null===t&&(t=p),void 0===n?n=this.authority:null===n&&(n=p),void 0===i?i=this.path:null===i&&(i=p),void 0===r?r=this.query:null===r&&(r=p),void 0===o?o=this.fragment:null===o&&(o=p),t===this.scheme&&n===this.authority&&i===this.path&&r===this.query&&o===this.fragment?this:new b(t,n,i,r,o)},e.parse=function(e,t){void 0===t&&(t=!1);var n=m.exec(e);return n?new b(n[2]||p,k(n[4]||p),k(n[5]||p),k(n[7]||p),k(n[9]||p),t):new b(p,p,p,p,p)},e.file=function(e){var t=p;if(i&&(e=e.replace(/\\/g,u)),e[0]===u&&e[1]===u){var n=e.indexOf(u,2);-1===n?(t=e.substring(2),e=u):(t=e.substring(2,n),e=e.substring(n)||u)}return new b("file",t,e,p,p)},e.from=function(e){var t=new b(e.scheme,e.authority,e.path,e.query,e.fragment);return d(t,!0),t},e.prototype.toString=function(e){return void 0===e&&(e=!1),S(this,e)},e.prototype.toJSON=function(){return this},e.revive=function(t){if(t){if(t instanceof e)return t;var n=new b(t);return n._formatted=t.external,n._fsPath=t._sep===g?t.fsPath:null,n}return t},e}(),g=i?1:void 0,b=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t._formatted=null,t._fsPath=null,t}return a(t,e),Object.defineProperty(t.prototype,"fsPath",{get:function(){return this._fsPath||(this._fsPath=x(this,!1)),this._fsPath},enumerable:!1,configurable:!0}),t.prototype.toString=function(e){return void 0===e&&(e=!1),e?S(this,!0):(this._formatted||(this._formatted=S(this,!1)),this._formatted)},t.prototype.toJSON=function(){var e={$mid:1};return 
this._fsPath&&(e.fsPath=this._fsPath,e._sep=g),this._formatted&&(e.external=this._formatted),this.path&&(e.path=this.path),this.scheme&&(e.scheme=this.scheme),this.authority&&(e.authority=this.authority),this.query&&(e.query=this.query),this.fragment&&(e.fragment=this.fragment),e},t}(f),v=((s={})[58]="%3A",s[47]="%2F",s[63]="%3F",s[35]="%23",s[91]="%5B",s[93]="%5D",s[64]="%40",s[33]="%21",s[36]="%24",s[38]="%26",s[39]="%27",s[40]="%28",s[41]="%29",s[42]="%2A",s[43]="%2B",s[44]="%2C",s[59]="%3B",s[61]="%3D",s[32]="%20",s);function y(e,t){for(var n=void 0,i=-1,r=0;r=97&&o<=122||o>=65&&o<=90||o>=48&&o<=57||45===o||46===o||95===o||126===o||t&&47===o)-1!==i&&(n+=encodeURIComponent(e.substring(i,r)),i=-1),void 0!==n&&(n+=e.charAt(r));else{void 0===n&&(n=e.substr(0,r));var s=v[o];void 0!==s?(-1!==i&&(n+=encodeURIComponent(e.substring(i,r)),i=-1),n+=s):-1===i&&(i=r)}}return-1!==i&&(n+=encodeURIComponent(e.substring(i))),void 0!==n?n:e}function w(e){for(var t=void 0,n=0;n1&&"file"===e.scheme?"//".concat(e.authority).concat(e.path):47===e.path.charCodeAt(0)&&(e.path.charCodeAt(1)>=65&&e.path.charCodeAt(1)<=90||e.path.charCodeAt(1)>=97&&e.path.charCodeAt(1)<=122)&&58===e.path.charCodeAt(2)?t?e.path.substr(1):e.path[1].toLowerCase()+e.path.substr(2):e.path,i&&(n=n.replace(/\//g,"\\")),n}function S(e,t){var n=t?w:y,i="",r=e.scheme,o=e.authority,s=e.path,a=e.query,l=e.fragment;if(r&&(i+=r,i+=":"),(o||"file"===r)&&(i+=u,i+=u),o){var c=o.indexOf("@");if(-1!==c){var h=o.substr(0,c);o=o.substr(c+1),-1===(c=h.indexOf(":"))?i+=n(h,!1):(i+=n(h.substr(0,c),!1),i+=":",i+=n(h.substr(c+1),!1)),i+="@"}-1===(c=(o=o.toLowerCase()).indexOf(":"))?i+=n(o,!1):(i+=n(o.substr(0,c),!1),i+=o.substr(c))}if(s){if(s.length>=3&&47===s.charCodeAt(0)&&58===s.charCodeAt(2))(d=s.charCodeAt(1))>=65&&d<=90&&(s="/".concat(String.fromCharCode(d+32),":").concat(s.substr(3)));else if(s.length>=2&&58===s.charCodeAt(1)){var 
d;(d=s.charCodeAt(0))>=65&&d<=90&&(s="".concat(String.fromCharCode(d+32),":").concat(s.substr(2)))}i+=n(s,!0)}return a&&(i+="?",i+=n(a,!1)),l&&(i+="#",i+=t?l:y(l,!1)),i}function C(e){try{return decodeURIComponent(e)}catch(t){return e.length>3?e.substr(0,3)+C(e.substr(3)):e}}var _=/(%[0-9A-Za-z][0-9A-Za-z])+/g;function k(e){return e.match(_)?e.replace(_,(function(e){return C(e)})):e}var E,R,N=n(470),F=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;r{for(var i in t)n.o(t,i)&&!n.o(e,i)&&Object.defineProperty(e,i,{enumerable:!0,get:t[i]})},n.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),n.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n(447)})();var{URI:$h,Utils:Hh}=Wh,Gh=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;r0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]=0&&-1===' \t\n\r":{[()]},*>+'.indexOf(i.charAt(n));)n--;return i.substring(n+1,t)}(e,this.offset),this.defaultReplaceRange=$a.create(Ba.create(this.position.line,this.position.character-this.currentWord.length),this.position),this.textDocument=e,this.styleSheet=n,this.documentSettings=i;try{var r={isIncomplete:!1,items:[]};this.nodePath=ys(this.styleSheet,this.offset);for(var o=this.nodePath.length-1;o>=0;o--){var s=this.nodePath[o];if(s instanceof Ls)this.getCompletionsForDeclarationProperty(s.getParent(),r);else if(s instanceof ua)s.parent instanceof _a?this.getVariableProposals(null,r):this.getCompletionsForExpression(s,r);else if(s instanceof Ts){var a=s.findAParent(us.ExtendsReference,us.Ruleset);if(a)if(a.type===us.ExtendsReference)this.getCompletionsForExtendsReference(a,s,r);else{var l=a;this.getCompletionsForSelector(l,l&&l.isNested(),r)}}else if(s instanceof 
Ws)this.getCompletionsForFunctionArgument(s,s.getParent(),r);else if(s instanceof Rs)this.getCompletionsForDeclarations(s,r);else if(s instanceof Ca)this.getCompletionsForVariableDeclaration(s,r);else if(s instanceof Fs)this.getCompletionsForRuleSet(s,r);else if(s instanceof _a)this.getCompletionsForInterpolation(s,r);else if(s instanceof js)this.getCompletionsForFunctionDeclaration(s,r);else if(s instanceof Fa)this.getCompletionsForMixinReference(s,r);else if(s instanceof Ps)this.getCompletionsForFunctionArgument(null,s,r);else if(s instanceof ra)this.getCompletionsForSupports(s,r);else if(s instanceof ha)this.getCompletionsForSupportsCondition(s,r);else if(s instanceof Ea)this.getCompletionsForExtendsReference(s,null,r);else if(s.type===us.URILiteral)this.getCompletionForUriLiteralValue(s,r);else if(null===s.parent)this.getCompletionForTopLevel(r);else{if(s.type!==us.StringLiteral||!this.isImportPathParent(s.parent.type))continue;this.getCompletionForImportPath(s,r)}if(r.items.length>0||this.offset>s.offset)return this.finalize(r)}return this.getCompletionsForStylesheet(r),0===r.items.length&&this.variablePrefix&&0===this.currentWord.indexOf(this.variablePrefix)&&this.getVariableProposals(null,r),this.finalize(r)}finally{this.position=null,this.currentWord=null,this.textDocument=null,this.styleSheet=null,this.symbolContext=null,this.defaultReplaceRange=null,this.nodePath=null}},e.prototype.isImportPathParent=function(e){return e===us.Import},e.prototype.finalize=function(e){return e},e.prototype.findInNodePath=function(){for(var e=[],t=0;t=0;n--){var i=this.nodePath[n];if(-1!==e.indexOf(i.type))return i}return null},e.prototype.getCompletionsForDeclarationProperty=function(e,t){return this.getPropertyProposals(e,t)},e.prototype.getPropertyProposals=function(e,t){var n=this,i=this.isTriggerPropertyValueCompletionEnabled,r=this.isCompletePropertyWithSemicolonEnabled;return this.cssDataManager.getProperties().forEach((function(o){var 
s,a,l=!1;e?(s=n.getCompletionRange(e.getProperty()),a=o.name,zh(e.colonPosition)||(a+=": ",l=!0)):(s=n.getCompletionRange(null),a=o.name+": ",l=!0),!e&&r&&(a+="$0;"),e&&!e.semicolonPosition&&r&&n.offset>=n.textDocument.offsetAt(s.end)&&(a+="$0;");var c={label:o.name,documentation:Yc(o,n.doesSupportMarkdown()),tags:ud(o)?[Zl.Deprecated]:[],textEdit:yl.replace(s,a),insertTextFormat:Yl.Snippet,kind:Jl.Property};o.restrictions||(l=!1),i&&l&&(c.command=dd);var h=(255-("number"===typeof o.relevance?Math.min(Math.max(o.relevance,0),99):50)).toString(16),d=cs(o.name,"-")?od.VendorPrefixed:od.Normal;c.sortText=d+"_"+h,t.items.push(c)})),this.completionParticipants.forEach((function(e){e.onCssProperty&&e.onCssProperty({propertyName:n.currentWord,range:n.defaultReplaceRange})})),t},Object.defineProperty(e.prototype,"isTriggerPropertyValueCompletionEnabled",{get:function(){var e,t;return null===(t=null===(e=this.documentSettings)||void 0===e?void 0:e.triggerPropertyValueCompletion)||void 0===t||t},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"isCompletePropertyWithSemicolonEnabled",{get:function(){var e,t;return null===(t=null===(e=this.documentSettings)||void 0===e?void 0:e.completePropertyWithSemicolon)||void 0===t||t},enumerable:!1,configurable:!0}),e.prototype.getCompletionsForDeclarationValue=function(e,t){for(var n=this,i=e.getFullPropertyName(),r=this.cssDataManager.getProperty(i),o=e.getValue()||null;o&&o.hasChildren();)o=o.findChildAtOffset(this.offset,!1);if(this.completionParticipants.forEach((function(e){e.onCssPropertyValue&&e.onCssPropertyValue({propertyName:i,propertyValue:n.currentWord,range:n.getCompletionRange(o)})})),r){if(r.restrictions)for(var s=0,a=r.restrictions;s=e.offset+2&&this.getVariableProposals(null,t),t},e.prototype.getVariableProposals=function(e,t){for(var n=0,i=this.getSymbolContext().findSymbolsAtOffset(this.offset,fs.Variable);n0){var 
r=this.currentWord.match(/^-?\d[\.\d+]*/);r&&(i=r[0],n.isIncomplete=i.length===this.currentWord.length)}else 0===this.currentWord.length&&(n.isIncomplete=!0);if(t&&t.parent&&t.parent.type===us.Term&&(t=t.getParent()),e.restrictions)for(var o=0,s=e.restrictions;o=n.end?this.getCompletionForTopLevel(t):!n||this.offset<=n.offset?this.getCompletionsForSelector(e,e.isNested(),t):this.getCompletionsForDeclarations(e.getDeclarations(),t)},e.prototype.getCompletionsForSelector=function(e,t,n){var i=this,r=this.findInNodePath(us.PseudoSelector,us.IdentifierSelector,us.ClassSelector,us.ElementNameSelector);if(!r&&this.hasCharacterAtPosition(this.offset-this.currentWord.length-1,":")&&(this.currentWord=":"+this.currentWord,this.hasCharacterAtPosition(this.offset-this.currentWord.length-1,":")&&(this.currentWord=":"+this.currentWord),this.defaultReplaceRange=$a.create(Ba.create(this.position.line,this.position.character-this.currentWord.length),this.position)),this.cssDataManager.getPseudoClasses().forEach((function(e){var t=fd(e.name),o={label:e.name,textEdit:yl.replace(i.getCompletionRange(r),t),documentation:Yc(e,i.doesSupportMarkdown()),tags:ud(e)?[Zl.Deprecated]:[],kind:Jl.Function,insertTextFormat:e.name!==t?hd:void 0};cs(e.name,":-")&&(o.sortText=od.VendorPrefixed),n.items.push(o)})),this.cssDataManager.getPseudoElements().forEach((function(e){var t=fd(e.name),o={label:e.name,textEdit:yl.replace(i.getCompletionRange(r),t),documentation:Yc(e,i.doesSupportMarkdown()),tags:ud(e)?[Zl.Deprecated]:[],kind:Jl.Function,insertTextFormat:e.name!==t?hd:void 0};cs(e.name,"::-")&&(o.sortText=od.VendorPrefixed),n.items.push(o)})),!t){for(var o=0,s=Dh;o0){var t=d.substr(e.offset,e.length);return"."!==t.charAt(0)||h[t]||(h[t]=!0,n.items.push({label:t,textEdit:yl.replace(i.getCompletionRange(r),t),kind:Jl.Keyword})),!1}return!0})),e&&e.isNested()){var 
p=e.getSelectors().findFirstChildBeforeOffset(this.offset);p&&0===e.getSelectors().getChildren().indexOf(p)&&this.getPropertyProposals(null,n)}return n},e.prototype.getCompletionsForDeclarations=function(e,t){if(!e||this.offset===e.offset)return t;var n=e.findFirstChildBeforeOffset(this.offset);if(!n)return this.getCompletionsForDeclarationProperty(null,t);if(n instanceof As){var i=n;if(!zh(i.colonPosition)||this.offset<=i.colonPosition)return this.getCompletionsForDeclarationProperty(i,t);if(zh(i.semicolonPosition)&&i.semicolonPositione.colonPosition&&this.getVariableProposals(e.getValue(),t),t},e.prototype.getCompletionsForExpression=function(e,t){var n=e.getParent();if(n instanceof Ws)return this.getCompletionsForFunctionArgument(n,n.getParent(),t),t;var i=e.findParent(us.Declaration);if(!i)return this.getTermProposals(void 0,null,t),t;var r=e.findChildAtOffset(this.offset,!0);return r?r instanceof Sa||r instanceof ks?this.getCompletionsForDeclarationValue(i,t):t:this.getCompletionsForDeclarationValue(i,t)},e.prototype.getCompletionsForFunctionArgument=function(e,t,n){var i=t.getIdentifier();return i&&i.matches("var")&&(t.getArguments().hasChildren()&&t.getArguments().getChild(0)!==e||this.getVariableProposalsForCSSVarFunction(n)),n},e.prototype.getCompletionsForFunctionDeclaration=function(e,t){var n=e.getDeclarations();return n&&this.offset>n.offset&&this.offsete.lParent&&(!zh(e.rParent)||this.offset<=e.rParent)?this.getCompletionsForDeclarationProperty(null,t):t},e.prototype.getCompletionsForSupports=function(e,t){var n=e.getDeclarations();if(!n||this.offset<=n.offset){var i=e.findFirstChildBeforeOffset(this.offset);return i instanceof ha?this.getCompletionsForSupportsCondition(i,t):t}return this.getCompletionForTopLevel(t)},e.prototype.getCompletionsForExtendsReference=function(e,t,n){return n},e.prototype.getCompletionForUriLiteralValue=function(e,t){var n,i,r;if(e.hasChildren()){var 
o=e.getChild(0);n=o.getText(),i=this.position,r=this.getCompletionRange(o)}else{n="",i=this.position;var s=this.textDocument.positionAt(e.offset+"url(".length);r=$a.create(s,s)}return this.completionParticipants.forEach((function(e){e.onCssURILiteralValue&&e.onCssURILiteralValue({uriValue:n,position:i,range:r})})),t},e.prototype.getCompletionForImportPath=function(e,t){var n=this;return this.completionParticipants.forEach((function(t){t.onCssImportPath&&t.onCssImportPath({pathValue:e.getText(),position:n.position,range:n.getCompletionRange(e)})})),t},e.prototype.hasCharacterAtPosition=function(e,t){var n=this.textDocument.getText();return e>=0&&e"),this.writeLine(t,i.join(""))}},e}();!function(e){function t(e){var t=e.match(/^['"](.*)["']$/);return t?t[1]:e}e.ensure=function(e,n){return n+t(e)+n},e.remove=t}(yd||(yd={}));var Ed=function(){return function(){this.id=0,this.attr=0,this.tag=0}}();function Rd(e,t){for(var n=new Sd,i=0,r=e.getChildren();i1){var l=t.cloneWithParent();n.addChild(l.findRoot()),n=l}n.append(s[a])}}break;case us.SelectorPlaceholder:if(o.matches("@at-root"))return n;case us.ElementNameSelector:var c=o.getText();n.addAttr("name","*"===c?"element":Nd(c));break;case us.ClassSelector:n.addAttr("class",Nd(o.getText().substring(1)));break;case us.IdentifierSelector:n.addAttr("id",Nd(o.getText().substring(1)));break;case us.MixinDeclaration:n.addAttr("class",o.getName());break;case us.PseudoSelector:n.addAttr(Nd(o.getText()),"");break;case us.AttributeSelector:var h=o,d=h.getIdentifier();if(d){var p=h.getValue(),u=h.getOperator(),m=void 0;if(p&&u)switch(Nd(u.getText())){case"|=":m="".concat(yd.remove(Nd(p.getText())),"-\u2026");break;case"^=":m="".concat(yd.remove(Nd(p.getText())),"\u2026");break;case"$=":m="\u2026".concat(yd.remove(Nd(p.getText())));break;case"~=":m=" \u2026 ".concat(yd.remove(Nd(p.getText()))," \u2026 
");break;case"*=":m="\u2026".concat(yd.remove(Nd(p.getText())),"\u2026");break;default:m=yd.remove(Nd(p.getText()))}n.addAttr(Nd(d.getText()),m)}}}return n}function Nd(e){var t=new ls;t.setSource(e);var n=t.scanUnquotedString();return n?n.text:e}var Fd=function(){function e(e){this.cssDataManager=e}return e.prototype.selectorToMarkedString=function(e){var t=function(e){if(e.matches("@at-root"))return null;var t=new Cd,n=[],i=e.getParent();if(i instanceof Fs)for(var r=i.getParent();r&&!Td(r);){if(r instanceof Fs){if(r.getSelectors().matches("@at-root"))break;n.push(r)}r=r.getParent()}for(var o=new Dd(t),s=n.length-1;s>=0;s--){var a=n[s].getSelectors().getChild(0);a&&o.processSelector(a)}return o.processSelector(e),t}(e);if(t){var n=new kd('"').print(t);return n.push(this.selectorToSpecificityMarkedString(e)),n}return[]},e.prototype.simpleSelectorToMarkedString=function(e){var t=Rd(e),n=new kd('"').print(t);return n.push(this.selectorToSpecificityMarkedString(e)),n},e.prototype.isPseudoElementIdentifier=function(e){var t=e.match(/^::?([\w-]+)/);return!!t&&!!this.cssDataManager.getPseudoElement("::"+t[1])},e.prototype.selectorToSpecificityMarkedString=function(e){var t=this,n=function(e){var i=new Ed;e:for(var r=0,o=e.getChildren();r0){for(var l=new Ed,c=0,h=s.getChildren();cl.id?l=f:f.idl.attr?l=f:f.attrl.tag&&(l=f))}}i.id+=l.id,i.attr+=l.attr,i.tag+=l.tag;continue e}i.attr++}if(s.getChildren().length>0){var f=n(s);i.id+=f.id,i.attr+=f.attr,i.tag+=f.tag}}return i},i=n(e);return xd("specificity","[Selector Specificity](https://developer.mozilla.org/en-US/docs/Web/CSS/Specificity): ({0}, {1}, {2})",i.id,i.attr,i.tag)},e}(),Dd=function(){function e(e){this.prev=null,this.element=e}return e.prototype.processSelector=function(e){var t=null;if(!(this.element instanceof Cd)&&e.getChildren().some((function(e){return e.hasChildren()&&e.getChild(0).type===us.SelectorCombinator}))){var n=this.element.findRoot();n.parent instanceof 
Cd&&(t=this.element,this.element=n.parent,this.element.removeChild(n),this.prev=null)}for(var i=0,r=e.getChildren();i0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]n)return 0;var r,o,s=[],a=[];for(r=0;r=r.length/2&&o.push({property:e.name,score:t})})),o.sort((function(e,t){return t.score-e.score||e.property.localeCompare(t.property)}));for(var s=3,a=0,l=o;a=0;a--){var l=s[a];if(l instanceof zs){var c=l.getProperty();if(c&&c.offset===r&&c.end===o)return void this.getFixesForUnknownProperty(e,c,n,i)}}},e}(),Zd=function(){return function(e){this.fullPropertyName=e.getFullPropertyName().toLowerCase(),this.node=e}}();function ep(e,t,n,i){var r=e[t];r.value=n,n&&(Ph(r.properties,i)||r.properties.push(i))}function tp(e,t,n,i){"top"===t||"right"===t||"bottom"===t||"left"===t?ep(e,t,n,i):function(e,t,n){ep(e,"top",t,n),ep(e,"right",t,n),ep(e,"bottom",t,n),ep(e,"left",t,n)}(e,n,i)}function np(e,t,n){switch(t.length){case 1:tp(e,void 0,t[0],n);break;case 2:tp(e,"top",t[0],n),tp(e,"bottom",t[0],n),tp(e,"right",t[1],n),tp(e,"left",t[1],n);break;case 3:tp(e,"top",t[0],n),tp(e,"right",t[1],n),tp(e,"left",t[1],n),tp(e,"bottom",t[2],n);break;case 4:tp(e,"top",t[0],n),tp(e,"right",t[1],n),tp(e,"bottom",t[2],n),tp(e,"left",t[3],n)}}function ip(e,t){for(var n=0,i=t;n0)for(var m=this.fetch(i,"float"),f=0;f0)for(m=this.fetch(i,"vertical-align"),f=0;f1)for(var S=0;S")||this.peekDelim("<")||this.peekIdent("and")||this.peekIdent("or")||this.peekDelim("%")){var t=this.createNode(us.Operator);return this.consumeToken(),this.finish(t)}return e.prototype._parseOperator.call(this)},t.prototype._parseUnaryOperator=function(){if(this.peekIdent("not")){var t=this.create(Ss);return this.consumeToken(),this.finish(t)}return e.prototype._parseUnaryOperator.call(this)},t.prototype._parseRuleSetDeclaration=function(){return 
this.peek(mo.AtKeyword)?this._parseKeyframe()||this._parseImport()||this._parseMedia(!0)||this._parseFontFace()||this._parseWarnAndDebug()||this._parseControlStatement()||this._parseFunctionDeclaration()||this._parseExtends()||this._parseMixinReference()||this._parseMixinContent()||this._parseMixinDeclaration()||this._parseRuleset(!0)||this._parseSupports(!0)||e.prototype._parseRuleSetDeclarationAtStatement.call(this):this._parseVariableDeclaration()||this._tryParseRuleset(!0)||e.prototype._parseRuleSetDeclaration.call(this)},t.prototype._parseDeclaration=function(e){var t=this._tryParseCustomPropertyDeclaration(e);if(t)return t;var n=this.create(zs);if(!n.setProperty(this._parseProperty()))return null;if(!this.accept(mo.Colon))return this.finish(n,Pl.ColonExpected,[mo.Colon],e||[mo.SemiColon]);this.prevToken&&(n.colonPosition=this.prevToken.offset);var i=!1;if(n.setValue(this._parseExpr())&&(i=!0,n.addChild(this._parsePrio())),this.peek(mo.CurlyL))n.setNestedProperties(this._parseNestedProperties());else if(!i)return this.finish(n,Pl.PropertyValueExpected);return this.peek(mo.SemiColon)&&(n.semicolonPosition=this.token.offset),this.finish(n)},t.prototype._parseNestedProperties=function(){var e=this.create(Gs);return this._parseBody(e,this._parseDeclaration.bind(this))},t.prototype._parseExtends=function(){if(this.peekKeyword("@extend")){var e=this.create(Ea);if(this.consumeToken(),!e.getSelectors().addChild(this._parseSimpleSelector()))return this.finish(e,Pl.SelectorExpected);for(;this.accept(mo.Comma);)e.getSelectors().addChild(this._parseSimpleSelector());return this.accept(mo.Exclamation)&&!this.acceptIdent("optional")?this.finish(e,Pl.UnknownKeyword):this.finish(e)}return null},t.prototype._parseSimpleSelectorBody=function(){return this._parseSelectorCombinator()||this._parseSelectorPlaceholder()||e.prototype._parseSimpleSelectorBody.call(this)},t.prototype._parseSelectorCombinator=function(){if(this.peekDelim("&")){var 
e=this.createNode(us.SelectorCombinator);for(this.consumeToken();!this.hasWhitespace()&&(this.acceptDelim("-")||this.accept(mo.Num)||this.accept(mo.Dimension)||e.addChild(this._parseIdent())||this.acceptDelim("&")););return this.finish(e)}return null},t.prototype._parseSelectorPlaceholder=function(){if(this.peekDelim("%")){var e=this.createNode(us.SelectorPlaceholder);return this.consumeToken(),this._parseIdent(),this.finish(e)}if(this.peekKeyword("@at-root")){e=this.createNode(us.SelectorPlaceholder);return this.consumeToken(),this.finish(e)}return null},t.prototype._parseElementName=function(){var t=this.mark(),n=e.prototype._parseElementName.call(this);return n&&!this.hasWhitespace()&&this.peek(mo.ParenthesisL)?(this.restoreAtMark(t),null):n},t.prototype._tryParsePseudoIdentifier=function(){return this._parseInterpolation()||e.prototype._tryParsePseudoIdentifier.call(this)},t.prototype._parseWarnAndDebug=function(){if(!this.peekKeyword("@debug")&&!this.peekKeyword("@warn")&&!this.peekKeyword("@error"))return null;var e=this.createNode(us.Debug);return this.consumeToken(),e.addChild(this._parseExpr()),this.finish(e)},t.prototype._parseControlStatement=function(e){return void 0===e&&(e=this._parseRuleSetDeclaration.bind(this)),this.peek(mo.AtKeyword)?this._parseIfStatement(e)||this._parseForStatement(e)||this._parseEachStatement(e)||this._parseWhileStatement(e):null},t.prototype._parseIfStatement=function(e){return this.peekKeyword("@if")?this._internalParseIfStatement(e):null},t.prototype._internalParseIfStatement=function(e){var t=this.create(Vs);if(this.consumeToken(),!t.setExpression(this._parseExpr(!0)))return this.finish(t,Pl.ExpressionExpected);if(this._parseBody(t,e),this.acceptKeyword("@else"))if(this.peekIdent("if"))t.setElseClause(this._internalParseIfStatement(e));else if(this.peek(mo.CurlyL)){var n=this.create(Bs);this._parseBody(n,e),t.setElseClause(n)}return 
this.finish(t)},t.prototype._parseForStatement=function(e){if(!this.peekKeyword("@for"))return null;var t=this.create(Us);return this.consumeToken(),t.setVariable(this._parseVariable())?this.acceptIdent("from")?t.addChild(this._parseBinaryExpr())?this.acceptIdent("to")||this.acceptIdent("through")?t.addChild(this._parseBinaryExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.ThroughOrToExpected,[mo.CurlyR]):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.FromExpected,[mo.CurlyR]):this.finish(t,Pl.VariableNameExpected,[mo.CurlyR])},t.prototype._parseEachStatement=function(e){if(!this.peekKeyword("@each"))return null;var t=this.create(Ks);this.consumeToken();var n=t.getVariables();if(!n.addChild(this._parseVariable()))return this.finish(t,Pl.VariableNameExpected,[mo.CurlyR]);for(;this.accept(mo.Comma);)if(!n.addChild(this._parseVariable()))return this.finish(t,Pl.VariableNameExpected,[mo.CurlyR]);return this.finish(n),this.acceptIdent("in")?t.addChild(this._parseExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.InExpected,[mo.CurlyR])},t.prototype._parseWhileStatement=function(e){if(!this.peekKeyword("@while"))return null;var t=this.create(qs);return this.consumeToken(),t.addChild(this._parseBinaryExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR])},t.prototype._parseFunctionBodyDeclaration=function(){return this._parseVariableDeclaration()||this._parseReturnStatement()||this._parseWarnAndDebug()||this._parseControlStatement(this._parseFunctionBodyDeclaration.bind(this))},t.prototype._parseFunctionDeclaration=function(){if(!this.peekKeyword("@function"))return null;var e=this.create(js);if(this.consumeToken(),!e.setIdentifier(this._parseIdent([fs.Function])))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(!this.accept(mo.ParenthesisL))return 
this.finish(e,Pl.LeftParenthesisExpected,[mo.CurlyR]);if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);return this.accept(mo.ParenthesisR)?this._parseBody(e,this._parseFunctionBodyDeclaration.bind(this)):this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyR])},t.prototype._parseReturnStatement=function(){if(!this.peekKeyword("@return"))return null;var e=this.createNode(us.ReturnStatement);return this.consumeToken(),e.addChild(this._parseExpr())?this.finish(e):this.finish(e,Pl.ExpressionExpected)},t.prototype._parseMixinDeclaration=function(){if(!this.peekKeyword("@mixin"))return null;var e=this.create(Da);if(this.consumeToken(),!e.setIdentifier(this._parseIdent([fs.Mixin])))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(this.accept(mo.ParenthesisL)){if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyR])}return this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},t.prototype._parseParameterDeclaration=function(){var e=this.create(Os);return e.setIdentifier(this._parseVariable())?(this.accept(Mp),this.accept(mo.Colon)&&!e.setDefaultValue(this._parseExpr(!0))?this.finish(e,Pl.VariableValueExpected,[],[mo.Comma,mo.ParenthesisR]):this.finish(e)):null},t.prototype._parseMixinContent=function(){if(!this.peekKeyword("@content"))return null;var e=this.create(Ra);if(this.consumeToken(),this.accept(mo.ParenthesisL)){if(e.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getArguments().addChild(this._parseFunctionArgument()))return 
this.finish(e,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}return this.finish(e)},t.prototype._parseMixinReference=function(){if(!this.peekKeyword("@include"))return null;var e=this.create(Fa);this.consumeToken();var t=this._parseIdent([fs.Mixin]);if(!e.setIdentifier(t))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(!this.hasWhitespace()&&this.acceptDelim(".")&&!this.hasWhitespace()){var n=this._parseIdent([fs.Mixin]);if(!n)return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);var i=this.create(Ia);t.referenceTypes=[fs.Module],i.setIdentifier(t),e.setIdentifier(n),e.addChild(i)}if(this.accept(mo.ParenthesisL)){if(e.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getArguments().addChild(this._parseFunctionArgument()))return this.finish(e,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}return(this.peekIdent("using")||this.peek(mo.CurlyL))&&e.setContent(this._parseMixinContentDeclaration()),this.finish(e)},t.prototype._parseMixinContentDeclaration=function(){var e=this.create(Na);if(this.acceptIdent("using")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.CurlyL]);if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyL])}return this.peek(mo.CurlyL)&&this._parseBody(e,this._parseMixinReferenceBodyStatement.bind(this)),this.finish(e)},t.prototype._parseMixinReferenceBodyStatement=function(){return this._tryParseKeyframeSelector()||this._parseRuleSetDeclaration()},t.prototype._parseFunctionArgument=function(){var 
e=this.create(Ws),t=this.mark(),n=this._parseVariable();if(n)if(this.accept(mo.Colon))e.setIdentifier(n);else{if(this.accept(Mp))return e.setValue(n),this.finish(e);this.restoreAtMark(t)}return e.setValue(this._parseExpr(!0))?(this.accept(Mp),e.addChild(this._parsePrio()),this.finish(e)):e.setValue(this._tryParsePrio())?this.finish(e):null},t.prototype._parseURLArgument=function(){var t=this.mark(),n=e.prototype._parseURLArgument.call(this);if(!n||!this.peek(mo.ParenthesisR)){this.restoreAtMark(t);var i=this.create(Ss);return i.addChild(this._parseBinaryExpr()),this.finish(i)}return n},t.prototype._parseOperation=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(Ss);for(this.consumeToken();e.addChild(this._parseListElement());)this.accept(mo.Comma);return this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},t.prototype._parseListElement=function(){var e=this.create(Aa),t=this._parseBinaryExpr();if(!t)return null;if(this.accept(mo.Colon)){if(e.setKey(t),!e.setValue(this._parseBinaryExpr()))return this.finish(e,Pl.ExpressionExpected)}else e.setValue(t);return this.finish(e)},t.prototype._parseUse=function(){if(!this.peekKeyword("@use"))return null;var e=this.create(Qs);if(this.consumeToken(),!e.addChild(this._parseStringLiteral()))return this.finish(e,Pl.StringLiteralExpected);if(!this.peek(mo.SemiColon)&&!this.peek(mo.EOF)){if(!this.peekRegExp(mo.Ident,/as|with/))return this.finish(e,Pl.UnknownKeyword);if(this.acceptIdent("as")&&!e.setIdentifier(this._parseIdent([fs.Module]))&&!this.acceptDelim("*"))return this.finish(e,Pl.IdentifierOrWildcardExpected);if(this.acceptIdent("with")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.ParenthesisR]);if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return 
this.finish(e,Pl.VariableNameExpected);for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}}return this.accept(mo.SemiColon)||this.accept(mo.EOF)?this.finish(e):this.finish(e,Pl.SemiColonExpected)},t.prototype._parseModuleConfigDeclaration=function(){var e=this.create(Zs);return e.setIdentifier(this._parseVariable())?this.accept(mo.Colon)&&e.setValue(this._parseExpr(!0))?!this.accept(mo.Exclamation)||!this.hasWhitespace()&&this.acceptIdent("default")?this.finish(e):this.finish(e,Pl.UnknownKeyword):this.finish(e,Pl.VariableValueExpected,[],[mo.Comma,mo.ParenthesisR]):null},t.prototype._parseForward=function(){if(!this.peekKeyword("@forward"))return null;var e=this.create(ea);if(this.consumeToken(),!e.addChild(this._parseStringLiteral()))return this.finish(e,Pl.StringLiteralExpected);if(this.acceptIdent("with")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.ParenthesisR]);if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}if(!this.peek(mo.SemiColon)&&!this.peek(mo.EOF)){if(!this.peekRegExp(mo.Ident,/as|hide|show/))return this.finish(e,Pl.UnknownKeyword);if(this.acceptIdent("as")){var t=this._parseIdent([fs.Forward]);if(!e.setIdentifier(t))return this.finish(e,Pl.IdentifierExpected);if(this.hasWhitespace()||!this.acceptDelim("*"))return this.finish(e,Pl.WildcardExpected)}if((this.peekIdent("hide")||this.peekIdent("show"))&&!e.addChild(this._parseForwardVisibility()))return 
this.finish(e,Pl.IdentifierOrVariableExpected)}return this.accept(mo.SemiColon)||this.accept(mo.EOF)?this.finish(e):this.finish(e,Pl.SemiColonExpected)},t.prototype._parseForwardVisibility=function(){var e=this.create(ta);for(e.setIdentifier(this._parseIdent());e.addChild(this._parseVariable()||this._parseIdent());)this.accept(mo.Comma);return e.getChildren().length>1?e:null},t.prototype._parseSupportsCondition=function(){return this._parseInterpolation()||e.prototype._parseSupportsCondition.call(this)},t}(Lh),Vp=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),Up=Wa(),Kp=function(e){function t(n,i){var r=e.call(this,"$",n,i)||this;return qp(t.scssModuleLoaders),qp(t.scssModuleBuiltIns),r}return Vp(t,e),t.prototype.isImportPathParent=function(t){return t===us.Forward||t===us.Use||e.prototype.isImportPathParent.call(this,t)},t.prototype.getCompletionForImportPath=function(n,i){var r=n.getParent().type;if(r===us.Forward||r===us.Use)for(var o=0,s=t.scssModuleBuiltIns;o0){var t="string"===typeof e.documentation?{kind:"markdown",value:e.documentation}:{kind:"markdown",value:e.documentation.value};t.value+="\n\n",t.value+=e.references.map((function(e){return"[".concat(e.name,"](").concat(e.url,")")})).join(" | "),e.documentation=t}}))}var Bp,jp=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value 
"+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),$p="/".charCodeAt(0),Hp="\n".charCodeAt(0),Gp="\r".charCodeAt(0),Jp="\f".charCodeAt(0),Xp="`".charCodeAt(0),Yp=".".charCodeAt(0),Qp=mo.CustomToken,Zp=Qp++,eu=function(e){function t(){return null!==e&&e.apply(this,arguments)||this}return jp(t,e),t.prototype.scanNext=function(t){var n=this.escapedJavaScript();return null!==n?this.finishToken(t,n):this.stream.advanceIfChars([Yp,Yp,Yp])?this.finishToken(t,Zp):e.prototype.scanNext.call(this,t)},t.prototype.comment=function(){return!!e.prototype.comment.call(this)||!(this.inURL||!this.stream.advanceIfChars([$p,$p]))&&(this.stream.advanceWhileChar((function(e){switch(e){case Hp:case Gp:case Jp:return!1;default:return!0}})),!0)},t.prototype.escapedJavaScript=function(){return this.stream.peekChar()===Xp?(this.stream.advance(1),this.stream.advanceWhileChar((function(e){return e!==Xp})),this.stream.advanceIfChar(Xp)?mo.EscapedJavaScript:mo.BadEscapedJavaScript):null},t}(ls),tu=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),nu=function(e){function t(){return e.call(this,new eu)||this}return tu(t,e),t.prototype._parseStylesheetStatement=function(t){return void 
0===t&&(t=!1),this.peek(mo.AtKeyword)?this._parseVariableDeclaration()||this._parsePlugin()||e.prototype._parseStylesheetAtStatement.call(this,t):this._tryParseMixinDeclaration()||this._tryParseMixinReference()||this._parseFunction()||this._parseRuleset(!0)},t.prototype._parseImport=function(){if(!this.peekKeyword("@import")&&!this.peekKeyword("@import-once"))return null;var e=this.create(Ys);if(this.consumeToken(),this.accept(mo.ParenthesisL)){if(!this.accept(mo.Ident))return this.finish(e,Pl.IdentifierExpected,[mo.SemiColon]);do{if(!this.accept(mo.Comma))break}while(this.accept(mo.Ident));if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.SemiColon])}return e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral())?(this.peek(mo.SemiColon)||this.peek(mo.EOF)||e.setMedialist(this._parseMediaQueryList()),this.finish(e)):this.finish(e,Pl.URIOrStringExpected,[mo.SemiColon])},t.prototype._parsePlugin=function(){if(!this.peekKeyword("@plugin"))return null;var e=this.createNode(us.Plugin);return this.consumeToken(),e.addChild(this._parseStringLiteral())?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.StringLiteralExpected)},t.prototype._parseMediaQuery=function(){var t=e.prototype._parseMediaQuery.call(this);if(!t){var n=this.create(aa);return n.addChild(this._parseVariable())?this.finish(n):null}return t},t.prototype._parseMediaDeclaration=function(e){return void 0===e&&(e=!1),this._tryParseRuleset(e)||this._tryToParseDeclaration()||this._tryParseMixinDeclaration()||this._tryParseMixinReference()||this._parseDetachedRuleSetMixin()||this._parseStylesheetStatement(e)},t.prototype._parseMediaFeatureName=function(){return this._parseIdent()||this._parseVariable()},t.prototype._parseVariableDeclaration=function(e){void 0===e&&(e=[]);var t=this.create(Ca),n=this.mark();if(!t.setVariable(this._parseVariable(!0)))return null;if(!this.accept(mo.Colon))return 
this.restoreAtMark(n),null;if(this.prevToken&&(t.colonPosition=this.prevToken.offset),t.setValue(this._parseDetachedRuleSet()))t.needsSemicolon=!1;else if(!t.setValue(this._parseExpr()))return this.finish(t,Pl.VariableValueExpected,[],e);return t.addChild(this._parsePrio()),this.peek(mo.SemiColon)&&(t.semicolonPosition=this.token.offset),this.finish(t)},t.prototype._parseDetachedRuleSet=function(){var e=this.mark();if(this.peekDelim("#")||this.peekDelim(".")){if(this.consumeToken(),this.hasWhitespace()||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;var t=this.create(Da);if(t.getParameters().addChild(this._parseMixinParameter()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)t.getParameters().addChild(this._parseMixinParameter())||this.markError(t,Pl.IdentifierExpected,[],[mo.ParenthesisR]);if(!this.accept(mo.ParenthesisR))return this.restoreAtMark(e),null}if(!this.peek(mo.CurlyL))return null;var n=this.create(Ns);return this._parseBody(n,this._parseDetachedRuleSetBody.bind(this)),this.finish(n)},t.prototype._parseDetachedRuleSetBody=function(){return this._tryParseKeyframeSelector()||this._parseRuleSetDeclaration()},t.prototype._addLookupChildren=function(e){if(!e.addChild(this._parseLookupValue()))return!1;for(var t=!1;this.peek(mo.BracketL)&&(t=!0),e.addChild(this._parseLookupValue());)t=!1;return!t},t.prototype._parseLookupValue=function(){var e=this.create(Ss),t=this.mark();return this.accept(mo.BracketL)&&((e.addChild(this._parseVariable(!1,!0))||e.addChild(this._parsePropertyIdentifier()))&&this.accept(mo.BracketR)||this.accept(mo.BracketR))?e:(this.restoreAtMark(t),null)},t.prototype._parseVariable=function(e,t){void 0===e&&(e=!1),void 0===t&&(t=!1);var n=!e&&this.peekDelim("$");if(!this.peekDelim("@")&&!n&&!this.peek(mo.AtKeyword))return null;for(var i=this.create(ka),r=this.mark();this.acceptDelim("@")||!e&&this.acceptDelim("$");)if(this.hasWhitespace())return 
this.restoreAtMark(r),null;return(this.accept(mo.AtKeyword)||this.accept(mo.Ident))&&(t||!this.peek(mo.BracketL)||this._addLookupChildren(i))?i:(this.restoreAtMark(r),null)},t.prototype._parseTermExpression=function(){return this._parseVariable()||this._parseEscaped()||e.prototype._parseTermExpression.call(this)||this._tryParseMixinReference(!1)},t.prototype._parseEscaped=function(){if(this.peek(mo.EscapedJavaScript)||this.peek(mo.BadEscapedJavaScript)){var e=this.createNode(us.EscapedValue);return this.consumeToken(),this.finish(e)}if(this.peekDelim("~")){e=this.createNode(us.EscapedValue);return this.consumeToken(),this.accept(mo.String)||this.accept(mo.EscapedJavaScript)?this.finish(e):this.finish(e,Pl.TermExpected)}return null},t.prototype._parseOperator=function(){var t=this._parseGuardOperator();return t||e.prototype._parseOperator.call(this)},t.prototype._parseGuardOperator=function(){if(this.peekDelim(">")){var e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("="),e}if(this.peekDelim("=")){e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("<"),e}if(this.peekDelim("<")){e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("="),e}return null},t.prototype._parseRuleSetDeclaration=function(){return this.peek(mo.AtKeyword)?this._parseKeyframe()||this._parseMedia(!0)||this._parseImport()||this._parseSupports(!0)||this._parseDetachedRuleSetMixin()||this._parseVariableDeclaration()||e.prototype._parseRuleSetDeclarationAtStatement.call(this):this._tryParseMixinDeclaration()||this._tryParseRuleset(!0)||this._tryParseMixinReference()||this._parseFunction()||this._parseExtend()||e.prototype._parseRuleSetDeclaration.call(this)},t.prototype._parseKeyframeIdent=function(){return this._parseIdent([fs.Keyframe])||this._parseVariable()},t.prototype._parseKeyframeSelector=function(){return 
this._parseDetachedRuleSetMixin()||e.prototype._parseKeyframeSelector.call(this)},t.prototype._parseSimpleSelectorBody=function(){return this._parseSelectorCombinator()||e.prototype._parseSimpleSelectorBody.call(this)},t.prototype._parseSelector=function(e){var t=this.create(Ds),n=!1;for(e&&(n=t.addChild(this._parseCombinator()));t.addChild(this._parseSimpleSelector());){n=!0;var i=this.mark();if(t.addChild(this._parseGuard())&&this.peek(mo.CurlyL))break;this.restoreAtMark(i),t.addChild(this._parseCombinator())}return n?this.finish(t):null},t.prototype._parseSelectorCombinator=function(){if(this.peekDelim("&")){var e=this.createNode(us.SelectorCombinator);for(this.consumeToken();!this.hasWhitespace()&&(this.acceptDelim("-")||this.accept(mo.Num)||this.accept(mo.Dimension)||e.addChild(this._parseIdent())||this.acceptDelim("&")););return this.finish(e)}return null},t.prototype._parseSelectorIdent=function(){if(!this.peekInterpolatedIdent())return null;var e=this.createNode(us.SelectorInterpolation);return this._acceptInterpolatedIdent(e)?this.finish(e):null},t.prototype._parsePropertyIdentifier=function(e){void 0===e&&(e=!1);var t=/^[\w-]+/;if(!this.peekInterpolatedIdent()&&!this.peekRegExp(this.token.type,t))return null;var n=this.mark(),i=this.create(ks);i.isCustomProperty=this.acceptDelim("-")&&this.acceptDelim("-");return(e?i.isCustomProperty?i.addChild(this._parseIdent()):i.addChild(this._parseRegexp(t)):i.isCustomProperty?this._acceptInterpolatedIdent(i):this._acceptInterpolatedIdent(i,t))?(e||this.hasWhitespace()||(this.acceptDelim("+"),this.hasWhitespace()||this.acceptIdent("_")),this.finish(i)):(this.restoreAtMark(n),null)},t.prototype.peekInterpolatedIdent=function(){return this.peek(mo.Ident)||this.peekDelim("@")||this.peekDelim("$")||this.peekDelim("-")},t.prototype._acceptInterpolatedIdent=function(e,t){for(var n=this,i=!1,r=function(){var e=n.mark();return 
n.acceptDelim("-")&&(n.hasWhitespace()||n.acceptDelim("-"),n.hasWhitespace())?(n.restoreAtMark(e),null):n._parseInterpolation()},o=t?function(){return n.acceptRegexp(t)}:function(){return n.accept(mo.Ident)};(o()||e.addChild(this._parseInterpolation()||this.try(r)))&&(i=!0,!this.hasWhitespace()););return i},t.prototype._parseInterpolation=function(){var e=this.mark();if(this.peekDelim("@")||this.peekDelim("$")){var t=this.createNode(us.Interpolation);return this.consumeToken(),this.hasWhitespace()||!this.accept(mo.CurlyL)?(this.restoreAtMark(e),null):t.addChild(this._parseIdent())?this.accept(mo.CurlyR)?this.finish(t):this.finish(t,Pl.RightCurlyExpected):this.finish(t,Pl.IdentifierExpected)}return null},t.prototype._tryParseMixinDeclaration=function(){var e=this.mark(),t=this.create(Da);if(!t.setIdentifier(this._parseMixinDeclarationIdentifier())||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;if(t.getParameters().addChild(this._parseMixinParameter()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)t.getParameters().addChild(this._parseMixinParameter())||this.markError(t,Pl.IdentifierExpected,[],[mo.ParenthesisR]);return this.accept(mo.ParenthesisR)?(t.setGuard(this._parseGuard()),this.peek(mo.CurlyL)?this._parseBody(t,this._parseMixInBodyDeclaration.bind(this)):(this.restoreAtMark(e),null)):(this.restoreAtMark(e),null)},t.prototype._parseMixInBodyDeclaration=function(){return this._parseFontFace()||this._parseRuleSetDeclaration()},t.prototype._parseMixinDeclarationIdentifier=function(){var e;if(this.peekDelim("#")||this.peekDelim(".")){if(e=this.create(ks),this.consumeToken(),this.hasWhitespace()||!e.addChild(this._parseIdent()))return null}else{if(!this.peek(mo.Hash))return null;e=this.create(ks),this.consumeToken()}return e.referenceTypes=[fs.Mixin],this.finish(e)},t.prototype._parsePseudo=function(){if(!this.peek(mo.Colon))return null;var t=this.mark(),n=this.create(Ea);return 
this.consumeToken(),this.acceptIdent("extend")?this._completeExtends(n):(this.restoreAtMark(t),e.prototype._parsePseudo.call(this))},t.prototype._parseExtend=function(){if(!this.peekDelim("&"))return null;var e=this.mark(),t=this.create(Ea);return this.consumeToken(),!this.hasWhitespace()&&this.accept(mo.Colon)&&this.acceptIdent("extend")?this._completeExtends(t):(this.restoreAtMark(e),null)},t.prototype._completeExtends=function(e){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected);var t=e.getSelectors();if(!t.addChild(this._parseSelector(!0)))return this.finish(e,Pl.SelectorExpected);for(;this.accept(mo.Comma);)if(!t.addChild(this._parseSelector(!0)))return this.finish(e,Pl.SelectorExpected);return this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},t.prototype._parseDetachedRuleSetMixin=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.mark(),t=this.create(Fa);return!t.addChild(this._parseVariable(!0))||!this.hasWhitespace()&&this.accept(mo.ParenthesisL)?this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected):(this.restoreAtMark(e),null)},t.prototype._tryParseMixinReference=function(e){void 0===e&&(e=!0);for(var t=this.mark(),n=this.create(Fa),i=this._parseMixinDeclarationIdentifier();i;){this.acceptDelim(">");var r=this._parseMixinDeclarationIdentifier();if(!r)break;n.getNamespaces().addChild(i),i=r}if(!n.setIdentifier(i))return this.restoreAtMark(t),null;var o=!1;if(this.accept(mo.ParenthesisL)){if(o=!0,n.getArguments().addChild(this._parseMixinArgument()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)if(!n.getArguments().addChild(this._parseMixinArgument()))return this.finish(n,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(n,Pl.RightParenthesisExpected);i.referenceTypes=[fs.Mixin]}else i.referenceTypes=[fs.Mixin,fs.Rule];return 
this.peek(mo.BracketL)?e||this._addLookupChildren(n):n.addChild(this._parsePrio()),o||this.peek(mo.SemiColon)||this.peek(mo.CurlyR)||this.peek(mo.EOF)?this.finish(n):(this.restoreAtMark(t),null)},t.prototype._parseMixinArgument=function(){var e=this.create(Ws),t=this.mark(),n=this._parseVariable();return n&&(this.accept(mo.Colon)?e.setIdentifier(n):this.restoreAtMark(t)),e.setValue(this._parseDetachedRuleSet()||this._parseExpr(!0))?this.finish(e):(this.restoreAtMark(t),null)},t.prototype._parseMixinParameter=function(){var e=this.create(Os);if(this.peekKeyword("@rest")){var t=this.create(Ss);return this.consumeToken(),this.accept(Zp)?(e.setIdentifier(this.finish(t)),this.finish(e)):this.finish(e,Pl.DotExpected,[],[mo.Comma,mo.ParenthesisR])}if(this.peek(Zp)){var n=this.create(Ss);return this.consumeToken(),e.setIdentifier(this.finish(n)),this.finish(e)}var i=!1;return e.setIdentifier(this._parseVariable())&&(this.accept(mo.Colon),i=!0),e.setDefaultValue(this._parseDetachedRuleSet()||this._parseExpr(!0))||i?this.finish(e):null},t.prototype._parseGuard=function(){if(!this.peekIdent("when"))return null;var e=this.create(Ma);if(this.consumeToken(),e.isNegated=this.acceptIdent("not"),!e.getConditions().addChild(this._parseGuardCondition()))return this.finish(e,Pl.ConditionExpected);for(;this.acceptIdent("and")||this.accept(mo.Comma);)if(!e.getConditions().addChild(this._parseGuardCondition()))return this.finish(e,Pl.ConditionExpected);return this.finish(e)},t.prototype._parseGuardCondition=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(za);return this.consumeToken(),e.addChild(this._parseExpr()),this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},t.prototype._parseFunction=function(){var e=this.mark(),t=this.create(Ps);if(!t.setIdentifier(this._parseFunctionIdentifier()))return null;if(this.hasWhitespace()||!this.accept(mo.ParenthesisL))return 
this.restoreAtMark(e),null;if(t.getArguments().addChild(this._parseMixinArgument()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)if(!t.getArguments().addChild(this._parseMixinArgument()))return this.finish(t,Pl.ExpressionExpected);return this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected)},t.prototype._parseFunctionIdentifier=function(){if(this.peekDelim("%")){var t=this.create(ks);return t.referenceTypes=[fs.Function],this.consumeToken(),this.finish(t)}return e.prototype._parseFunctionIdentifier.call(this)},t.prototype._parseURLArgument=function(){var t=this.mark(),n=e.prototype._parseURLArgument.call(this);if(!n||!this.peek(mo.ParenthesisR)){this.restoreAtMark(t);var i=this.create(Ss);return i.addChild(this._parseBinaryExpr()),this.finish(i)}return n},t}(Lh),iu=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),ru=Wa(),ou=function(e){function t(t,n){return e.call(this,"@",t,n)||this}return iu(t,e),t.prototype.createFunctionProposals=function(e,t,n,i){for(var r=0,o=e;r 50%"),example:"percentage(@number);",type:"percentage"},{name:"round",description:ru("less.builtin.round","rounds a number to a number of places"),example:"round(number, [places: 0]);"},{name:"sqrt",description:ru("less.builtin.sqrt","calculates square root of a number"),example:"sqrt(number);"},{name:"sin",description:ru("less.builtin.sin","sine function"),example:"sin(number);"},{name:"tan",description:ru("less.builtin.tan","tangent 
function"),example:"tan(number);"},{name:"atan",description:ru("less.builtin.atan","arctangent - inverse of tangent function"),example:"atan(number);"},{name:"pi",description:ru("less.builtin.pi","returns pi"),example:"pi();"},{name:"pow",description:ru("less.builtin.pow","first argument raised to the power of the second argument"),example:"pow(@base, @exponent);"},{name:"mod",description:ru("less.builtin.mod","first argument modulus second argument"),example:"mod(number, number);"},{name:"min",description:ru("less.builtin.min","returns the lowest of one or more values"),example:"min(@x, @y);"},{name:"max",description:ru("less.builtin.max","returns the lowest of one or more values"),example:"max(@x, @y);"}],t.colorProposals=[{name:"argb",example:"argb(@color);",description:ru("less.builtin.argb","creates a #AARRGGBB")},{name:"hsl",example:"hsl(@hue, @saturation, @lightness);",description:ru("less.builtin.hsl","creates a color")},{name:"hsla",example:"hsla(@hue, @saturation, @lightness, @alpha);",description:ru("less.builtin.hsla","creates a color")},{name:"hsv",example:"hsv(@hue, @saturation, @value);",description:ru("less.builtin.hsv","creates a color")},{name:"hsva",example:"hsva(@hue, @saturation, @value, @alpha);",description:ru("less.builtin.hsva","creates a color")},{name:"hue",example:"hue(@color);",description:ru("less.builtin.hue","returns the `hue` channel of `@color` in the HSL space")},{name:"saturation",example:"saturation(@color);",description:ru("less.builtin.saturation","returns the `saturation` channel of `@color` in the HSL space")},{name:"lightness",example:"lightness(@color);",description:ru("less.builtin.lightness","returns the `lightness` channel of `@color` in the HSL space")},{name:"hsvhue",example:"hsvhue(@color);",description:ru("less.builtin.hsvhue","returns the `hue` channel of `@color` in the HSV space")},{name:"hsvsaturation",example:"hsvsaturation(@color);",description:ru("less.builtin.hsvsaturation","returns the `saturation` channel 
of `@color` in the HSV space")},{name:"hsvvalue",example:"hsvvalue(@color);",description:ru("less.builtin.hsvvalue","returns the `value` channel of `@color` in the HSV space")},{name:"red",example:"red(@color);",description:ru("less.builtin.red","returns the `red` channel of `@color`")},{name:"green",example:"green(@color);",description:ru("less.builtin.green","returns the `green` channel of `@color`")},{name:"blue",example:"blue(@color);",description:ru("less.builtin.blue","returns the `blue` channel of `@color`")},{name:"alpha",example:"alpha(@color);",description:ru("less.builtin.alpha","returns the `alpha` channel of `@color`")},{name:"luma",example:"luma(@color);",description:ru("less.builtin.luma","returns the `luma` value (perceptual brightness) of `@color`")},{name:"saturate",example:"saturate(@color, 10%);",description:ru("less.builtin.saturate","return `@color` 10% points more saturated")},{name:"desaturate",example:"desaturate(@color, 10%);",description:ru("less.builtin.desaturate","return `@color` 10% points less saturated")},{name:"lighten",example:"lighten(@color, 10%);",description:ru("less.builtin.lighten","return `@color` 10% points lighter")},{name:"darken",example:"darken(@color, 10%);",description:ru("less.builtin.darken","return `@color` 10% points darker")},{name:"fadein",example:"fadein(@color, 10%);",description:ru("less.builtin.fadein","return `@color` 10% points less transparent")},{name:"fadeout",example:"fadeout(@color, 10%);",description:ru("less.builtin.fadeout","return `@color` 10% points more transparent")},{name:"fade",example:"fade(@color, 50%);",description:ru("less.builtin.fade","return `@color` with 50% transparency")},{name:"spin",example:"spin(@color, 10);",description:ru("less.builtin.spin","return `@color` with a 10 degree larger in hue")},{name:"mix",example:"mix(@color1, @color2, [@weight: 50%]);",description:ru("less.builtin.mix","return a mix of `@color1` and 
`@color2`")},{name:"greyscale",example:"greyscale(@color);",description:ru("less.builtin.greyscale","returns a grey, 100% desaturated color")},{name:"contrast",example:"contrast(@color1, [@darkcolor: black], [@lightcolor: white], [@threshold: 43%]);",description:ru("less.builtin.contrast","return `@darkcolor` if `@color1 is> 43% luma` otherwise return `@lightcolor`, see notes")},{name:"multiply",example:"multiply(@color1, @color2);"},{name:"screen",example:"screen(@color1, @color2);"},{name:"overlay",example:"overlay(@color1, @color2);"},{name:"softlight",example:"softlight(@color1, @color2);"},{name:"hardlight",example:"hardlight(@color1, @color2);"},{name:"difference",example:"difference(@color1, @color2);"},{name:"exclusion",example:"exclusion(@color1, @color2);"},{name:"average",example:"average(@color1, @color2);"},{name:"negation",example:"negation(@color1, @color2);"}],t}(pd);function su(e,t){var n=function(e){function t(t){return e.positionAt(t.offset).line}function n(t){return e.positionAt(t.offset+t.len).line}function i(){switch(e.languageId){case"scss":return new zp;case"less":return new eu;default:return new ls}}function r(e,i){var r=t(e),o=n(e);return r!==o?{startLine:r,endLine:o,kind:i}:null}var o=[],s=[],a=i();a.ignoreComment=!1,a.setSource(e.getText());var l=a.scan(),c=null,h=function(){switch(l.type){case mo.CurlyL:case Np:s.push({line:t(l),type:"brace",isStart:!0});break;case mo.CurlyR:if(0!==s.length){if(!(p=au(s,"brace")))break;var i=n(l);"brace"===p.type&&(c&&n(c)!==i&&i--,p.line!==i&&o.push({startLine:p.line,endLine:i,kind:void 0}))}break;case mo.Comment:var h=function(e){return"#region"===e?{line:t(l),type:"comment",isStart:!0}:{line:n(l),type:"comment",isStart:!1}},d=function(t){var n=t.text.match(/^\s*\/\*\s*(#region|#endregion)\b\s*(.*?)\s*\*\//);if(n)return h(n[1]);if("scss"===e.languageId||"less"===e.languageId){var i=t.text.match(/^\s*\/\/\s*(#region|#endregion)\b\s*(.*?)\s*/);if(i)return h(i[1])}return 
null}(l);if(d)if(d.isStart)s.push(d);else{var p;if(!(p=au(s,"comment")))break;"comment"===p.type&&p.line!==d.line&&o.push({startLine:p.line,endLine:d.line,kind:"region"})}else{var u=r(l,"comment");u&&o.push(u)}}c=l,l=a.scan()};for(;l.type!==mo.EOF;)h();return o}(e);return function(e,t){var n=t&&t.rangeLimit||Number.MAX_VALUE,i=e.sort((function(e,t){var n=e.startLine-t.startLine;return 0===n&&(n=e.endLine-t.endLine),n})),r=[],o=-1;return i.forEach((function(e){e.startLine=0;n--)if(e[n].type===t&&e[n].isStart)return e.splice(n,1)[0];return null}!function(){var e=[,,function(e){function t(e){this.__parent=e,this.__character_count=0,this.__indent_count=-1,this.__alignment_count=0,this.__wrap_point_index=0,this.__wrap_point_character_count=0,this.__wrap_point_indent_count=-1,this.__wrap_point_alignment_count=0,this.__items=[]}function n(e,t){this.__cache=[""],this.__indent_size=e.indent_size,this.__indent_string=e.indent_char,e.indent_with_tabs||(this.__indent_string=new Array(e.indent_size+1).join(e.indent_char)),t=t||"",e.indent_level>0&&(t=new Array(e.indent_level+1).join(this.__indent_string)),this.__base_string=t,this.__base_string_length=t.length}function i(e,i){this.__indent_cache=new n(e,i),this.raw=!1,this._end_with_newline=e.end_with_newline,this.indent_size=e.indent_size,this.wrap_line_length=e.wrap_line_length,this.indent_empty_lines=e.indent_empty_lines,this.__lines=[],this.previous_line=null,this.current_line=null,this.next_line=new t(this),this.space_before_token=!1,this.non_breaking_space=!1,this.previous_token_wrapped=!1,this.__add_outputline()}t.prototype.clone_empty=function(){var e=new t(this.__parent);return e.set_indent(this.__indent_count,this.__alignment_count),e},t.prototype.item=function(e){return e<0?this.__items[this.__items.length+e]:this.__items[e]},t.prototype.has_match=function(e){for(var 
t=this.__items.length-1;t>=0;t--)if(this.__items[t].match(e))return!0;return!1},t.prototype.set_indent=function(e,t){this.is_empty()&&(this.__indent_count=e||0,this.__alignment_count=t||0,this.__character_count=this.__parent.get_indent_size(this.__indent_count,this.__alignment_count))},t.prototype._set_wrap_point=function(){this.__parent.wrap_line_length&&(this.__wrap_point_index=this.__items.length,this.__wrap_point_character_count=this.__character_count,this.__wrap_point_indent_count=this.__parent.next_line.__indent_count,this.__wrap_point_alignment_count=this.__parent.next_line.__alignment_count)},t.prototype._should_wrap=function(){return this.__wrap_point_index&&this.__character_count>this.__parent.wrap_line_length&&this.__wrap_point_character_count>this.__parent.next_line.__character_count},t.prototype._allow_wrap=function(){if(this._should_wrap()){this.__parent.add_new_line();var e=this.__parent.current_line;return e.set_indent(this.__wrap_point_indent_count,this.__wrap_point_alignment_count),e.__items=this.__items.slice(this.__wrap_point_index),this.__items=this.__items.slice(0,this.__wrap_point_index),e.__character_count+=this.__character_count-this.__wrap_point_character_count,this.__character_count=this.__wrap_point_character_count," "===e.__items[0]&&(e.__items.splice(0,1),e.__character_count-=1),!0}return!1},t.prototype.is_empty=function(){return 0===this.__items.length},t.prototype.last=function(){return this.is_empty()?null:this.__items[this.__items.length-1]},t.prototype.push=function(e){this.__items.push(e);var t=e.lastIndexOf("\n");-1!==t?this.__character_count=e.length-t:this.__character_count+=e.length},t.prototype.pop=function(){var e=null;return 
this.is_empty()||(e=this.__items.pop(),this.__character_count-=e.length),e},t.prototype._remove_indent=function(){this.__indent_count>0&&(this.__indent_count-=1,this.__character_count-=this.__parent.indent_size)},t.prototype._remove_wrap_indent=function(){this.__wrap_point_indent_count>0&&(this.__wrap_point_indent_count-=1)},t.prototype.trim=function(){for(;" "===this.last();)this.__items.pop(),this.__character_count-=1},t.prototype.toString=function(){var e="";return this.is_empty()?this.__parent.indent_empty_lines&&(e=this.__parent.get_indent_string(this.__indent_count)):(e=this.__parent.get_indent_string(this.__indent_count,this.__alignment_count),e+=this.__items.join("")),e},n.prototype.get_indent_size=function(e,t){var n=this.__base_string_length;return t=t||0,e<0&&(n=0),n+=e*this.__indent_size,n+=t},n.prototype.get_indent_string=function(e,t){var n=this.__base_string;return t=t||0,e<0&&(e=0,n=""),t+=e*this.__indent_size,this.__ensure_cache(t),n+=this.__cache[t]},n.prototype.__ensure_cache=function(e){for(;e>=this.__cache.length;)this.__add_column()},n.prototype.__add_column=function(){var e=this.__cache.length,t=0,n="";this.__indent_size&&e>=this.__indent_size&&(e-=(t=Math.floor(e/this.__indent_size))*this.__indent_size,n=new Array(t+1).join(this.__indent_string)),e&&(n+=new Array(e+1).join(" ")),this.__cache.push(n)},i.prototype.__add_outputline=function(){this.previous_line=this.current_line,this.current_line=this.next_line.clone_empty(),this.__lines.push(this.current_line)},i.prototype.get_line_number=function(){return this.__lines.length},i.prototype.get_indent_string=function(e,t){return this.__indent_cache.get_indent_string(e,t)},i.prototype.get_indent_size=function(e,t){return 
this.__indent_cache.get_indent_size(e,t)},i.prototype.is_empty=function(){return!this.previous_line&&this.current_line.is_empty()},i.prototype.add_new_line=function(e){return!(this.is_empty()||!e&&this.just_added_newline())&&(this.raw||this.__add_outputline(),!0)},i.prototype.get_code=function(e){this.trim(!0);var t=this.current_line.pop();t&&("\n"===t[t.length-1]&&(t=t.replace(/\n+$/g,"")),this.current_line.push(t)),this._end_with_newline&&this.__add_outputline();var n=this.__lines.join("\n");return"\n"!==e&&(n=n.replace(/[\n]/g,e)),n},i.prototype.set_wrap_point=function(){this.current_line._set_wrap_point()},i.prototype.set_indent=function(e,t){return e=e||0,t=t||0,this.next_line.set_indent(e,t),this.__lines.length>1?(this.current_line.set_indent(e,t),!0):(this.current_line.set_indent(),!1)},i.prototype.add_raw_token=function(e){for(var t=0;t1&&this.current_line.is_empty();)this.__lines.pop(),this.current_line=this.__lines[this.__lines.length-1],this.current_line.trim();this.previous_line=this.__lines.length>1?this.__lines[this.__lines.length-2]:null},i.prototype.just_added_newline=function(){return this.current_line.is_empty()},i.prototype.just_added_blankline=function(){return this.is_empty()||this.current_line.is_empty()&&this.previous_line.is_empty()},i.prototype.ensure_empty_line_above=function(e,n){for(var i=this.__lines.length-2;i>=0;){var r=this.__lines[i];if(r.is_empty())break;if(0!==r.item(0).indexOf(e)&&r.item(-1)!==n){this.__lines.splice(i+1,0,new t(this)),this.previous_line=this.__lines[this.__lines.length-2];break}i--}},e.exports.Output=i},,,,function(e){function t(e,t){this.raw_options=n(e,t),this.disabled=this._get_boolean("disabled"),this.eol=this._get_characters("eol","auto"),this.end_with_newline=this._get_boolean("end_with_newline"),this.indent_size=this._get_number("indent_size",4),this.indent_char=this._get_characters("indent_char"," 
"),this.indent_level=this._get_number("indent_level"),this.preserve_newlines=this._get_boolean("preserve_newlines",!0),this.max_preserve_newlines=this._get_number("max_preserve_newlines",32786),this.preserve_newlines||(this.max_preserve_newlines=0),this.indent_with_tabs=this._get_boolean("indent_with_tabs","\t"===this.indent_char),this.indent_with_tabs&&(this.indent_char="\t",1===this.indent_size&&(this.indent_size=4)),this.wrap_line_length=this._get_number("wrap_line_length",this._get_number("max_char")),this.indent_empty_lines=this._get_boolean("indent_empty_lines"),this.templating=this._get_selection_list("templating",["auto","none","django","erb","handlebars","php","smarty"],["auto"])}function n(e,t){var n,r={};for(n in e=i(e))n!==t&&(r[n]=e[n]);if(t&&e[t])for(n in e[t])r[n]=e[t][n];return r}function i(e){var t,n={};for(t in e){n[t.replace(/-/g,"_")]=e[t]}return n}t.prototype._get_array=function(e,t){var n=this.raw_options[e],i=t||[];return"object"===typeof n?null!==n&&"function"===typeof n.concat&&(i=n.concat()):"string"===typeof n&&(i=n.split(/[^a-zA-Z0-9_\/\-]+/)),i},t.prototype._get_boolean=function(e,t){var n=this.raw_options[e];return void 0===n?!!t:!!n},t.prototype._get_characters=function(e,t){var n=this.raw_options[e],i=t||"";return"string"===typeof n&&(i=n.replace(/\\r/,"\r").replace(/\\n/,"\n").replace(/\\t/,"\t")),i},t.prototype._get_number=function(e,t){var n=this.raw_options[e];t=parseInt(t,10),isNaN(t)&&(t=0);var i=parseInt(n,10);return isNaN(i)&&(i=t),i},t.prototype._get_selection=function(e,t,n){var i=this._get_selection_list(e,t,n);if(1!==i.length)throw new Error("Invalid Option Value: The option '"+e+"' can only be one of the following values:\n"+t+"\nYou passed in: '"+this.raw_options[e]+"'");return i[0]},t.prototype._get_selection_list=function(e,t,n){if(!t||0===t.length)throw new Error("Selection list cannot be empty.");if(n=n||[t[0]],!this._is_valid_selection(n,t))throw new Error("Invalid Default Value!");var 
i=this._get_array(e,n);if(!this._is_valid_selection(i,t))throw new Error("Invalid Option Value: The option '"+e+"' can contain only the following values:\n"+t+"\nYou passed in: '"+this.raw_options[e]+"'");return i},t.prototype._is_valid_selection=function(e,t){return e.length&&t.length&&!e.some((function(e){return-1===t.indexOf(e)}))},e.exports.Options=t,e.exports.normalizeOpts=i,e.exports.mergeOpts=n},,function(e){var t=RegExp.prototype.hasOwnProperty("sticky");function n(e){this.__input=e||"",this.__input_length=this.__input.length,this.__position=0}n.prototype.restart=function(){this.__position=0},n.prototype.back=function(){this.__position>0&&(this.__position-=1)},n.prototype.hasNext=function(){return this.__position=0&&e=0&&t=e.length&&this.__input.substring(t-e.length,t).toLowerCase()===e},e.exports.InputScanner=n},,,,,function(e){function t(e,t){e="string"===typeof e?e:e.source,t="string"===typeof t?t:t.source,this.__directives_block_pattern=new RegExp(e+/ beautify( \w+[:]\w+)+ /.source+t,"g"),this.__directive_pattern=/ (\w+)[:](\w+)/g,this.__directives_end_ignore_pattern=new RegExp(e+/\sbeautify\signore:end\s/.source+t,"g")}t.prototype.get_directives=function(e){if(!e.match(this.__directives_block_pattern))return null;var t={};this.__directive_pattern.lastIndex=0;for(var n=this.__directive_pattern.exec(e);n;)t[n[1]]=n[2],n=this.__directive_pattern.exec(e);return t},t.prototype.readIgnored=function(e){return e.readUntilAfter(this.__directives_end_ignore_pattern)},e.exports.Directives=t},,function(e,t,n){var i=n(16).Beautifier,r=n(17).Options;e.exports=function(e,t){return new i(e,t).beautify()},e.exports.defaultOptions=function(){return new r}},function(e,t,n){var i=n(17).Options,r=n(2).Output,o=n(8).InputScanner,s=new(0,n(13).Directives)(/\/\*/,/\*\//),a=/\r\n|[\r\n]/,l=/\r\n|[\r\n]/g,c=/\s/,h=/(?:\s|\n)+/g,d=/\/\*(?:[\s\S]*?)((?:\*\/)|$)/g,p=/\/\/(?:[^\n\r\u2028\u2029]*)/g;function u(e,t){this._source_text=e||"",this._options=new 
i(t),this._ch=null,this._input=null,this.NESTED_AT_RULE={"@page":!0,"@font-face":!0,"@keyframes":!0,"@media":!0,"@supports":!0,"@document":!0},this.CONDITIONAL_GROUP_RULE={"@media":!0,"@supports":!0,"@document":!0}}u.prototype.eatString=function(e){var t="";for(this._ch=this._input.next();this._ch;){if(t+=this._ch,"\\"===this._ch)t+=this._input.next();else if(-1!==e.indexOf(this._ch)||"\n"===this._ch)break;this._ch=this._input.next()}return t},u.prototype.eatWhitespace=function(e){for(var t=c.test(this._input.peek()),n=0;c.test(this._input.peek());)this._ch=this._input.next(),e&&"\n"===this._ch&&(0===n||n0&&this._indentLevel--},u.prototype.beautify=function(){if(this._options.disabled)return this._source_text;var e=this._source_text,t=this._options.eol;"auto"===t&&(t="\n",e&&a.test(e||"")&&(t=e.match(a)[0]));var n=(e=e.replace(l,"\n")).match(/^[\t ]*/)[0];this._output=new r(this._options,n),this._input=new o(e),this._indentLevel=0,this._nestedLevel=0,this._ch=null;for(var i,u,m=0,f=!1,g=!1,b=!1,v=!1,y=!1,w=this._ch;i=""!==this._input.read(h),u=w,this._ch=this._input.next(),"\\"===this._ch&&this._input.hasNext()&&(this._ch+=this._input.next()),w=this._ch,this._ch;)if("/"===this._ch&&"*"===this._input.peek()){this._output.add_new_line(),this._input.back();var x=this._input.read(d),S=s.get_directives(x);S&&"start"===S.ignore&&(x+=s.readIgnored(this._input)),this.print_string(x),this.eatWhitespace(!0),this._output.add_new_line()}else if("/"===this._ch&&"/"===this._input.peek())this._output.space_before_token=!0,this._input.back(),this.print_string(this._input.read(p)),this.eatWhitespace(!0);else if("@"===this._ch)if(this.preserveSingleSpace(i),"{"===this._input.peek())this.print_string(this._ch+this.eatString("}"));else{this.print_string(this._ch);var C=this._input.peekUntilAfter(/[: ,;{}()[\]\/='"]/g);C.match(/[ :]$/)&&(C=this.eatString(": 
").replace(/\s$/,""),this.print_string(C),this._output.space_before_token=!0),"extend"===(C=C.replace(/\s$/,""))?v=!0:"import"===C&&(y=!0),C in this.NESTED_AT_RULE?(this._nestedLevel+=1,C in this.CONDITIONAL_GROUP_RULE&&(b=!0)):f||0!==m||-1===C.indexOf(":")||(g=!0,this.indent())}else"#"===this._ch&&"{"===this._input.peek()?(this.preserveSingleSpace(i),this.print_string(this._ch+this.eatString("}"))):"{"===this._ch?(g&&(g=!1,this.outdent()),b?(b=!1,f=this._indentLevel>=this._nestedLevel):f=this._indentLevel>=this._nestedLevel-1,this._options.newline_between_rules&&f&&this._output.previous_line&&"{"!==this._output.previous_line.item(-1)&&this._output.ensure_empty_line_above("/",","),this._output.space_before_token=!0,"expand"===this._options.brace_style?(this._output.add_new_line(),this.print_string(this._ch),this.indent(),this._output.set_indent(this._indentLevel)):(this.indent(),this.print_string(this._ch)),this.eatWhitespace(!0),this._output.add_new_line()):"}"===this._ch?(this.outdent(),this._output.add_new_line(),"{"===u&&this._output.trim(!0),y=!1,v=!1,g&&(this.outdent(),g=!1),this.print_string(this._ch),f=!1,this._nestedLevel&&this._nestedLevel--,this.eatWhitespace(!0),this._output.add_new_line(),this._options.newline_between_rules&&!this._output.just_added_blankline()&&"}"!==this._input.peek()&&this._output.add_new_line(!0)):":"===this._ch?!f&&!b||this._input.lookBack("&")||this.foundNestedPseudoClass()||this._input.lookBack("(")||v||0!==m?(this._input.lookBack(" 
")&&(this._output.space_before_token=!0),":"===this._input.peek()?(this._ch=this._input.next(),this.print_string("::")):this.print_string(":")):(this.print_string(":"),g||(g=!0,this._output.space_before_token=!0,this.eatWhitespace(!0),this.indent())):'"'===this._ch||"'"===this._ch?(this.preserveSingleSpace(i),this.print_string(this._ch+this.eatString(this._ch)),this.eatWhitespace(!0)):";"===this._ch?0===m?(g&&(this.outdent(),g=!1),v=!1,y=!1,this.print_string(this._ch),this.eatWhitespace(!0),"/"!==this._input.peek()&&this._output.add_new_line()):(this.print_string(this._ch),this.eatWhitespace(!0),this._output.space_before_token=!0):"("===this._ch?this._input.lookBack("url")?(this.print_string(this._ch),this.eatWhitespace(),m++,this.indent(),this._ch=this._input.next(),")"===this._ch||'"'===this._ch||"'"===this._ch?this._input.back():this._ch&&(this.print_string(this._ch+this.eatString(")")),m&&(m--,this.outdent()))):(this.preserveSingleSpace(i),this.print_string(this._ch),this.eatWhitespace(),m++,this.indent()):")"===this._ch?(m&&(m--,this.outdent()),this.print_string(this._ch)):","===this._ch?(this.print_string(this._ch),this.eatWhitespace(!0),!this._options.selector_separator_newline||g||0!==m||y||v?this._output.space_before_token=!0:this._output.add_new_line()):">"!==this._ch&&"+"!==this._ch&&"~"!==this._ch||g||0!==m?"]"===this._ch?this.print_string(this._ch):"["===this._ch?(this.preserveSingleSpace(i),this.print_string(this._ch)):"="===this._ch?(this.eatWhitespace(),this.print_string("="),c.test(this._ch)&&(this._ch="")):"!"!==this._ch||this._input.lookBack("\\")?(this.preserveSingleSpace(i),this.print_string(this._ch)):(this.print_string(" "),this.print_string(this._ch)):this._options.space_around_combinator?(this._output.space_before_token=!0,this.print_string(this._ch),this._output.space_before_token=!0):(this.print_string(this._ch),this.eatWhitespace(),this._ch&&c.test(this._ch)&&(this._ch=""));return 
this._output.get_code(t)},e.exports.Beautifier=u},function(e,t,n){var i=n(6).Options;function r(e){i.call(this,e,"css"),this.selector_separator_newline=this._get_boolean("selector_separator_newline",!0),this.newline_between_rules=this._get_boolean("newline_between_rules",!0);var t=this._get_boolean("space_around_selector_separator");this.space_around_combinator=this._get_boolean("space_around_combinator")||t;var n=this._get_selection_list("brace_style",["collapse","expand","end-expand","none","preserve-inline"]);this.brace_style="collapse";for(var r=0;r0&&fu(i,c-1);)c--;0===c||mu(i,c-1)?l=c:c=0;){var n=e.charCodeAt(t);if(n===du)return!0;if(n===pu)return!1;t--}return!1}(i,l),r=h===i.length,i=i.substring(l,h),0!==l){var p=e.offsetAt(Ba.create(t.start.line,0));o=function(e,t,n){var i=t,r=0,o=n.tabSize||4;for(;i0){var f=n.insertSpaces?ps(" ",a*o):ps("\t",o);m=m.split("\n").join("\n"+f),0===t.start.character&&(m=f+m)}return[{range:t,newText:m}]}function hu(e){return e.replace(/^\s+/,"")}var du="{".charCodeAt(0),pu="}".charCodeAt(0);function uu(e,t,n){if(e&&e.hasOwnProperty(t)){var i=e[t];if(null!==i)return i}return n}function mu(e,t){return-1!=="\r\n".indexOf(e.charAt(t))}function fu(e,t){return-1!==" \t".indexOf(e.charAt(t))}var gu={version:1.1,properties:[{name:"additive-symbols",browsers:["FF33"],syntax:"[ && ]#",relevance:50,description:"@counter-style descriptor. Specifies the symbols used by the marker-construction algorithm specified by the system descriptor. 
Needs to be specified if the counter system is 'additive'.",restrictions:["integer","string","image","identifier"]},{name:"align-content",values:[{name:"center",description:"Lines are packed toward the center of the flex container."},{name:"flex-end",description:"Lines are packed toward the end of the flex container."},{name:"flex-start",description:"Lines are packed toward the start of the flex container."},{name:"space-around",description:"Lines are evenly distributed in the flex container, with half-size spaces on either end."},{name:"space-between",description:"Lines are evenly distributed in the flex container."},{name:"stretch",description:"Lines stretch to take up the remaining space."}],syntax:"normal | | | ? ",relevance:62,description:"Aligns a flex container\u2019s lines within the flex container when there is extra space in the cross-axis, similar to how 'justify-content' aligns individual items within the main-axis.",restrictions:["enum"]},{name:"align-items",values:[{name:"baseline",description:"If the flex item\u2019s inline axis is the same as the cross axis, this value is identical to 'flex-start'. Otherwise, it participates in baseline alignment."},{name:"center",description:"The flex item\u2019s margin box is centered in the cross axis within the line."},{name:"flex-end",description:"The cross-end margin edge of the flex item is placed flush with the cross-end edge of the line."},{name:"flex-start",description:"The cross-start margin edge of the flex item is placed flush with the cross-start edge of the line."},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."}],syntax:"normal | stretch | | [ ? 
]",relevance:85,description:"Aligns flex items along the cross axis of the current line of the flex container.",restrictions:["enum"]},{name:"justify-items",values:[{name:"auto"},{name:"normal"},{name:"end"},{name:"start"},{name:"flex-end",description:'"Flex items are packed toward the end of the line."'},{name:"flex-start",description:'"Flex items are packed toward the start of the line."'},{name:"self-end",description:"The item is packed flush to the edge of the alignment container of the end side of the item, in the appropriate axis."},{name:"self-start",description:"The item is packed flush to the edge of the alignment container of the start side of the item, in the appropriate axis.."},{name:"center",description:"The items are packed flush to each other toward the center of the of the alignment container."},{name:"left"},{name:"right"},{name:"baseline"},{name:"first baseline"},{name:"last baseline"},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."},{name:"save"},{name:"unsave"},{name:"legacy"}],syntax:"normal | stretch | | ? 
[ | left | right ] | legacy | legacy && [ left | right | center ]",relevance:53,description:"Defines the default justify-self for all items of the box, giving them the default way of justifying each box along the appropriate axis",restrictions:["enum"]},{name:"justify-self",values:[{name:"auto"},{name:"normal"},{name:"end"},{name:"start"},{name:"flex-end",description:'"Flex items are packed toward the end of the line."'},{name:"flex-start",description:'"Flex items are packed toward the start of the line."'},{name:"self-end",description:"The item is packed flush to the edge of the alignment container of the end side of the item, in the appropriate axis."},{name:"self-start",description:"The item is packed flush to the edge of the alignment container of the start side of the item, in the appropriate axis.."},{name:"center",description:"The items are packed flush to each other toward the center of the of the alignment container."},{name:"left"},{name:"right"},{name:"baseline"},{name:"first baseline"},{name:"last baseline"},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."},{name:"save"},{name:"unsave"}],syntax:"auto | normal | stretch | | ? [ | left | right ]",relevance:53,description:"Defines the way of justifying a box inside its container along the appropriate axis.",restrictions:["enum"]},{name:"align-self",values:[{name:"auto",description:"Computes to the value of 'align-items' on the element\u2019s parent, or 'stretch' if the element has no parent. On absolutely positioned elements, it computes to itself."},{name:"baseline",description:"If the flex item\u2019s inline axis is the same as the cross axis, this value is identical to 'flex-start'. 
Otherwise, it participates in baseline alignment."},{name:"center",description:"The flex item\u2019s margin box is centered in the cross axis within the line."},{name:"flex-end",description:"The cross-end margin edge of the flex item is placed flush with the cross-end edge of the line."},{name:"flex-start",description:"The cross-start margin edge of the flex item is placed flush with the cross-start edge of the line."},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."}],syntax:"auto | normal | stretch | | ? ",relevance:72,description:"Allows the default alignment along the cross axis to be overridden for individual flex items.",restrictions:["enum"]},{name:"all",browsers:["E79","FF27","S9.1","C37","O24"],values:[],syntax:"initial | inherit | unset | revert",relevance:53,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/all"}],description:"Shorthand that resets all properties except 'direction' and 'unicode-bidi'.",restrictions:["enum"]},{name:"alt",browsers:["S9"],values:[],relevance:50,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/alt"}],description:"Provides alternative text for assistive technology to replace the generated content of a ::before or ::after element.",restrictions:["string","enum"]},{name:"animation",values:[{name:"alternate",description:"The animation cycle iterations that are odd counts are played in the normal direction, and the animation cycle iterations that are even counts are played in a reverse direction."},{name:"alternate-reverse",description:"The animation cycle iterations that are odd counts are played in the reverse direction, and the animation cycle iterations that are even counts are played in a normal direction."},{name:"backwards",description:"The beginning property value (as defined in the first @keyframes at-rule) is applied before the animation 
is displayed, during the period defined by 'animation-delay'."},{name:"both",description:"Both forwards and backwards fill modes are applied."},{name:"forwards",description:"The final property value (as defined in the last @keyframes at-rule) is maintained after the animation completes."},{name:"infinite",description:"Causes the animation to repeat forever."},{name:"none",description:"No animation is performed"},{name:"normal",description:"Normal playback."},{name:"reverse",description:"All iterations of the animation are played in the reverse direction from the way they were specified."}],syntax:"#",relevance:82,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/animation"}],description:"Shorthand property combines six of the animation properties into a single property.",restrictions:["time","timing-function","enum","identifier","number"]},{name:"animation-delay",syntax:"